Compare commits
174 Commits
v0.1.8
...
ad7b2a02cb
| Author | SHA1 | Date | |
|---|---|---|---|
| ad7b2a02cb | |||
| 72ddd9822c | |||
| 4c30efd802 | |||
| 926a7e6366 | |||
| 1eadc7b75c | |||
| 4a224a127b | |||
| c498fe7aee | |||
| 3838aed954 | |||
| 6a193dbb1c | |||
| e94b341a5b | |||
| 2ad3736852 | |||
| f05b2668c0 | |||
| f7c1c1f809 | |||
| 0e392e18b4 | |||
| 8996f1ce06 | |||
| f60dbaf9c9 | |||
| 1a5a7aa9e1 | |||
| 326367ae4c | |||
| a7f9b0a22f | |||
| 0e525713b1 | |||
| f43fad02fb | |||
| eff3e378f3 | |||
| 5e32cef792 | |||
| 9898167f8d | |||
| 4a553555d3 | |||
| 7a3202c996 | |||
| bd20ca86ab | |||
| 532cf95d59 | |||
| 366f8ce60d | |||
| 7612cb054a | |||
| 966d524dca | |||
| e84f1f1851 | |||
| a059f0502d | |||
| afd7173ba0 | |||
| c807bb2388 | |||
| aa4f9f5566 | |||
| 14786151e5 | |||
| a496862902 | |||
| df4f27ca2e | |||
| d72e0a347e | |||
| 6ed4b7d8ea | |||
| 31ebbea680 | |||
| d878134ebf | |||
| 55568d6892 | |||
| a4ae81c77c | |||
| 9da7104887 | |||
| de5377e5ac | |||
| 80b77b64eb | |||
| 6c912a3d71 | |||
| c6e368324a | |||
| 7b6c096bb7 | |||
| 03353a0aec | |||
| 72f5d9d70c | |||
| be63e27c15 | |||
| 81ef0fe4c7 | |||
| 5f24bd920d | |||
| 8552f193de | |||
| 5536330aeb | |||
| d4657c389d | |||
| 3827235232 | |||
| dfc0058d0d | |||
| 27aef84311 | |||
| 5003514a3d | |||
| 20a314e030 | |||
| d8232340c3 | |||
| a356bb0c4e | |||
| 1c328ee3af | |||
| 5bf7962c04 | |||
| e06f653606 | |||
| 9c2809c195 | |||
| fb32ca0a7d | |||
| 6ab702a818 | |||
| 550e7d435c | |||
| 776967e80d | |||
| 082a7fbcd1 | |||
| ff287cf67b | |||
| bddf36d52d | |||
| cf6cec9cab | |||
| d425839e57 | |||
| 4c661477d5 | |||
| f3f52f14a5 | |||
| d19ba3e305 | |||
| c627f41f53 | |||
| bcad0cd3da | |||
| 67f057ca1c | |||
| 01e79e6993 | |||
| 1e3c4b545f | |||
| 4ecd32a554 | |||
| aa6d7c4d28 | |||
| 6e6d6d32bf | |||
| 54705ab9c4 | |||
| 77a46d0725 | |||
| 0f750b9d89 | |||
| e0dee9db36 | |||
| 126657c99f | |||
| 07fb1ac773 | |||
| 147962e1dd | |||
| 2643a79121 | |||
| e9a035827b | |||
| 033b8a82be | |||
| e76c311231 | |||
| cbdf1a27c8 | |||
| 4a60cb269a | |||
| ebe7f6222d | |||
| 70b61fd8e6 | |||
| a779b002d7 | |||
| 45d21cce21 | |||
| 9629507acd | |||
| 5d6cb4efa1 | |||
| 56ad83bbaf | |||
| 847933b7c0 | |||
| be55d08c0a | |||
| 8c4bf67974 | |||
| 9385d1fe1c | |||
| 0ea54457e8 | |||
| ae26d22388 | |||
| 6b715851b9 | |||
| 62c36f7a6c | |||
| b32f1f94f7 | |||
| 6e3d280a75 | |||
| 704f79dc44 | |||
| 87c7f1bc7d | |||
| 23ea164215 | |||
| 7a8acfb933 | |||
| 71327bcbf1 | |||
| c0603c592b | |||
| 912a7dc74f | |||
| 4de936cea9 | |||
| adb9017580 | |||
| 4adfcc4131 | |||
| ebc315c1cc | |||
| 5ab62a00ff | |||
| 9c3518de63 | |||
| a52657e684 | |||
| 53297abe1e | |||
| a3b9db544c | |||
| f5d2e1c488 | |||
| f04c6a9cdc | |||
| 7a494abb96 | |||
| 956d17a649 | |||
| 5522f9ac04 | |||
| 3742f0228e | |||
| ba694cb717 | |||
| 433d291b4b | |||
| e3509e997f | |||
| 1c30200db0 | |||
| 7ff422d4dc | |||
| 546d51af9a | |||
| 0d1fe05fd0 | |||
| c5d4b2f1cd | |||
| a5d19e2982 | |||
| 692e7e3a6e | |||
| 78dba93ee0 | |||
| 93a5aa6618 | |||
| 9ab750650c | |||
| 609e9db2f7 | |||
| 94a55cf2b7 | |||
| b9cfc45aa2 | |||
| 2d60e36fbf | |||
| c78f7fa6b0 | |||
| b3dce8d13e | |||
| e792b86485 | |||
| cdb86aeea7 | |||
| cdbc156b5b | |||
| 1df8ff9d25 | |||
| 05f1b00473 | |||
| 5ebc97300e | |||
| d2f9c3bded | |||
| 9f347f2caa | |||
| 4ab58e59c2 | |||
| 32232211a1 | |||
| 1cacb80dd6 | |||
| e89bbb62dc | |||
| c8eb3de629 |
@@ -11,3 +11,5 @@ htmlcov
|
||||
logs
|
||||
data
|
||||
tmp
|
||||
myfsio_core/target
|
||||
myfsio-engine/target
|
||||
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -26,6 +26,13 @@ dist/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
|
||||
# Rust / maturin build artifacts
|
||||
myfsio_core/target/
|
||||
myfsio_core/Cargo.lock
|
||||
|
||||
# Rust engine build artifacts
|
||||
myfsio-engine/target/
|
||||
|
||||
# Local runtime artifacts
|
||||
logs/
|
||||
*.log
|
||||
|
||||
33
Dockerfile
33
Dockerfile
@@ -1,27 +1,39 @@
|
||||
# syntax=docker/dockerfile:1.7
|
||||
FROM python:3.11-slim
|
||||
FROM python:3.14.3-slim
|
||||
|
||||
ENV PYTHONDONTWRITEBYTECODE=1 \
|
||||
PYTHONUNBUFFERED=1
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Install build deps for any wheels that need compilation, then clean up
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends build-essential \
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends build-essential curl \
|
||||
&& curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
COPY requirements.txt ./
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY . .
|
||||
|
||||
# Make entrypoint executable
|
||||
RUN pip install --no-cache-dir maturin \
|
||||
&& cd myfsio_core \
|
||||
&& maturin build --release \
|
||||
&& pip install target/wheels/*.whl \
|
||||
&& cd ../myfsio-engine \
|
||||
&& cargo build --release \
|
||||
&& cp target/release/myfsio-server /usr/local/bin/myfsio-server \
|
||||
&& cd .. \
|
||||
&& rm -rf myfsio_core/target myfsio-engine/target \
|
||||
&& pip uninstall -y maturin \
|
||||
&& rustup self uninstall -y
|
||||
|
||||
RUN chmod +x docker-entrypoint.sh
|
||||
|
||||
# Create data directory and set permissions
|
||||
RUN mkdir -p /app/data \
|
||||
&& useradd -m -u 1000 myfsio \
|
||||
&& useradd -m -u 1000 myfsio \
|
||||
&& chown -R myfsio:myfsio /app
|
||||
|
||||
USER myfsio
|
||||
@@ -29,9 +41,10 @@ USER myfsio
|
||||
EXPOSE 5000 5100
|
||||
ENV APP_HOST=0.0.0.0 \
|
||||
FLASK_ENV=production \
|
||||
FLASK_DEBUG=0
|
||||
FLASK_DEBUG=0 \
|
||||
ENGINE=rust
|
||||
|
||||
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
||||
CMD python -c "import requests; requests.get('http://localhost:5000/healthz', timeout=2)"
|
||||
CMD python -c "import requests; requests.get('http://localhost:5000/myfsio/health', timeout=2)"
|
||||
|
||||
CMD ["./docker-entrypoint.sh"]
|
||||
|
||||
661
LICENSE
Normal file
661
LICENSE
Normal file
@@ -0,0 +1,661 @@
|
||||
GNU AFFERO GENERAL PUBLIC LICENSE
|
||||
Version 3, 19 November 2007
|
||||
|
||||
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The GNU Affero General Public License is a free, copyleft license for
|
||||
software and other kinds of works, specifically designed to ensure
|
||||
cooperation with the community in the case of network server software.
|
||||
|
||||
The licenses for most software and other practical works are designed
|
||||
to take away your freedom to share and change the works. By contrast,
|
||||
our General Public Licenses are intended to guarantee your freedom to
|
||||
share and change all versions of a program--to make sure it remains free
|
||||
software for all its users.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
them if you wish), that you receive source code or can get it if you
|
||||
want it, that you can change the software or use pieces of it in new
|
||||
free programs, and that you know you can do these things.
|
||||
|
||||
Developers that use our General Public Licenses protect your rights
|
||||
with two steps: (1) assert copyright on the software, and (2) offer
|
||||
you this License which gives you legal permission to copy, distribute
|
||||
and/or modify the software.
|
||||
|
||||
A secondary benefit of defending all users' freedom is that
|
||||
improvements made in alternate versions of the program, if they
|
||||
receive widespread use, become available for other developers to
|
||||
incorporate. Many developers of free software are heartened and
|
||||
encouraged by the resulting cooperation. However, in the case of
|
||||
software used on network servers, this result may fail to come about.
|
||||
The GNU General Public License permits making a modified version and
|
||||
letting the public access it on a server without ever releasing its
|
||||
source code to the public.
|
||||
|
||||
The GNU Affero General Public License is designed specifically to
|
||||
ensure that, in such cases, the modified source code becomes available
|
||||
to the community. It requires the operator of a network server to
|
||||
provide the source code of the modified version running there to the
|
||||
users of that server. Therefore, public use of a modified version, on
|
||||
a publicly accessible server, gives the public access to the source
|
||||
code of the modified version.
|
||||
|
||||
An older license, called the Affero General Public License and
|
||||
published by Affero, was designed to accomplish similar goals. This is
|
||||
a different license, not a version of the Affero GPL, but Affero has
|
||||
released a new version of the Affero GPL which permits relicensing under
|
||||
this license.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
TERMS AND CONDITIONS
|
||||
|
||||
0. Definitions.
|
||||
|
||||
"This License" refers to version 3 of the GNU Affero General Public License.
|
||||
|
||||
"Copyright" also means copyright-like laws that apply to other kinds of
|
||||
works, such as semiconductor masks.
|
||||
|
||||
"The Program" refers to any copyrightable work licensed under this
|
||||
License. Each licensee is addressed as "you". "Licensees" and
|
||||
"recipients" may be individuals or organizations.
|
||||
|
||||
To "modify" a work means to copy from or adapt all or part of the work
|
||||
in a fashion requiring copyright permission, other than the making of an
|
||||
exact copy. The resulting work is called a "modified version" of the
|
||||
earlier work or a work "based on" the earlier work.
|
||||
|
||||
A "covered work" means either the unmodified Program or a work based
|
||||
on the Program.
|
||||
|
||||
To "propagate" a work means to do anything with it that, without
|
||||
permission, would make you directly or secondarily liable for
|
||||
infringement under applicable copyright law, except executing it on a
|
||||
computer or modifying a private copy. Propagation includes copying,
|
||||
distribution (with or without modification), making available to the
|
||||
public, and in some countries other activities as well.
|
||||
|
||||
To "convey" a work means any kind of propagation that enables other
|
||||
parties to make or receive copies. Mere interaction with a user through
|
||||
a computer network, with no transfer of a copy, is not conveying.
|
||||
|
||||
An interactive user interface displays "Appropriate Legal Notices"
|
||||
to the extent that it includes a convenient and prominently visible
|
||||
feature that (1) displays an appropriate copyright notice, and (2)
|
||||
tells the user that there is no warranty for the work (except to the
|
||||
extent that warranties are provided), that licensees may convey the
|
||||
work under this License, and how to view a copy of this License. If
|
||||
the interface presents a list of user commands or options, such as a
|
||||
menu, a prominent item in the list meets this criterion.
|
||||
|
||||
1. Source Code.
|
||||
|
||||
The "source code" for a work means the preferred form of the work
|
||||
for making modifications to it. "Object code" means any non-source
|
||||
form of a work.
|
||||
|
||||
A "Standard Interface" means an interface that either is an official
|
||||
standard defined by a recognized standards body, or, in the case of
|
||||
interfaces specified for a particular programming language, one that
|
||||
is widely used among developers working in that language.
|
||||
|
||||
The "System Libraries" of an executable work include anything, other
|
||||
than the work as a whole, that (a) is included in the normal form of
|
||||
packaging a Major Component, but which is not part of that Major
|
||||
Component, and (b) serves only to enable use of the work with that
|
||||
Major Component, or to implement a Standard Interface for which an
|
||||
implementation is available to the public in source code form. A
|
||||
"Major Component", in this context, means a major essential component
|
||||
(kernel, window system, and so on) of the specific operating system
|
||||
(if any) on which the executable work runs, or a compiler used to
|
||||
produce the work, or an object code interpreter used to run it.
|
||||
|
||||
The "Corresponding Source" for a work in object code form means all
|
||||
the source code needed to generate, install, and (for an executable
|
||||
work) run the object code and to modify the work, including scripts to
|
||||
control those activities. However, it does not include the work's
|
||||
System Libraries, or general-purpose tools or generally available free
|
||||
programs which are used unmodified in performing those activities but
|
||||
which are not part of the work. For example, Corresponding Source
|
||||
includes interface definition files associated with source files for
|
||||
the work, and the source code for shared libraries and dynamically
|
||||
linked subprograms that the work is specifically designed to require,
|
||||
such as by intimate data communication or control flow between those
|
||||
subprograms and other parts of the work.
|
||||
|
||||
The Corresponding Source need not include anything that users
|
||||
can regenerate automatically from other parts of the Corresponding
|
||||
Source.
|
||||
|
||||
The Corresponding Source for a work in source code form is that
|
||||
same work.
|
||||
|
||||
2. Basic Permissions.
|
||||
|
||||
All rights granted under this License are granted for the term of
|
||||
copyright on the Program, and are irrevocable provided the stated
|
||||
conditions are met. This License explicitly affirms your unlimited
|
||||
permission to run the unmodified Program. The output from running a
|
||||
covered work is covered by this License only if the output, given its
|
||||
content, constitutes a covered work. This License acknowledges your
|
||||
rights of fair use or other equivalent, as provided by copyright law.
|
||||
|
||||
You may make, run and propagate covered works that you do not
|
||||
convey, without conditions so long as your license otherwise remains
|
||||
in force. You may convey covered works to others for the sole purpose
|
||||
of having them make modifications exclusively for you, or provide you
|
||||
with facilities for running those works, provided that you comply with
|
||||
the terms of this License in conveying all material for which you do
|
||||
not control copyright. Those thus making or running the covered works
|
||||
for you must do so exclusively on your behalf, under your direction
|
||||
and control, on terms that prohibit them from making any copies of
|
||||
your copyrighted material outside their relationship with you.
|
||||
|
||||
Conveying under any other circumstances is permitted solely under
|
||||
the conditions stated below. Sublicensing is not allowed; section 10
|
||||
makes it unnecessary.
|
||||
|
||||
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
|
||||
|
||||
No covered work shall be deemed part of an effective technological
|
||||
measure under any applicable law fulfilling obligations under article
|
||||
11 of the WIPO copyright treaty adopted on 20 December 1996, or
|
||||
similar laws prohibiting or restricting circumvention of such
|
||||
measures.
|
||||
|
||||
When you convey a covered work, you waive any legal power to forbid
|
||||
circumvention of technological measures to the extent such circumvention
|
||||
is effected by exercising rights under this License with respect to
|
||||
the covered work, and you disclaim any intention to limit operation or
|
||||
modification of the work as a means of enforcing, against the work's
|
||||
users, your or third parties' legal rights to forbid circumvention of
|
||||
technological measures.
|
||||
|
||||
4. Conveying Verbatim Copies.
|
||||
|
||||
You may convey verbatim copies of the Program's source code as you
|
||||
receive it, in any medium, provided that you conspicuously and
|
||||
appropriately publish on each copy an appropriate copyright notice;
|
||||
keep intact all notices stating that this License and any
|
||||
non-permissive terms added in accord with section 7 apply to the code;
|
||||
keep intact all notices of the absence of any warranty; and give all
|
||||
recipients a copy of this License along with the Program.
|
||||
|
||||
You may charge any price or no price for each copy that you convey,
|
||||
and you may offer support or warranty protection for a fee.
|
||||
|
||||
5. Conveying Modified Source Versions.
|
||||
|
||||
You may convey a work based on the Program, or the modifications to
|
||||
produce it from the Program, in the form of source code under the
|
||||
terms of section 4, provided that you also meet all of these conditions:
|
||||
|
||||
a) The work must carry prominent notices stating that you modified
|
||||
it, and giving a relevant date.
|
||||
|
||||
b) The work must carry prominent notices stating that it is
|
||||
released under this License and any conditions added under section
|
||||
7. This requirement modifies the requirement in section 4 to
|
||||
"keep intact all notices".
|
||||
|
||||
c) You must license the entire work, as a whole, under this
|
||||
License to anyone who comes into possession of a copy. This
|
||||
License will therefore apply, along with any applicable section 7
|
||||
additional terms, to the whole of the work, and all its parts,
|
||||
regardless of how they are packaged. This License gives no
|
||||
permission to license the work in any other way, but it does not
|
||||
invalidate such permission if you have separately received it.
|
||||
|
||||
d) If the work has interactive user interfaces, each must display
|
||||
Appropriate Legal Notices; however, if the Program has interactive
|
||||
interfaces that do not display Appropriate Legal Notices, your
|
||||
work need not make them do so.
|
||||
|
||||
A compilation of a covered work with other separate and independent
|
||||
works, which are not by their nature extensions of the covered work,
|
||||
and which are not combined with it such as to form a larger program,
|
||||
in or on a volume of a storage or distribution medium, is called an
|
||||
"aggregate" if the compilation and its resulting copyright are not
|
||||
used to limit the access or legal rights of the compilation's users
|
||||
beyond what the individual works permit. Inclusion of a covered work
|
||||
in an aggregate does not cause this License to apply to the other
|
||||
parts of the aggregate.
|
||||
|
||||
6. Conveying Non-Source Forms.
|
||||
|
||||
You may convey a covered work in object code form under the terms
|
||||
of sections 4 and 5, provided that you also convey the
|
||||
machine-readable Corresponding Source under the terms of this License,
|
||||
in one of these ways:
|
||||
|
||||
a) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by the
|
||||
Corresponding Source fixed on a durable physical medium
|
||||
customarily used for software interchange.
|
||||
|
||||
b) Convey the object code in, or embodied in, a physical product
|
||||
(including a physical distribution medium), accompanied by a
|
||||
written offer, valid for at least three years and valid for as
|
||||
long as you offer spare parts or customer support for that product
|
||||
model, to give anyone who possesses the object code either (1) a
|
||||
copy of the Corresponding Source for all the software in the
|
||||
product that is covered by this License, on a durable physical
|
||||
medium customarily used for software interchange, for a price no
|
||||
more than your reasonable cost of physically performing this
|
||||
conveying of source, or (2) access to copy the
|
||||
Corresponding Source from a network server at no charge.
|
||||
|
||||
c) Convey individual copies of the object code with a copy of the
|
||||
written offer to provide the Corresponding Source. This
|
||||
alternative is allowed only occasionally and noncommercially, and
|
||||
only if you received the object code with such an offer, in accord
|
||||
with subsection 6b.
|
||||
|
||||
d) Convey the object code by offering access from a designated
|
||||
place (gratis or for a charge), and offer equivalent access to the
|
||||
Corresponding Source in the same way through the same place at no
|
||||
further charge. You need not require recipients to copy the
|
||||
Corresponding Source along with the object code. If the place to
|
||||
copy the object code is a network server, the Corresponding Source
|
||||
may be on a different server (operated by you or a third party)
|
||||
that supports equivalent copying facilities, provided you maintain
|
||||
clear directions next to the object code saying where to find the
|
||||
Corresponding Source. Regardless of what server hosts the
|
||||
Corresponding Source, you remain obligated to ensure that it is
|
||||
available for as long as needed to satisfy these requirements.
|
||||
|
||||
e) Convey the object code using peer-to-peer transmission, provided
|
||||
you inform other peers where the object code and Corresponding
|
||||
Source of the work are being offered to the general public at no
|
||||
charge under subsection 6d.
|
||||
|
||||
A separable portion of the object code, whose source code is excluded
|
||||
from the Corresponding Source as a System Library, need not be
|
||||
included in conveying the object code work.
|
||||
|
||||
A "User Product" is either (1) a "consumer product", which means any
|
||||
tangible personal property which is normally used for personal, family,
|
||||
or household purposes, or (2) anything designed or sold for incorporation
|
||||
into a dwelling. In determining whether a product is a consumer product,
|
||||
doubtful cases shall be resolved in favor of coverage. For a particular
|
||||
product received by a particular user, "normally used" refers to a
|
||||
typical or common use of that class of product, regardless of the status
|
||||
of the particular user or of the way in which the particular user
|
||||
actually uses, or expects or is expected to use, the product. A product
|
||||
is a consumer product regardless of whether the product has substantial
|
||||
commercial, industrial or non-consumer uses, unless such uses represent
|
||||
the only significant mode of use of the product.
|
||||
|
||||
"Installation Information" for a User Product means any methods,
|
||||
procedures, authorization keys, or other information required to install
|
||||
and execute modified versions of a covered work in that User Product from
|
||||
a modified version of its Corresponding Source. The information must
|
||||
suffice to ensure that the continued functioning of the modified object
|
||||
code is in no case prevented or interfered with solely because
|
||||
modification has been made.
|
||||
|
||||
If you convey an object code work under this section in, or with, or
|
||||
specifically for use in, a User Product, and the conveying occurs as
|
||||
part of a transaction in which the right of possession and use of the
|
||||
User Product is transferred to the recipient in perpetuity or for a
|
||||
fixed term (regardless of how the transaction is characterized), the
|
||||
Corresponding Source conveyed under this section must be accompanied
|
||||
by the Installation Information. But this requirement does not apply
|
||||
if neither you nor any third party retains the ability to install
|
||||
modified object code on the User Product (for example, the work has
|
||||
been installed in ROM).
|
||||
|
||||
The requirement to provide Installation Information does not include a
|
||||
requirement to continue to provide support service, warranty, or updates
|
||||
for a work that has been modified or installed by the recipient, or for
|
||||
the User Product in which it has been modified or installed. Access to a
|
||||
network may be denied when the modification itself materially and
|
||||
adversely affects the operation of the network or violates the rules and
|
||||
protocols for communication across the network.
|
||||
|
||||
Corresponding Source conveyed, and Installation Information provided,
|
||||
in accord with this section must be in a format that is publicly
|
||||
documented (and with an implementation available to the public in
|
||||
source code form), and must require no special password or key for
|
||||
unpacking, reading or copying.
|
||||
|
||||
7. Additional Terms.
|
||||
|
||||
"Additional permissions" are terms that supplement the terms of this
|
||||
License by making exceptions from one or more of its conditions.
|
||||
Additional permissions that are applicable to the entire Program shall
|
||||
be treated as though they were included in this License, to the extent
|
||||
that they are valid under applicable law. If additional permissions
|
||||
apply only to part of the Program, that part may be used separately
|
||||
under those permissions, but the entire Program remains governed by
|
||||
this License without regard to the additional permissions.
|
||||
|
||||
When you convey a copy of a covered work, you may at your option
|
||||
remove any additional permissions from that copy, or from any part of
|
||||
it. (Additional permissions may be written to require their own
|
||||
removal in certain cases when you modify the work.) You may place
|
||||
additional permissions on material, added by you to a covered work,
|
||||
for which you have or can give appropriate copyright permission.
|
||||
|
||||
Notwithstanding any other provision of this License, for material you
|
||||
add to a covered work, you may (if authorized by the copyright holders of
|
||||
that material) supplement the terms of this License with terms:
|
||||
|
||||
a) Disclaiming warranty or limiting liability differently from the
|
||||
terms of sections 15 and 16 of this License; or
|
||||
|
||||
b) Requiring preservation of specified reasonable legal notices or
|
||||
author attributions in that material or in the Appropriate Legal
|
||||
Notices displayed by works containing it; or
|
||||
|
||||
c) Prohibiting misrepresentation of the origin of that material, or
|
||||
requiring that modified versions of such material be marked in
|
||||
reasonable ways as different from the original version; or
|
||||
|
||||
d) Limiting the use for publicity purposes of names of licensors or
|
||||
authors of the material; or
|
||||
|
||||
e) Declining to grant rights under trademark law for use of some
|
||||
trade names, trademarks, or service marks; or
|
||||
|
||||
f) Requiring indemnification of licensors and authors of that
|
||||
material by anyone who conveys the material (or modified versions of
|
||||
it) with contractual assumptions of liability to the recipient, for
|
||||
any liability that these contractual assumptions directly impose on
|
||||
those licensors and authors.
|
||||
|
||||
All other non-permissive additional terms are considered "further
|
||||
restrictions" within the meaning of section 10. If the Program as you
|
||||
received it, or any part of it, contains a notice stating that it is
|
||||
governed by this License along with a term that is a further
|
||||
restriction, you may remove that term. If a license document contains
|
||||
a further restriction but permits relicensing or conveying under this
|
||||
License, you may add to a covered work material governed by the terms
|
||||
of that license document, provided that the further restriction does
|
||||
not survive such relicensing or conveying.
|
||||
|
||||
If you add terms to a covered work in accord with this section, you
|
||||
must place, in the relevant source files, a statement of the
|
||||
additional terms that apply to those files, or a notice indicating
|
||||
where to find the applicable terms.
|
||||
|
||||
Additional terms, permissive or non-permissive, may be stated in the
|
||||
form of a separately written license, or stated as exceptions;
|
||||
the above requirements apply either way.
|
||||
|
||||
8. Termination.
|
||||
|
||||
You may not propagate or modify a covered work except as expressly
|
||||
provided under this License. Any attempt otherwise to propagate or
|
||||
modify it is void, and will automatically terminate your rights under
|
||||
this License (including any patent licenses granted under the third
|
||||
paragraph of section 11).
|
||||
|
||||
However, if you cease all violation of this License, then your
|
||||
license from a particular copyright holder is reinstated (a)
|
||||
provisionally, unless and until the copyright holder explicitly and
|
||||
finally terminates your license, and (b) permanently, if the copyright
|
||||
holder fails to notify you of the violation by some reasonable means
|
||||
prior to 60 days after the cessation.
|
||||
|
||||
Moreover, your license from a particular copyright holder is
|
||||
reinstated permanently if the copyright holder notifies you of the
|
||||
violation by some reasonable means, this is the first time you have
|
||||
received notice of violation of this License (for any work) from that
|
||||
copyright holder, and you cure the violation prior to 30 days after
|
||||
your receipt of the notice.
|
||||
|
||||
Termination of your rights under this section does not terminate the
|
||||
licenses of parties who have received copies or rights from you under
|
||||
this License. If your rights have been terminated and not permanently
|
||||
reinstated, you do not qualify to receive new licenses for the same
|
||||
material under section 10.
|
||||
|
||||
9. Acceptance Not Required for Having Copies.
|
||||
|
||||
You are not required to accept this License in order to receive or
|
||||
run a copy of the Program. Ancillary propagation of a covered work
|
||||
occurring solely as a consequence of using peer-to-peer transmission
|
||||
to receive a copy likewise does not require acceptance. However,
|
||||
nothing other than this License grants you permission to propagate or
|
||||
modify any covered work. These actions infringe copyright if you do
|
||||
not accept this License. Therefore, by modifying or propagating a
|
||||
covered work, you indicate your acceptance of this License to do so.
|
||||
|
||||
10. Automatic Licensing of Downstream Recipients.
|
||||
|
||||
Each time you convey a covered work, the recipient automatically
|
||||
receives a license from the original licensors, to run, modify and
|
||||
propagate that work, subject to this License. You are not responsible
|
||||
for enforcing compliance by third parties with this License.
|
||||
|
||||
An "entity transaction" is a transaction transferring control of an
|
||||
organization, or substantially all assets of one, or subdividing an
|
||||
organization, or merging organizations. If propagation of a covered
|
||||
work results from an entity transaction, each party to that
|
||||
transaction who receives a copy of the work also receives whatever
|
||||
licenses to the work the party's predecessor in interest had or could
|
||||
give under the previous paragraph, plus a right to possession of the
|
||||
Corresponding Source of the work from the predecessor in interest, if
|
||||
the predecessor has it or can get it with reasonable efforts.
|
||||
|
||||
You may not impose any further restrictions on the exercise of the
|
||||
rights granted or affirmed under this License. For example, you may
|
||||
not impose a license fee, royalty, or other charge for exercise of
|
||||
rights granted under this License, and you may not initiate litigation
|
||||
(including a cross-claim or counterclaim in a lawsuit) alleging that
|
||||
any patent claim is infringed by making, using, selling, offering for
|
||||
sale, or importing the Program or any portion of it.
|
||||
|
||||
11. Patents.
|
||||
|
||||
A "contributor" is a copyright holder who authorizes use under this
|
||||
License of the Program or a work on which the Program is based. The
|
||||
work thus licensed is called the contributor's "contributor version".
|
||||
|
||||
A contributor's "essential patent claims" are all patent claims
|
||||
owned or controlled by the contributor, whether already acquired or
|
||||
hereafter acquired, that would be infringed by some manner, permitted
|
||||
by this License, of making, using, or selling its contributor version,
|
||||
but do not include claims that would be infringed only as a
|
||||
consequence of further modification of the contributor version. For
|
||||
purposes of this definition, "control" includes the right to grant
|
||||
patent sublicenses in a manner consistent with the requirements of
|
||||
this License.
|
||||
|
||||
Each contributor grants you a non-exclusive, worldwide, royalty-free
|
||||
patent license under the contributor's essential patent claims, to
|
||||
make, use, sell, offer for sale, import and otherwise run, modify and
|
||||
propagate the contents of its contributor version.
|
||||
|
||||
In the following three paragraphs, a "patent license" is any express
|
||||
agreement or commitment, however denominated, not to enforce a patent
|
||||
(such as an express permission to practice a patent or covenant not to
|
||||
sue for patent infringement). To "grant" such a patent license to a
|
||||
party means to make such an agreement or commitment not to enforce a
|
||||
patent against the party.
|
||||
|
||||
If you convey a covered work, knowingly relying on a patent license,
|
||||
and the Corresponding Source of the work is not available for anyone
|
||||
to copy, free of charge and under the terms of this License, through a
|
||||
publicly available network server or other readily accessible means,
|
||||
then you must either (1) cause the Corresponding Source to be so
|
||||
available, or (2) arrange to deprive yourself of the benefit of the
|
||||
patent license for this particular work, or (3) arrange, in a manner
|
||||
consistent with the requirements of this License, to extend the patent
|
||||
license to downstream recipients. "Knowingly relying" means you have
|
||||
actual knowledge that, but for the patent license, your conveying the
|
||||
covered work in a country, or your recipient's use of the covered work
|
||||
in a country, would infringe one or more identifiable patents in that
|
||||
country that you have reason to believe are valid.
|
||||
|
||||
If, pursuant to or in connection with a single transaction or
|
||||
arrangement, you convey, or propagate by procuring conveyance of, a
|
||||
covered work, and grant a patent license to some of the parties
|
||||
receiving the covered work authorizing them to use, propagate, modify
|
||||
or convey a specific copy of the covered work, then the patent license
|
||||
you grant is automatically extended to all recipients of the covered
|
||||
work and works based on it.
|
||||
|
||||
A patent license is "discriminatory" if it does not include within
|
||||
the scope of its coverage, prohibits the exercise of, or is
|
||||
conditioned on the non-exercise of one or more of the rights that are
|
||||
specifically granted under this License. You may not convey a covered
|
||||
work if you are a party to an arrangement with a third party that is
|
||||
in the business of distributing software, under which you make payment
|
||||
to the third party based on the extent of your activity of conveying
|
||||
the work, and under which the third party grants, to any of the
|
||||
parties who would receive the covered work from you, a discriminatory
|
||||
patent license (a) in connection with copies of the covered work
|
||||
conveyed by you (or copies made from those copies), or (b) primarily
|
||||
for and in connection with specific products or compilations that
|
||||
contain the covered work, unless you entered into that arrangement,
|
||||
or that patent license was granted, prior to 28 March 2007.
|
||||
|
||||
Nothing in this License shall be construed as excluding or limiting
|
||||
any implied license or other defenses to infringement that may
|
||||
otherwise be available to you under applicable patent law.
|
||||
|
||||
12. No Surrender of Others' Freedom.
|
||||
|
||||
If conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot convey a
|
||||
covered work so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you may
|
||||
not convey it at all. For example, if you agree to terms that obligate you
|
||||
to collect a royalty for further conveying from those to whom you convey
|
||||
the Program, the only way you could satisfy both those terms and this
|
||||
License would be to refrain entirely from conveying the Program.
|
||||
|
||||
13. Remote Network Interaction; Use with the GNU General Public License.
|
||||
|
||||
Notwithstanding any other provision of this License, if you modify the
|
||||
Program, your modified version must prominently offer all users
|
||||
interacting with it remotely through a computer network (if your version
|
||||
supports such interaction) an opportunity to receive the Corresponding
|
||||
Source of your version by providing access to the Corresponding Source
|
||||
from a network server at no charge, through some standard or customary
|
||||
means of facilitating copying of software. This Corresponding Source
|
||||
shall include the Corresponding Source for any work covered by version 3
|
||||
of the GNU General Public License that is incorporated pursuant to the
|
||||
following paragraph.
|
||||
|
||||
Notwithstanding any other provision of this License, you have
|
||||
permission to link or combine any covered work with a work licensed
|
||||
under version 3 of the GNU General Public License into a single
|
||||
combined work, and to convey the resulting work. The terms of this
|
||||
License will continue to apply to the part which is the covered work,
|
||||
but the work with which it is combined will remain governed by version
|
||||
3 of the GNU General Public License.
|
||||
|
||||
14. Revised Versions of this License.
|
||||
|
||||
The Free Software Foundation may publish revised and/or new versions of
|
||||
the GNU Affero General Public License from time to time. Such new versions
|
||||
will be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the
|
||||
Program specifies that a certain numbered version of the GNU Affero General
|
||||
Public License "or any later version" applies to it, you have the
|
||||
option of following the terms and conditions either of that numbered
|
||||
version or of any later version published by the Free Software
|
||||
Foundation. If the Program does not specify a version number of the
|
||||
GNU Affero General Public License, you may choose any version ever published
|
||||
by the Free Software Foundation.
|
||||
|
||||
If the Program specifies that a proxy can decide which future
|
||||
versions of the GNU Affero General Public License can be used, that proxy's
|
||||
public statement of acceptance of a version permanently authorizes you
|
||||
to choose that version for the Program.
|
||||
|
||||
Later license versions may give you additional or different
|
||||
permissions. However, no additional obligations are imposed on any
|
||||
author or copyright holder as a result of your choosing to follow a
|
||||
later version.
|
||||
|
||||
15. Disclaimer of Warranty.
|
||||
|
||||
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
|
||||
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
|
||||
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
|
||||
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
|
||||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
|
||||
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
|
||||
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
|
||||
|
||||
16. Limitation of Liability.
|
||||
|
||||
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
|
||||
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
|
||||
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
|
||||
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
|
||||
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
|
||||
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
|
||||
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
|
||||
SUCH DAMAGES.
|
||||
|
||||
17. Interpretation of Sections 15 and 16.
|
||||
|
||||
If the disclaimer of warranty and limitation of liability provided
|
||||
above cannot be given local legal effect according to their terms,
|
||||
reviewing courts shall apply local law that most closely approximates
|
||||
an absolute waiver of all civil liability in connection with the
|
||||
Program, unless a warranty or assumption of liability accompanies a
|
||||
copy of the Program in return for a fee.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
state the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Affero General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Affero General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If your software can interact with users remotely through a computer
|
||||
network, you should also make sure that it provides a way for users to
|
||||
get its source. For example, if your program is a web application, its
|
||||
interface could display a "Source" link that leads users to an archive
|
||||
of the code. There are many ways you could offer source, and different
|
||||
solutions will be better for different programs; see section 13 for the
|
||||
specific requirements.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or school,
|
||||
if any, to sign a "copyright disclaimer" for the program, if necessary.
|
||||
For more information on this, and how to apply and follow the GNU AGPL, see
|
||||
<https://www.gnu.org/licenses/>.
|
||||
304
README.md
304
README.md
@@ -1,117 +1,255 @@
|
||||
# MyFSIO (Flask S3 + IAM)
|
||||
# MyFSIO
|
||||
|
||||
MyFSIO is a batteries-included, Flask-based recreation of Amazon S3 and IAM workflows built for local development. The design mirrors the [AWS S3 documentation](https://docs.aws.amazon.com/s3/) wherever practical: bucket naming, Signature Version 4 presigning, Version 2012-10-17 bucket policies, IAM-style users, and familiar REST endpoints.
|
||||
A lightweight, S3-compatible object storage system built with Flask. MyFSIO implements core AWS S3 REST API operations with filesystem-backed storage, making it ideal for local development, testing, and self-hosted storage scenarios.
|
||||
|
||||
## Why MyFSIO?
|
||||
## Features
|
||||
|
||||
- **Dual servers:** Run both the API (port 5000) and UI (port 5100) with a single command: `python run.py`.
|
||||
- **IAM + access keys:** Users, access keys, key rotation, and bucket-scoped actions (`list/read/write/delete/policy`) now live in `data/.myfsio.sys/config/iam.json` and are editable from the IAM dashboard.
|
||||
- **Bucket policies + hot reload:** `data/.myfsio.sys/config/bucket_policies.json` uses AWS' policy grammar (Version `2012-10-17`) with a built-in watcher, so editing the JSON file applies immediately. The UI also ships Public/Private/Custom presets for faster edits.
|
||||
- **Presigned URLs everywhere:** Signature Version 4 presigned URLs respect IAM + bucket policies and replace the now-removed "share link" feature for public access scenarios.
|
||||
- **Modern UI:** Responsive tables, quick filters, preview sidebar, object-level delete buttons, a presign modal, and an inline JSON policy editor that respects dark mode keep bucket management friendly. The object browser supports folder navigation, infinite scroll pagination, bulk operations, and automatic retry on load failures.
|
||||
- **Tests & health:** `/healthz` for smoke checks and `pytest` coverage for IAM, CRUD, presign, and policy flows.
|
||||
**Core Storage**
|
||||
- S3-compatible REST API with AWS Signature Version 4 authentication
|
||||
- Bucket and object CRUD operations
|
||||
- Object versioning with version history
|
||||
- Multipart uploads for large files
|
||||
- Presigned URLs (1 second to 7 days validity)
|
||||
|
||||
## Architecture at a Glance
|
||||
**Security & Access Control**
|
||||
- IAM users with access key management and rotation
|
||||
- Bucket policies (AWS Policy Version 2012-10-17)
|
||||
- Server-side encryption (SSE-S3 and SSE-KMS)
|
||||
- Built-in Key Management Service (KMS)
|
||||
- Rate limiting per endpoint
|
||||
|
||||
**Advanced Features**
|
||||
- Cross-bucket replication to remote S3-compatible endpoints
|
||||
- Hot-reload for bucket policies (no restart required)
|
||||
- CORS configuration per bucket
|
||||
|
||||
**Management UI**
|
||||
- Web console for bucket and object management
|
||||
- IAM dashboard for user administration
|
||||
- Inline JSON policy editor with presets
|
||||
- Object browser with folder navigation and bulk operations
|
||||
- Dark mode support
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
+-----------------+ +----------------+
|
||||
| API Server |<----->| Object storage |
|
||||
| (port 5000) | | (filesystem) |
|
||||
| - S3 routes | +----------------+
|
||||
| - Presigned URLs |
|
||||
| - Bucket policy |
|
||||
+-----------------+
|
||||
^
|
||||
|
|
||||
+-----------------+
|
||||
| UI Server |
|
||||
| (port 5100) |
|
||||
| - Auth console |
|
||||
| - IAM dashboard|
|
||||
| - Bucket editor|
|
||||
+-----------------+
|
||||
+------------------+ +------------------+
|
||||
| API Server | | UI Server |
|
||||
| (port 5000) | | (port 5100) |
|
||||
| | | |
|
||||
| - S3 REST API |<------->| - Web Console |
|
||||
| - SigV4 Auth | | - IAM Dashboard |
|
||||
| - Presign URLs | | - Bucket Editor |
|
||||
+--------+---------+ +------------------+
|
||||
|
|
||||
v
|
||||
+------------------+ +------------------+
|
||||
| Object Storage | | System Metadata |
|
||||
| (filesystem) | | (.myfsio.sys/) |
|
||||
| | | |
|
||||
| data/<bucket>/ | | - IAM config |
|
||||
| <objects> | | - Bucket policies|
|
||||
| | | - Encryption keys|
|
||||
+------------------+ +------------------+
|
||||
```
|
||||
|
||||
Both apps load the same configuration via `AppConfig` so IAM data and bucket policies stay consistent no matter which process you run.
|
||||
Bucket policies are automatically reloaded whenever `bucket_policies.json` changes—no restarts required.
|
||||
|
||||
## Getting Started
|
||||
## Quick Start
|
||||
|
||||
```bash
|
||||
# Clone and setup
|
||||
git clone https://gitea.jzwsite.com/kqjy/MyFSIO
|
||||
cd s3
|
||||
python -m venv .venv
|
||||
. .venv/Scripts/activate # PowerShell: .\.venv\Scripts\Activate.ps1
|
||||
|
||||
# Activate virtual environment
|
||||
# Windows PowerShell:
|
||||
.\.venv\Scripts\Activate.ps1
|
||||
# Windows CMD:
|
||||
.venv\Scripts\activate.bat
|
||||
# Linux/macOS:
|
||||
source .venv/bin/activate
|
||||
|
||||
# Install dependencies
|
||||
pip install -r requirements.txt
|
||||
|
||||
# Run both API and UI (default)
|
||||
# (Optional) Build Rust native extension for better performance
|
||||
# Requires Rust toolchain: https://rustup.rs
|
||||
pip install maturin
|
||||
cd myfsio_core && maturin develop --release && cd ..
|
||||
|
||||
# Start both servers
|
||||
python run.py
|
||||
|
||||
# Or run individually:
|
||||
# python run.py --mode api
|
||||
# python run.py --mode ui
|
||||
# Or start individually
|
||||
python run.py --mode api # API only (port 5000)
|
||||
python run.py --mode ui # UI only (port 5100)
|
||||
```
|
||||
|
||||
Visit `http://127.0.0.1:5100/ui` for the console and `http://127.0.0.1:5000/` for the raw API. Override ports/hosts with the environment variables listed below.
|
||||
**Credentials:** Generated automatically on first run and printed to the console. If missed, check the IAM config file at `<STORAGE_ROOT>/.myfsio.sys/config/iam.json`.
|
||||
|
||||
## IAM, Access Keys, and Bucket Policies
|
||||
|
||||
- First run creates `data/.myfsio.sys/config/iam.json` with `localadmin / localadmin` (full control). Sign in via the UI, then use the **IAM** tab to create users, rotate secrets, or edit inline policies without touching JSON by hand.
|
||||
- Bucket policies live in `data/.myfsio.sys/config/bucket_policies.json` and follow the AWS `arn:aws:s3:::bucket/key` resource syntax with Version `2012-10-17`. Attach/replace/remove policies from the bucket detail page or edit the JSON by hand—changes hot reload automatically.
|
||||
- IAM actions include extended verbs (`iam:list_users`, `iam:create_user`, `iam:update_policy`, etc.) so you can control who is allowed to manage other users and policies.
|
||||
|
||||
### Bucket Policy Presets & Hot Reload
|
||||
|
||||
- **Presets:** Every bucket detail view includes Public (read-only), Private (detach policy), and Custom presets. Public auto-populates a policy that grants anonymous `s3:ListBucket` + `s3:GetObject` access to the entire bucket.
|
||||
- **Custom drafts:** Switching back to Custom restores your last manual edit so you can toggle between presets without losing work.
|
||||
- **Hot reload:** The server watches `bucket_policies.json` and reloads statements on-the-fly—ideal for editing policies in your favorite editor while testing Via curl or the UI.
|
||||
|
||||
## Presigned URLs
|
||||
|
||||
Presigned URLs follow the AWS CLI playbook:
|
||||
|
||||
- Call `POST /presign/<bucket>/<key>` (or use the "Presign" button in the UI) to request a Signature Version 4 URL valid for 1 second to 7 days.
|
||||
- The generated URL honors IAM permissions and bucket-policy decisions at generation-time and again when somebody fetches it.
|
||||
- Because presigned URLs cover both authenticated and public sharing scenarios, the legacy "share link" feature has been removed.
|
||||
- **Web Console:** http://127.0.0.1:5100/ui
|
||||
- **API Endpoint:** http://127.0.0.1:5000
|
||||
|
||||
## Configuration
|
||||
|
||||
| Variable | Default | Description |
|
||||
| --- | --- | --- |
|
||||
| `STORAGE_ROOT` | `<project>/data` | Filesystem root for bucket directories |
|
||||
| `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size (bytes) |
|
||||
| `UI_PAGE_SIZE` | `100` | `MaxKeys` hint for listings |
|
||||
| `SECRET_KEY` | `dev-secret-key` | Flask session secret for the UI |
|
||||
| `IAM_CONFIG` | `<project>/data/.myfsio.sys/config/iam.json` | IAM user + policy store |
|
||||
| `BUCKET_POLICY_PATH` | `<project>/data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store |
|
||||
| `API_BASE_URL` | `http://127.0.0.1:5000` | Used by the UI when calling API endpoints (presign, bucket policy) |
|
||||
| `AWS_REGION` | `us-east-1` | Region used in Signature V4 scope |
|
||||
| `AWS_SERVICE` | `s3` | Service used in Signature V4 scope |
|
||||
|----------|---------|-------------|
|
||||
| `STORAGE_ROOT` | `./data` | Filesystem root for bucket storage |
|
||||
| `IAM_CONFIG` | `.myfsio.sys/config/iam.json` | IAM user and policy store |
|
||||
| `BUCKET_POLICY_PATH` | `.myfsio.sys/config/bucket_policies.json` | Bucket policy store |
|
||||
| `API_BASE_URL` | `http://127.0.0.1:5000` | API endpoint for UI calls |
|
||||
| `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size in bytes (1 GB) |
|
||||
| `MULTIPART_MIN_PART_SIZE` | `5242880` | Minimum multipart part size (5 MB) |
|
||||
| `UI_PAGE_SIZE` | `100` | Default page size for listings |
|
||||
| `SECRET_KEY` | `dev-secret-key` | Flask session secret |
|
||||
| `AWS_REGION` | `us-east-1` | Region for SigV4 signing |
|
||||
| `AWS_SERVICE` | `s3` | Service name for SigV4 signing |
|
||||
| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
|
||||
| `KMS_ENABLED` | `false` | Enable Key Management Service |
|
||||
| `LOG_LEVEL` | `INFO` | Logging verbosity |
|
||||
| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests |
|
||||
| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) |
|
||||
| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout |
|
||||
| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync |
|
||||
| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object |
|
||||
|
||||
> Buckets now live directly under `data/` while system metadata (versions, IAM, bucket policies, multipart uploads, etc.) lives in `data/.myfsio.sys`.
|
||||
|
||||
## API Cheatsheet (IAM headers required)
|
||||
## Data Layout
|
||||
|
||||
```
|
||||
GET / -> List buckets (XML)
|
||||
PUT /<bucket> -> Create bucket
|
||||
DELETE /<bucket> -> Delete bucket (must be empty)
|
||||
GET /<bucket> -> List objects (XML)
|
||||
PUT /<bucket>/<key> -> Upload object (binary stream)
|
||||
GET /<bucket>/<key> -> Download object
|
||||
DELETE /<bucket>/<key> -> Delete object
|
||||
POST /presign/<bucket>/<key> -> Generate AWS SigV4 presigned URL (JSON)
|
||||
GET /bucket-policy/<bucket> -> Fetch bucket policy (JSON)
|
||||
PUT /bucket-policy/<bucket> -> Attach/replace bucket policy (JSON)
|
||||
DELETE /bucket-policy/<bucket> -> Remove bucket policy
|
||||
data/
|
||||
├── <bucket>/ # User buckets with objects
|
||||
└── .myfsio.sys/ # System metadata
|
||||
├── config/
|
||||
│ ├── iam.json # IAM users and policies
|
||||
│ ├── bucket_policies.json # Bucket policies
|
||||
│ ├── replication_rules.json
|
||||
│ └── connections.json # Remote S3 connections
|
||||
├── buckets/<bucket>/
|
||||
│ ├── meta/ # Object metadata (.meta.json)
|
||||
│ ├── versions/ # Archived object versions
|
||||
│ └── .bucket.json # Bucket config (versioning, CORS)
|
||||
├── multipart/ # Active multipart uploads
|
||||
└── keys/ # Encryption keys (SSE-S3/KMS)
|
||||
```
|
||||
|
||||
## API Reference
|
||||
|
||||
All endpoints require AWS Signature Version 4 authentication unless using presigned URLs or public bucket policies.
|
||||
|
||||
### Bucket Operations
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| `GET` | `/` | List all buckets |
|
||||
| `PUT` | `/<bucket>` | Create bucket |
|
||||
| `DELETE` | `/<bucket>` | Delete bucket (must be empty) |
|
||||
| `HEAD` | `/<bucket>` | Check bucket exists |
|
||||
|
||||
### Object Operations
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| `GET` | `/<bucket>` | List objects (supports `list-type=2`) |
|
||||
| `PUT` | `/<bucket>/<key>` | Upload object |
|
||||
| `GET` | `/<bucket>/<key>` | Download object |
|
||||
| `DELETE` | `/<bucket>/<key>` | Delete object |
|
||||
| `HEAD` | `/<bucket>/<key>` | Get object metadata |
|
||||
| `POST` | `/<bucket>/<key>?uploads` | Initiate multipart upload |
|
||||
| `PUT` | `/<bucket>/<key>?partNumber=N&uploadId=X` | Upload part |
|
||||
| `POST` | `/<bucket>/<key>?uploadId=X` | Complete multipart upload |
|
||||
| `DELETE` | `/<bucket>/<key>?uploadId=X` | Abort multipart upload |
|
||||
|
||||
### Bucket Policies (S3-compatible)
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| `GET` | `/<bucket>?policy` | Get bucket policy |
|
||||
| `PUT` | `/<bucket>?policy` | Set bucket policy |
|
||||
| `DELETE` | `/<bucket>?policy` | Delete bucket policy |
|
||||
|
||||
### Versioning
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| `GET` | `/<bucket>/<key>?versionId=X` | Get specific version |
|
||||
| `DELETE` | `/<bucket>/<key>?versionId=X` | Delete specific version |
|
||||
| `GET` | `/<bucket>?versions` | List object versions |
|
||||
|
||||
### Health Check
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
|--------|----------|-------------|
|
||||
| `GET` | `/myfsio/health` | Health check endpoint |
|
||||
|
||||
## IAM & Access Control
|
||||
|
||||
### Users and Access Keys
|
||||
|
||||
On first run, MyFSIO creates a default admin user (`localadmin`/`localadmin`). Use the IAM dashboard to:
|
||||
|
||||
- Create and delete users
|
||||
- Generate and rotate access keys
|
||||
- Attach inline policies to users
|
||||
- Control IAM management permissions
|
||||
|
||||
### Bucket Policies
|
||||
|
||||
Bucket policies follow AWS policy grammar (Version `2012-10-17`) with support for:
|
||||
|
||||
- Principal-based access (`*` for anonymous, specific users)
|
||||
- Action-based permissions (`s3:GetObject`, `s3:PutObject`, etc.)
|
||||
- Resource patterns (`arn:aws:s3:::bucket/*`)
|
||||
- Condition keys
|
||||
|
||||
**Policy Presets:**
|
||||
- **Public:** Grants anonymous read access (`s3:GetObject`, `s3:ListBucket`)
|
||||
- **Private:** Removes bucket policy (IAM-only access)
|
||||
- **Custom:** Manual policy editing with draft preservation
|
||||
|
||||
Policies hot-reload when the JSON file changes.
|
||||
|
||||
## Server-Side Encryption
|
||||
|
||||
MyFSIO supports two encryption modes:
|
||||
|
||||
- **SSE-S3:** Server-managed keys with automatic key rotation
|
||||
- **SSE-KMS:** Customer-managed keys via built-in KMS
|
||||
|
||||
Enable encryption with:
|
||||
```bash
|
||||
ENCRYPTION_ENABLED=true python run.py
|
||||
```
|
||||
|
||||
## Cross-Bucket Replication
|
||||
|
||||
Replicate objects to remote S3-compatible endpoints:
|
||||
|
||||
1. Configure remote connections in the UI
|
||||
2. Create replication rules specifying source/destination
|
||||
3. Objects are automatically replicated on upload
|
||||
|
||||
## Docker
|
||||
|
||||
```bash
|
||||
docker build -t myfsio .
|
||||
docker run -p 5000:5000 -p 5100:5100 -v ./data:/app/data myfsio
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
```bash
|
||||
pytest -q
|
||||
# Run all tests
|
||||
pytest tests/ -v
|
||||
|
||||
# Run specific test file
|
||||
pytest tests/test_api.py -v
|
||||
|
||||
# Run with coverage
|
||||
pytest tests/ --cov=app --cov-report=html
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/)
|
||||
- [Signature Version 4 Signing Process](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html)
|
||||
- [Amazon S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html)
|
||||
- [Amazon S3 Documentation](https://docs.aws.amazon.com/s3/)
|
||||
- [AWS Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html)
|
||||
- [S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html)
|
||||
|
||||
504
app/__init__.py
504
app/__init__.py
@@ -1,21 +1,29 @@
|
||||
"""Application factory for the mini S3-compatible object store."""
|
||||
from __future__ import annotations
|
||||
|
||||
import html as html_module
|
||||
import itertools
|
||||
import logging
|
||||
import mimetypes
|
||||
import os
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
import uuid
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from pathlib import Path
|
||||
from datetime import timedelta
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
|
||||
from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for
|
||||
from flask_cors import CORS
|
||||
from flask_wtf.csrf import CSRFError
|
||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||
|
||||
import io
|
||||
|
||||
from .access_logging import AccessLoggingService
|
||||
from .operation_metrics import OperationMetricsCollector, classify_endpoint
|
||||
from .compression import GzipMiddleware
|
||||
from .acl import AclService
|
||||
from .bucket_policies import BucketPolicyStore
|
||||
from .config import AppConfig
|
||||
from .connections import ConnectionStore
|
||||
@@ -23,10 +31,77 @@ from .encryption import EncryptionManager
|
||||
from .extensions import limiter, csrf
|
||||
from .iam import IamService
|
||||
from .kms import KMSManager
|
||||
from .gc import GarbageCollector
|
||||
from .integrity import IntegrityChecker
|
||||
from .lifecycle import LifecycleManager
|
||||
from .notifications import NotificationService
|
||||
from .object_lock import ObjectLockService
|
||||
from .replication import ReplicationManager
|
||||
from .secret_store import EphemeralSecretStore
|
||||
from .storage import ObjectStorage
|
||||
from .site_registry import SiteRegistry, SiteInfo
|
||||
from .storage import ObjectStorage, StorageError
|
||||
from .version import get_version
|
||||
from .website_domains import WebsiteDomainStore
|
||||
|
||||
_request_counter = itertools.count(1)
|
||||
|
||||
|
||||
class _ChunkedTransferMiddleware:
|
||||
|
||||
def __init__(self, app):
|
||||
self.app = app
|
||||
|
||||
def __call__(self, environ, start_response):
|
||||
if environ.get("REQUEST_METHOD") not in ("PUT", "POST"):
|
||||
return self.app(environ, start_response)
|
||||
|
||||
transfer_encoding = environ.get("HTTP_TRANSFER_ENCODING", "")
|
||||
content_length = environ.get("CONTENT_LENGTH")
|
||||
|
||||
if "chunked" in transfer_encoding.lower():
|
||||
if content_length:
|
||||
del environ["HTTP_TRANSFER_ENCODING"]
|
||||
else:
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw:
|
||||
try:
|
||||
if hasattr(raw, "seek"):
|
||||
raw.seek(0)
|
||||
body = raw.read()
|
||||
except Exception:
|
||||
body = b""
|
||||
if body:
|
||||
environ["wsgi.input"] = io.BytesIO(body)
|
||||
environ["CONTENT_LENGTH"] = str(len(body))
|
||||
del environ["HTTP_TRANSFER_ENCODING"]
|
||||
|
||||
content_length = environ.get("CONTENT_LENGTH")
|
||||
if not content_length or content_length == "0":
|
||||
sha256 = environ.get("HTTP_X_AMZ_CONTENT_SHA256", "")
|
||||
decoded_len = environ.get("HTTP_X_AMZ_DECODED_CONTENT_LENGTH", "")
|
||||
content_encoding = environ.get("HTTP_CONTENT_ENCODING", "")
|
||||
if ("STREAMING" in sha256.upper() or decoded_len
|
||||
or "aws-chunked" in content_encoding.lower()):
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw:
|
||||
try:
|
||||
if hasattr(raw, "seek"):
|
||||
raw.seek(0)
|
||||
body = raw.read()
|
||||
except Exception:
|
||||
body = b""
|
||||
if body:
|
||||
environ["wsgi.input"] = io.BytesIO(body)
|
||||
environ["CONTENT_LENGTH"] = str(len(body))
|
||||
|
||||
raw = environ.get("wsgi.input")
|
||||
if raw and hasattr(raw, "seek"):
|
||||
try:
|
||||
raw.seek(0)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return self.app(environ, start_response)
|
||||
|
||||
|
||||
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
|
||||
@@ -83,7 +158,19 @@ def create_app(
|
||||
app.config.setdefault("WTF_CSRF_ENABLED", False)
|
||||
|
||||
# Trust X-Forwarded-* headers from proxies
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
|
||||
num_proxies = app.config.get("NUM_TRUSTED_PROXIES", 1)
|
||||
if num_proxies:
|
||||
if "NUM_TRUSTED_PROXIES" not in os.environ:
|
||||
logging.getLogger(__name__).warning(
|
||||
"NUM_TRUSTED_PROXIES not set, defaulting to 1. "
|
||||
"Set NUM_TRUSTED_PROXIES=0 if not behind a reverse proxy."
|
||||
)
|
||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=num_proxies, x_proto=num_proxies, x_host=num_proxies, x_prefix=num_proxies)
|
||||
|
||||
if app.config.get("ENABLE_GZIP", True):
|
||||
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
|
||||
|
||||
app.wsgi_app = _ChunkedTransferMiddleware(app.wsgi_app)
|
||||
|
||||
_configure_cors(app)
|
||||
_configure_logging(app)
|
||||
@@ -91,11 +178,23 @@ def create_app(
|
||||
limiter.init_app(app)
|
||||
csrf.init_app(app)
|
||||
|
||||
storage = ObjectStorage(Path(app.config["STORAGE_ROOT"]))
|
||||
storage = ObjectStorage(
|
||||
Path(app.config["STORAGE_ROOT"]),
|
||||
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 60),
|
||||
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
|
||||
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
|
||||
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
|
||||
meta_read_cache_max=app.config.get("META_READ_CACHE_MAX", 2048),
|
||||
)
|
||||
|
||||
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
|
||||
storage.warm_cache_async()
|
||||
|
||||
iam = IamService(
|
||||
Path(app.config["IAM_CONFIG"]),
|
||||
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
|
||||
auth_lockout_minutes=app.config.get("AUTH_LOCKOUT_MINUTES", 15),
|
||||
encryption_key=app.config.get("SECRET_KEY"),
|
||||
)
|
||||
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
|
||||
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
|
||||
@@ -120,12 +219,33 @@ def create_app(
|
||||
)
|
||||
|
||||
connections = ConnectionStore(connections_path)
|
||||
replication = ReplicationManager(storage, connections, replication_rules_path)
|
||||
|
||||
replication = ReplicationManager(
|
||||
storage,
|
||||
connections,
|
||||
replication_rules_path,
|
||||
storage_root,
|
||||
connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
|
||||
read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
|
||||
max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
|
||||
streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
|
||||
max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
|
||||
)
|
||||
|
||||
site_registry_path = config_dir / "site_registry.json"
|
||||
site_registry = SiteRegistry(site_registry_path)
|
||||
if app.config.get("SITE_ID") and not site_registry.get_local_site():
|
||||
site_registry.set_local_site(SiteInfo(
|
||||
site_id=app.config["SITE_ID"],
|
||||
endpoint=app.config.get("SITE_ENDPOINT") or "",
|
||||
region=app.config.get("SITE_REGION", "us-east-1"),
|
||||
priority=app.config.get("SITE_PRIORITY", 100),
|
||||
))
|
||||
|
||||
encryption_config = {
|
||||
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
|
||||
"encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
|
||||
"default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
|
||||
"encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
|
||||
}
|
||||
encryption_manager = EncryptionManager(encryption_config)
|
||||
|
||||
@@ -133,13 +253,63 @@ def create_app(
|
||||
if app.config.get("KMS_ENABLED", False):
|
||||
kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
|
||||
kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
|
||||
kms_manager = KMSManager(kms_keys_path, kms_master_key_path)
|
||||
kms_manager = KMSManager(
|
||||
kms_keys_path,
|
||||
kms_master_key_path,
|
||||
generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
|
||||
generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
|
||||
)
|
||||
encryption_manager.set_kms_provider(kms_manager)
|
||||
|
||||
if app.config.get("ENCRYPTION_ENABLED", False):
|
||||
from .encrypted_storage import EncryptedObjectStorage
|
||||
storage = EncryptedObjectStorage(storage, encryption_manager)
|
||||
|
||||
acl_service = AclService(storage_root)
|
||||
object_lock_service = ObjectLockService(storage_root)
|
||||
notification_service = NotificationService(
|
||||
storage_root,
|
||||
allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
|
||||
)
|
||||
access_logging_service = AccessLoggingService(storage_root)
|
||||
access_logging_service.set_storage(storage)
|
||||
|
||||
lifecycle_manager = None
|
||||
if app.config.get("LIFECYCLE_ENABLED", False):
|
||||
base_storage = storage.storage if hasattr(storage, 'storage') else storage
|
||||
lifecycle_manager = LifecycleManager(
|
||||
base_storage,
|
||||
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
|
||||
storage_root=storage_root,
|
||||
max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
|
||||
)
|
||||
lifecycle_manager.start()
|
||||
|
||||
gc_collector = None
|
||||
if app.config.get("GC_ENABLED", False):
|
||||
gc_collector = GarbageCollector(
|
||||
storage_root=storage_root,
|
||||
interval_hours=app.config.get("GC_INTERVAL_HOURS", 6.0),
|
||||
temp_file_max_age_hours=app.config.get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0),
|
||||
multipart_max_age_days=app.config.get("GC_MULTIPART_MAX_AGE_DAYS", 7),
|
||||
lock_file_max_age_hours=app.config.get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0),
|
||||
dry_run=app.config.get("GC_DRY_RUN", False),
|
||||
io_throttle_ms=app.config.get("GC_IO_THROTTLE_MS", 10),
|
||||
)
|
||||
gc_collector.start()
|
||||
|
||||
integrity_checker = None
|
||||
if app.config.get("INTEGRITY_ENABLED", False):
|
||||
integrity_checker = IntegrityChecker(
|
||||
storage_root=storage_root,
|
||||
interval_hours=app.config.get("INTEGRITY_INTERVAL_HOURS", 24.0),
|
||||
batch_size=app.config.get("INTEGRITY_BATCH_SIZE", 1000),
|
||||
auto_heal=app.config.get("INTEGRITY_AUTO_HEAL", False),
|
||||
dry_run=app.config.get("INTEGRITY_DRY_RUN", False),
|
||||
io_throttle_ms=app.config.get("INTEGRITY_IO_THROTTLE_MS", 10),
|
||||
)
|
||||
integrity_checker.start()
|
||||
|
||||
app.extensions["object_storage"] = storage
|
||||
app.extensions["iam"] = iam
|
||||
app.extensions["bucket_policies"] = bucket_policies
|
||||
@@ -149,14 +319,99 @@ def create_app(
|
||||
app.extensions["replication"] = replication
|
||||
app.extensions["encryption"] = encryption_manager
|
||||
app.extensions["kms"] = kms_manager
|
||||
app.extensions["acl"] = acl_service
|
||||
app.extensions["lifecycle"] = lifecycle_manager
|
||||
app.extensions["gc"] = gc_collector
|
||||
app.extensions["integrity"] = integrity_checker
|
||||
app.extensions["object_lock"] = object_lock_service
|
||||
app.extensions["notifications"] = notification_service
|
||||
app.extensions["access_logging"] = access_logging_service
|
||||
app.extensions["site_registry"] = site_registry
|
||||
|
||||
website_domains_store = None
|
||||
if app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
website_domains_path = config_dir / "website_domains.json"
|
||||
website_domains_store = WebsiteDomainStore(website_domains_path)
|
||||
app.extensions["website_domains"] = website_domains_store
|
||||
|
||||
from .s3_client import S3ProxyClient
|
||||
api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
|
||||
app.extensions["s3_proxy"] = S3ProxyClient(
|
||||
api_base_url=api_base,
|
||||
region=app.config.get("AWS_REGION", "us-east-1"),
|
||||
)
|
||||
|
||||
operation_metrics_collector = None
|
||||
if app.config.get("OPERATION_METRICS_ENABLED", False):
|
||||
operation_metrics_collector = OperationMetricsCollector(
|
||||
storage_root,
|
||||
interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
|
||||
retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24),
|
||||
)
|
||||
app.extensions["operation_metrics"] = operation_metrics_collector
|
||||
|
||||
system_metrics_collector = None
|
||||
if app.config.get("METRICS_HISTORY_ENABLED", False):
|
||||
from .system_metrics import SystemMetricsCollector
|
||||
system_metrics_collector = SystemMetricsCollector(
|
||||
storage_root,
|
||||
interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
|
||||
retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
|
||||
)
|
||||
system_metrics_collector.set_storage(storage)
|
||||
app.extensions["system_metrics"] = system_metrics_collector
|
||||
|
||||
site_sync_worker = None
|
||||
if app.config.get("SITE_SYNC_ENABLED", False):
|
||||
from .site_sync import SiteSyncWorker
|
||||
site_sync_worker = SiteSyncWorker(
|
||||
storage=storage,
|
||||
connections=connections,
|
||||
replication_manager=replication,
|
||||
storage_root=storage_root,
|
||||
interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
|
||||
batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
|
||||
connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
|
||||
read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
|
||||
max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
|
||||
clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
|
||||
)
|
||||
site_sync_worker.start()
|
||||
app.extensions["site_sync"] = site_sync_worker
|
||||
|
||||
@app.errorhandler(500)
|
||||
def internal_error(error):
|
||||
return render_template('500.html'), 500
|
||||
wants_html = request.accept_mimetypes.accept_html
|
||||
path = request.path or ""
|
||||
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
||||
return render_template('500.html'), 500
|
||||
error_xml = (
|
||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||
'<Error>'
|
||||
'<Code>InternalError</Code>'
|
||||
'<Message>An internal server error occurred</Message>'
|
||||
f'<Resource>{path}</Resource>'
|
||||
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
||||
'</Error>'
|
||||
)
|
||||
return error_xml, 500, {'Content-Type': 'application/xml'}
|
||||
|
||||
@app.errorhandler(CSRFError)
|
||||
def handle_csrf_error(e):
|
||||
return render_template('csrf_error.html', reason=e.description), 400
|
||||
wants_html = request.accept_mimetypes.accept_html
|
||||
path = request.path or ""
|
||||
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
|
||||
return render_template('csrf_error.html', reason=e.description), 400
|
||||
error_xml = (
|
||||
'<?xml version="1.0" encoding="UTF-8"?>'
|
||||
'<Error>'
|
||||
'<Code>CSRFError</Code>'
|
||||
f'<Message>{e.description}</Message>'
|
||||
f'<Resource>{path}</Resource>'
|
||||
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
|
||||
'</Error>'
|
||||
)
|
||||
return error_xml, 400, {'Content-Type': 'application/xml'}
|
||||
|
||||
@app.template_filter("filesizeformat")
|
||||
def filesizeformat(value: int) -> str:
|
||||
@@ -190,14 +445,41 @@ def create_app(
|
||||
except (ValueError, OSError):
|
||||
return "Unknown"
|
||||
|
||||
@app.template_filter("format_datetime")
|
||||
def format_datetime_filter(dt, include_tz: bool = True) -> str:
|
||||
"""Format datetime object as human-readable string in configured timezone."""
|
||||
from datetime import datetime, timezone as dt_timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
if not dt:
|
||||
return ""
|
||||
try:
|
||||
display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
|
||||
if display_tz and display_tz != "UTC":
|
||||
try:
|
||||
tz = ZoneInfo(display_tz)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=dt_timezone.utc)
|
||||
dt = dt.astimezone(tz)
|
||||
except (KeyError, ValueError):
|
||||
pass
|
||||
tz_abbr = dt.strftime("%Z") or "UTC"
|
||||
if include_tz:
|
||||
return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})"
|
||||
return dt.strftime("%b %d, %Y %H:%M")
|
||||
except (ValueError, AttributeError):
|
||||
return str(dt)
|
||||
|
||||
if include_api:
|
||||
from .s3_api import s3_api_bp
|
||||
from .kms_api import kms_api_bp
|
||||
from .admin_api import admin_api_bp
|
||||
|
||||
app.register_blueprint(s3_api_bp)
|
||||
app.register_blueprint(kms_api_bp)
|
||||
app.register_blueprint(admin_api_bp)
|
||||
csrf.exempt(s3_api_bp)
|
||||
csrf.exempt(kms_api_bp)
|
||||
csrf.exempt(admin_api_bp)
|
||||
|
||||
if include_ui:
|
||||
from .ui import ui_bp
|
||||
@@ -217,9 +499,9 @@ def create_app(
|
||||
return render_template("404.html"), 404
|
||||
return error
|
||||
|
||||
@app.get("/healthz")
|
||||
@app.get("/myfsio/health")
|
||||
def healthcheck() -> Dict[str, str]:
|
||||
return {"status": "ok", "version": app.config.get("APP_VERSION", "unknown")}
|
||||
return {"status": "ok"}
|
||||
|
||||
return app
|
||||
|
||||
@@ -265,17 +547,17 @@ def _configure_logging(app: Flask) -> None:
|
||||
formatter = logging.Formatter(
|
||||
"%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
|
||||
)
|
||||
|
||||
# Stream Handler (stdout) - Primary for Docker
|
||||
|
||||
stream_handler = logging.StreamHandler(sys.stdout)
|
||||
stream_handler.setFormatter(formatter)
|
||||
stream_handler.addFilter(_RequestContextFilter())
|
||||
|
||||
logger = app.logger
|
||||
for handler in logger.handlers[:]:
|
||||
handler.close()
|
||||
logger.handlers.clear()
|
||||
logger.addHandler(stream_handler)
|
||||
|
||||
# File Handler (optional, if configured)
|
||||
if app.config.get("LOG_TO_FILE"):
|
||||
log_file = Path(app.config["LOG_FILE"])
|
||||
log_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
@@ -293,27 +575,189 @@ def _configure_logging(app: Flask) -> None:
|
||||
|
||||
@app.before_request
|
||||
def _log_request_start() -> None:
|
||||
g.request_id = uuid.uuid4().hex
|
||||
g.request_id = f"{os.getpid():x}{next(_request_counter):012x}"
|
||||
g.request_started_at = time.perf_counter()
|
||||
app.logger.info(
|
||||
"Request started",
|
||||
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
|
||||
)
|
||||
g.request_bytes_in = request.content_length or 0
|
||||
|
||||
@app.before_request
|
||||
def _maybe_serve_website():
|
||||
if not app.config.get("WEBSITE_HOSTING_ENABLED"):
|
||||
return None
|
||||
if request.method not in {"GET", "HEAD"}:
|
||||
return None
|
||||
host = request.host
|
||||
if ":" in host:
|
||||
host = host.rsplit(":", 1)[0]
|
||||
host = host.lower()
|
||||
store = app.extensions.get("website_domains")
|
||||
if not store:
|
||||
return None
|
||||
bucket = store.get_bucket(host)
|
||||
if not bucket:
|
||||
return None
|
||||
storage = app.extensions["object_storage"]
|
||||
if not storage.bucket_exists(bucket):
|
||||
return _website_error_response(404, "Not Found")
|
||||
website_config = storage.get_bucket_website(bucket)
|
||||
if not website_config:
|
||||
return _website_error_response(404, "Not Found")
|
||||
index_doc = website_config.get("index_document", "index.html")
|
||||
error_doc = website_config.get("error_document")
|
||||
req_path = request.path.lstrip("/")
|
||||
if not req_path or req_path.endswith("/"):
|
||||
object_key = req_path + index_doc
|
||||
else:
|
||||
object_key = req_path
|
||||
try:
|
||||
obj_path = storage.get_object_path(bucket, object_key)
|
||||
except (StorageError, OSError):
|
||||
if object_key == req_path:
|
||||
try:
|
||||
obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
|
||||
object_key = req_path + "/" + index_doc
|
||||
except (StorageError, OSError):
|
||||
return _serve_website_error(storage, bucket, error_doc, 404)
|
||||
else:
|
||||
return _serve_website_error(storage, bucket, error_doc, 404)
|
||||
content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
|
||||
is_encrypted = False
|
||||
try:
|
||||
metadata = storage.get_object_metadata(bucket, object_key)
|
||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||
except (StorageError, OSError):
|
||||
pass
|
||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||
try:
|
||||
data, _ = storage.get_object_data(bucket, object_key)
|
||||
file_size = len(data)
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
else:
|
||||
data = None
|
||||
try:
|
||||
stat = obj_path.stat()
|
||||
file_size = stat.st_size
|
||||
except OSError:
|
||||
return _website_error_response(500, "Internal Server Error")
|
||||
if request.method == "HEAD":
|
||||
response = Response(status=200)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Content-Type"] = content_type
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
from .s3_api import _parse_range_header
|
||||
range_header = request.headers.get("Range")
|
||||
if range_header:
|
||||
ranges = _parse_range_header(range_header, file_size)
|
||||
if ranges is None:
|
||||
return Response(status=416, headers={"Content-Range": f"bytes */{file_size}"})
|
||||
start, end = ranges[0]
|
||||
length = end - start + 1
|
||||
if data is not None:
|
||||
partial_data = data[start:end + 1]
|
||||
response = Response(partial_data, status=206, mimetype=content_type)
|
||||
else:
|
||||
def _stream_range(file_path, start_pos, length_to_read):
|
||||
with file_path.open("rb") as f:
|
||||
f.seek(start_pos)
|
||||
remaining = length_to_read
|
||||
while remaining > 0:
|
||||
chunk = f.read(min(262144, remaining))
|
||||
if not chunk:
|
||||
break
|
||||
remaining -= len(chunk)
|
||||
yield chunk
|
||||
response = Response(_stream_range(obj_path, start, length), status=206, mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Range"] = f"bytes {start}-{end}/{file_size}"
|
||||
response.headers["Content-Length"] = length
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
if data is not None:
|
||||
response = Response(data, mimetype=content_type)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
def _stream(file_path):
|
||||
with file_path.open("rb") as f:
|
||||
while True:
|
||||
chunk = f.read(65536)
|
||||
if not chunk:
|
||||
break
|
||||
yield chunk
|
||||
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
|
||||
response.headers["Content-Length"] = file_size
|
||||
response.headers["Accept-Ranges"] = "bytes"
|
||||
return response
|
||||
|
||||
def _serve_website_error(storage, bucket, error_doc_key, status_code):
|
||||
if not error_doc_key:
|
||||
return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
|
||||
try:
|
||||
obj_path = storage.get_object_path(bucket, error_doc_key)
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(status_code, "Not Found")
|
||||
content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
|
||||
is_encrypted = False
|
||||
try:
|
||||
metadata = storage.get_object_metadata(bucket, error_doc_key)
|
||||
is_encrypted = "x-amz-server-side-encryption" in metadata
|
||||
except (StorageError, OSError):
|
||||
pass
|
||||
if is_encrypted and hasattr(storage, "get_object_data"):
|
||||
try:
|
||||
data, _ = storage.get_object_data(bucket, error_doc_key)
|
||||
response = Response(data, status=status_code, mimetype=content_type)
|
||||
response.headers["Content-Length"] = len(data)
|
||||
return response
|
||||
except (StorageError, OSError):
|
||||
return _website_error_response(status_code, "Not Found")
|
||||
try:
|
||||
data = obj_path.read_bytes()
|
||||
response = Response(data, status=status_code, mimetype=content_type)
|
||||
response.headers["Content-Length"] = len(data)
|
||||
return response
|
||||
except OSError:
|
||||
return _website_error_response(status_code, "Not Found")
|
||||
|
||||
def _website_error_response(status_code, message):
|
||||
safe_msg = html_module.escape(str(message))
|
||||
safe_code = html_module.escape(str(status_code))
|
||||
body = f"<html><head><title>{safe_code} {safe_msg}</title></head><body><h1>{safe_code} {safe_msg}</h1></body></html>"
|
||||
return Response(body, status=status_code, mimetype="text/html")
|
||||
|
||||
@app.after_request
|
||||
def _log_request_end(response):
|
||||
duration_ms = 0.0
|
||||
if hasattr(g, "request_started_at"):
|
||||
duration_ms = (time.perf_counter() - g.request_started_at) * 1000
|
||||
request_id = getattr(g, "request_id", uuid.uuid4().hex)
|
||||
request_id = getattr(g, "request_id", f"{os.getpid():x}{next(_request_counter):012x}")
|
||||
response.headers.setdefault("X-Request-ID", request_id)
|
||||
app.logger.info(
|
||||
"Request completed",
|
||||
extra={
|
||||
"path": request.path,
|
||||
"method": request.method,
|
||||
"remote_addr": request.remote_addr,
|
||||
},
|
||||
)
|
||||
if app.logger.isEnabledFor(logging.INFO):
|
||||
app.logger.info(
|
||||
"Request completed",
|
||||
extra={
|
||||
"path": request.path,
|
||||
"method": request.method,
|
||||
"remote_addr": request.remote_addr,
|
||||
},
|
||||
)
|
||||
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
|
||||
response.headers["Server"] = "MyFSIO"
|
||||
|
||||
operation_metrics = app.extensions.get("operation_metrics")
|
||||
if operation_metrics:
|
||||
bytes_in = getattr(g, "request_bytes_in", 0)
|
||||
bytes_out = response.content_length or 0
|
||||
error_code = getattr(g, "s3_error_code", None)
|
||||
endpoint_type = classify_endpoint(request.path)
|
||||
operation_metrics.record_request(
|
||||
method=request.method,
|
||||
endpoint_type=endpoint_type,
|
||||
status_code=response.status_code,
|
||||
latency_ms=duration_ms,
|
||||
bytes_in=bytes_in,
|
||||
bytes_out=bytes_out,
|
||||
error_code=error_code,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
265
app/access_logging.py
Normal file
265
app/access_logging.py
Normal file
@@ -0,0 +1,265 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class AccessLogEntry:
|
||||
bucket_owner: str = "-"
|
||||
bucket: str = "-"
|
||||
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
remote_ip: str = "-"
|
||||
requester: str = "-"
|
||||
request_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16].upper())
|
||||
operation: str = "-"
|
||||
key: str = "-"
|
||||
request_uri: str = "-"
|
||||
http_status: int = 200
|
||||
error_code: str = "-"
|
||||
bytes_sent: int = 0
|
||||
object_size: int = 0
|
||||
total_time_ms: int = 0
|
||||
turn_around_time_ms: int = 0
|
||||
referrer: str = "-"
|
||||
user_agent: str = "-"
|
||||
version_id: str = "-"
|
||||
host_id: str = "-"
|
||||
signature_version: str = "SigV4"
|
||||
cipher_suite: str = "-"
|
||||
authentication_type: str = "AuthHeader"
|
||||
host_header: str = "-"
|
||||
tls_version: str = "-"
|
||||
|
||||
def to_log_line(self) -> str:
|
||||
time_str = self.timestamp.strftime("[%d/%b/%Y:%H:%M:%S %z]")
|
||||
return (
|
||||
f'{self.bucket_owner} {self.bucket} {time_str} {self.remote_ip} '
|
||||
f'{self.requester} {self.request_id} {self.operation} {self.key} '
|
||||
f'"{self.request_uri}" {self.http_status} {self.error_code or "-"} '
|
||||
f'{self.bytes_sent or "-"} {self.object_size or "-"} {self.total_time_ms or "-"} '
|
||||
f'{self.turn_around_time_ms or "-"} "{self.referrer}" "{self.user_agent}" {self.version_id}'
|
||||
)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"bucket_owner": self.bucket_owner,
|
||||
"bucket": self.bucket,
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
"remote_ip": self.remote_ip,
|
||||
"requester": self.requester,
|
||||
"request_id": self.request_id,
|
||||
"operation": self.operation,
|
||||
"key": self.key,
|
||||
"request_uri": self.request_uri,
|
||||
"http_status": self.http_status,
|
||||
"error_code": self.error_code,
|
||||
"bytes_sent": self.bytes_sent,
|
||||
"object_size": self.object_size,
|
||||
"total_time_ms": self.total_time_ms,
|
||||
"referrer": self.referrer,
|
||||
"user_agent": self.user_agent,
|
||||
"version_id": self.version_id,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class LoggingConfiguration:
|
||||
target_bucket: str
|
||||
target_prefix: str = ""
|
||||
enabled: bool = True
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"LoggingEnabled": {
|
||||
"TargetBucket": self.target_bucket,
|
||||
"TargetPrefix": self.target_prefix,
|
||||
}
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> Optional["LoggingConfiguration"]:
|
||||
logging_enabled = data.get("LoggingEnabled")
|
||||
if not logging_enabled:
|
||||
return None
|
||||
return cls(
|
||||
target_bucket=logging_enabled.get("TargetBucket", ""),
|
||||
target_prefix=logging_enabled.get("TargetPrefix", ""),
|
||||
enabled=True,
|
||||
)
|
||||
|
||||
|
||||
class AccessLoggingService:
|
||||
def __init__(self, storage_root: Path, flush_interval: int = 60, max_buffer_size: int = 1000):
|
||||
self.storage_root = storage_root
|
||||
self.flush_interval = flush_interval
|
||||
self.max_buffer_size = max_buffer_size
|
||||
self._configs: Dict[str, LoggingConfiguration] = {}
|
||||
self._buffer: Dict[str, List[AccessLogEntry]] = {}
|
||||
self._buffer_lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
self._storage = None
|
||||
|
||||
self._flush_thread = threading.Thread(target=self._flush_loop, name="access-log-flush", daemon=True)
|
||||
self._flush_thread.start()
|
||||
|
||||
def set_storage(self, storage: Any) -> None:
|
||||
self._storage = storage
|
||||
|
||||
def _config_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "logging.json"
|
||||
|
||||
def get_bucket_logging(self, bucket_name: str) -> Optional[LoggingConfiguration]:
|
||||
if bucket_name in self._configs:
|
||||
return self._configs[bucket_name]
|
||||
|
||||
config_path = self._config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
return None
|
||||
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
config = LoggingConfiguration.from_dict(data)
|
||||
if config:
|
||||
self._configs[bucket_name] = config
|
||||
return config
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"Failed to load logging config for {bucket_name}: {e}")
|
||||
return None
|
||||
|
||||
def set_bucket_logging(self, bucket_name: str, config: LoggingConfiguration) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(config.to_dict(), indent=2), encoding="utf-8")
|
||||
self._configs[bucket_name] = config
|
||||
|
||||
def delete_bucket_logging(self, bucket_name: str) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
try:
|
||||
if config_path.exists():
|
||||
config_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
self._configs.pop(bucket_name, None)
|
||||
|
||||
def log_request(
|
||||
self,
|
||||
bucket_name: str,
|
||||
*,
|
||||
operation: str,
|
||||
key: str = "-",
|
||||
remote_ip: str = "-",
|
||||
requester: str = "-",
|
||||
request_uri: str = "-",
|
||||
http_status: int = 200,
|
||||
error_code: str = "",
|
||||
bytes_sent: int = 0,
|
||||
object_size: int = 0,
|
||||
total_time_ms: int = 0,
|
||||
referrer: str = "-",
|
||||
user_agent: str = "-",
|
||||
version_id: str = "-",
|
||||
request_id: str = "",
|
||||
) -> None:
|
||||
config = self.get_bucket_logging(bucket_name)
|
||||
if not config or not config.enabled:
|
||||
return
|
||||
|
||||
entry = AccessLogEntry(
|
||||
bucket_owner="local-owner",
|
||||
bucket=bucket_name,
|
||||
remote_ip=remote_ip,
|
||||
requester=requester,
|
||||
request_id=request_id or uuid.uuid4().hex[:16].upper(),
|
||||
operation=operation,
|
||||
key=key,
|
||||
request_uri=request_uri,
|
||||
http_status=http_status,
|
||||
error_code=error_code,
|
||||
bytes_sent=bytes_sent,
|
||||
object_size=object_size,
|
||||
total_time_ms=total_time_ms,
|
||||
referrer=referrer,
|
||||
user_agent=user_agent,
|
||||
version_id=version_id,
|
||||
)
|
||||
|
||||
target_key = f"{config.target_bucket}:{config.target_prefix}"
|
||||
should_flush = False
|
||||
with self._buffer_lock:
|
||||
if target_key not in self._buffer:
|
||||
self._buffer[target_key] = []
|
||||
self._buffer[target_key].append(entry)
|
||||
should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
|
||||
|
||||
if should_flush:
|
||||
self._flush_buffer(target_key)
|
||||
|
||||
def _flush_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.flush_interval)
|
||||
if not self._shutdown.is_set():
|
||||
self._flush_all()
|
||||
|
||||
def _flush_all(self) -> None:
|
||||
with self._buffer_lock:
|
||||
targets = list(self._buffer.keys())
|
||||
|
||||
for target_key in targets:
|
||||
self._flush_buffer(target_key)
|
||||
|
||||
def _flush_buffer(self, target_key: str) -> None:
|
||||
with self._buffer_lock:
|
||||
entries = self._buffer.pop(target_key, [])
|
||||
|
||||
if not entries or not self._storage:
|
||||
return
|
||||
|
||||
try:
|
||||
bucket_name, prefix = target_key.split(":", 1)
|
||||
except ValueError:
|
||||
logger.error(f"Invalid target key: {target_key}")
|
||||
return
|
||||
|
||||
now = datetime.now(timezone.utc)
|
||||
log_key = f"{prefix}{now.strftime('%Y-%m-%d-%H-%M-%S')}-{uuid.uuid4().hex[:8]}"
|
||||
|
||||
log_content = "\n".join(entry.to_log_line() for entry in entries) + "\n"
|
||||
|
||||
try:
|
||||
stream = io.BytesIO(log_content.encode("utf-8"))
|
||||
self._storage.put_object(bucket_name, log_key, stream, enforce_quota=False)
|
||||
logger.info(f"Flushed {len(entries)} access log entries to {bucket_name}/{log_key}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to write access log to {bucket_name}/{log_key}: {e}")
|
||||
with self._buffer_lock:
|
||||
if target_key not in self._buffer:
|
||||
self._buffer[target_key] = []
|
||||
self._buffer[target_key] = entries + self._buffer[target_key]
|
||||
|
||||
def flush(self) -> None:
|
||||
self._flush_all()
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
self._flush_all()
|
||||
self._flush_thread.join(timeout=5.0)
|
||||
|
||||
def get_stats(self) -> Dict[str, Any]:
|
||||
with self._buffer_lock:
|
||||
buffered = sum(len(entries) for entries in self._buffer.values())
|
||||
return {
|
||||
"buffered_entries": buffered,
|
||||
"target_buckets": len(self._buffer),
|
||||
}
|
||||
204
app/acl.py
Normal file
204
app/acl.py
Normal file
@@ -0,0 +1,204 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
|
||||
ACL_PERMISSION_FULL_CONTROL = "FULL_CONTROL"
|
||||
ACL_PERMISSION_WRITE = "WRITE"
|
||||
ACL_PERMISSION_WRITE_ACP = "WRITE_ACP"
|
||||
ACL_PERMISSION_READ = "READ"
|
||||
ACL_PERMISSION_READ_ACP = "READ_ACP"
|
||||
|
||||
ALL_PERMISSIONS = {
|
||||
ACL_PERMISSION_FULL_CONTROL,
|
||||
ACL_PERMISSION_WRITE,
|
||||
ACL_PERMISSION_WRITE_ACP,
|
||||
ACL_PERMISSION_READ,
|
||||
ACL_PERMISSION_READ_ACP,
|
||||
}
|
||||
|
||||
PERMISSION_TO_ACTIONS = {
|
||||
ACL_PERMISSION_FULL_CONTROL: {"read", "write", "delete", "list", "share"},
|
||||
ACL_PERMISSION_WRITE: {"write", "delete"},
|
||||
ACL_PERMISSION_WRITE_ACP: {"share"},
|
||||
ACL_PERMISSION_READ: {"read", "list"},
|
||||
ACL_PERMISSION_READ_ACP: {"share"},
|
||||
}
|
||||
|
||||
GRANTEE_ALL_USERS = "*"
|
||||
GRANTEE_AUTHENTICATED_USERS = "authenticated"
|
||||
|
||||
|
||||
@dataclass
|
||||
class AclGrant:
|
||||
grantee: str
|
||||
permission: str
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {"grantee": self.grantee, "permission": self.permission}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, str]) -> "AclGrant":
|
||||
return cls(grantee=data["grantee"], permission=data["permission"])
|
||||
|
||||
|
||||
@dataclass
|
||||
class Acl:
|
||||
owner: str
|
||||
grants: List[AclGrant] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"owner": self.owner,
|
||||
"grants": [g.to_dict() for g in self.grants],
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "Acl":
|
||||
return cls(
|
||||
owner=data.get("owner", ""),
|
||||
grants=[AclGrant.from_dict(g) for g in data.get("grants", [])],
|
||||
)
|
||||
|
||||
def get_allowed_actions(self, principal_id: Optional[str], is_authenticated: bool = True) -> Set[str]:
|
||||
actions: Set[str] = set()
|
||||
if principal_id and principal_id == self.owner:
|
||||
actions.update(PERMISSION_TO_ACTIONS[ACL_PERMISSION_FULL_CONTROL])
|
||||
for grant in self.grants:
|
||||
if grant.grantee == GRANTEE_ALL_USERS:
|
||||
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
|
||||
elif grant.grantee == GRANTEE_AUTHENTICATED_USERS and is_authenticated:
|
||||
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
|
||||
elif principal_id and grant.grantee == principal_id:
|
||||
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
|
||||
return actions
|
||||
|
||||
|
||||
CANNED_ACLS = {
|
||||
"private": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL)],
|
||||
),
|
||||
"public-read": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
|
||||
],
|
||||
),
|
||||
"public-read-write": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
|
||||
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_WRITE),
|
||||
],
|
||||
),
|
||||
"authenticated-read": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
AclGrant(grantee=GRANTEE_AUTHENTICATED_USERS, permission=ACL_PERMISSION_READ),
|
||||
],
|
||||
),
|
||||
"bucket-owner-read": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
],
|
||||
),
|
||||
"bucket-owner-full-control": lambda owner: Acl(
|
||||
owner=owner,
|
||||
grants=[
|
||||
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
|
||||
],
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def create_canned_acl(canned_acl: str, owner: str) -> Acl:
|
||||
factory = CANNED_ACLS.get(canned_acl)
|
||||
if not factory:
|
||||
return CANNED_ACLS["private"](owner)
|
||||
return factory(owner)
|
||||
|
||||
|
||||
class AclService:
|
||||
def __init__(self, storage_root: Path):
|
||||
self.storage_root = storage_root
|
||||
self._bucket_acl_cache: Dict[str, Acl] = {}
|
||||
|
||||
def _bucket_acl_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".acl.json"
|
||||
|
||||
def get_bucket_acl(self, bucket_name: str) -> Optional[Acl]:
|
||||
if bucket_name in self._bucket_acl_cache:
|
||||
return self._bucket_acl_cache[bucket_name]
|
||||
acl_path = self._bucket_acl_path(bucket_name)
|
||||
if not acl_path.exists():
|
||||
return None
|
||||
try:
|
||||
data = json.loads(acl_path.read_text(encoding="utf-8"))
|
||||
acl = Acl.from_dict(data)
|
||||
self._bucket_acl_cache[bucket_name] = acl
|
||||
return acl
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return None
|
||||
|
||||
def set_bucket_acl(self, bucket_name: str, acl: Acl) -> None:
|
||||
acl_path = self._bucket_acl_path(bucket_name)
|
||||
acl_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
acl_path.write_text(json.dumps(acl.to_dict(), indent=2), encoding="utf-8")
|
||||
self._bucket_acl_cache[bucket_name] = acl
|
||||
|
||||
def set_bucket_canned_acl(self, bucket_name: str, canned_acl: str, owner: str) -> Acl:
|
||||
acl = create_canned_acl(canned_acl, owner)
|
||||
self.set_bucket_acl(bucket_name, acl)
|
||||
return acl
|
||||
|
||||
def delete_bucket_acl(self, bucket_name: str) -> None:
|
||||
acl_path = self._bucket_acl_path(bucket_name)
|
||||
if acl_path.exists():
|
||||
acl_path.unlink()
|
||||
self._bucket_acl_cache.pop(bucket_name, None)
|
||||
|
||||
def evaluate_bucket_acl(
|
||||
self,
|
||||
bucket_name: str,
|
||||
principal_id: Optional[str],
|
||||
action: str,
|
||||
is_authenticated: bool = True,
|
||||
) -> bool:
|
||||
acl = self.get_bucket_acl(bucket_name)
|
||||
if not acl:
|
||||
return False
|
||||
allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
|
||||
return action in allowed_actions
|
||||
|
||||
def get_object_acl(self, bucket_name: str, object_key: str, object_metadata: Dict[str, Any]) -> Optional[Acl]:
|
||||
acl_data = object_metadata.get("__acl__")
|
||||
if not acl_data:
|
||||
return None
|
||||
try:
|
||||
return Acl.from_dict(acl_data)
|
||||
except (TypeError, KeyError):
|
||||
return None
|
||||
|
||||
def create_object_acl_metadata(self, acl: Acl) -> Dict[str, Any]:
|
||||
return {"__acl__": acl.to_dict()}
|
||||
|
||||
def evaluate_object_acl(
|
||||
self,
|
||||
object_metadata: Dict[str, Any],
|
||||
principal_id: Optional[str],
|
||||
action: str,
|
||||
is_authenticated: bool = True,
|
||||
) -> bool:
|
||||
acl = self.get_object_acl("", "", object_metadata)
|
||||
if not acl:
|
||||
return False
|
||||
allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
|
||||
return action in allowed_actions
|
||||
984
app/admin_api.py
Normal file
984
app/admin_api.py
Normal file
@@ -0,0 +1,984 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import socket
|
||||
import time
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from flask import Blueprint, Response, current_app, jsonify, request
|
||||
|
||||
from .connections import ConnectionStore
|
||||
from .extensions import limiter
|
||||
from .gc import GarbageCollector
|
||||
from .integrity import IntegrityChecker
|
||||
from .iam import IamError, Principal
|
||||
from .replication import ReplicationManager
|
||||
from .site_registry import PeerSite, SiteInfo, SiteRegistry
|
||||
from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
|
||||
|
||||
|
||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
"""Check if a URL is safe to make requests to (not internal/private).
|
||||
|
||||
Args:
|
||||
url: The URL to check.
|
||||
allow_internal: If True, allows internal/private IP addresses.
|
||||
Use for self-hosted deployments on internal networks.
|
||||
"""
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return False
|
||||
cloud_metadata_hosts = {
|
||||
"metadata.google.internal",
|
||||
"169.254.169.254",
|
||||
}
|
||||
if hostname.lower() in cloud_metadata_hosts:
|
||||
return False
|
||||
if allow_internal:
|
||||
return True
|
||||
blocked_hosts = {
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
"0.0.0.0",
|
||||
"::1",
|
||||
"[::1]",
|
||||
}
|
||||
if hostname.lower() in blocked_hosts:
|
||||
return False
|
||||
try:
|
||||
resolved_ip = socket.gethostbyname(hostname)
|
||||
ip = ipaddress.ip_address(resolved_ip)
|
||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||
return False
|
||||
except (socket.gaierror, ValueError):
|
||||
return False
|
||||
return True
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
|
||||
def _validate_endpoint(endpoint: str) -> Optional[str]:
|
||||
"""Validate endpoint URL format. Returns error message or None."""
|
||||
try:
|
||||
parsed = urlparse(endpoint)
|
||||
if not parsed.scheme or parsed.scheme not in ("http", "https"):
|
||||
return "Endpoint must be http or https URL"
|
||||
if not parsed.netloc:
|
||||
return "Endpoint must have a host"
|
||||
return None
|
||||
except Exception:
|
||||
return "Invalid endpoint URL"
|
||||
|
||||
|
||||
def _validate_priority(priority: Any) -> Optional[str]:
|
||||
"""Validate priority value. Returns error message or None."""
|
||||
try:
|
||||
p = int(priority)
|
||||
if p < 0 or p > 1000:
|
||||
return "Priority must be between 0 and 1000"
|
||||
return None
|
||||
except (TypeError, ValueError):
|
||||
return "Priority must be an integer"
|
||||
|
||||
|
||||
def _validate_region(region: str) -> Optional[str]:
|
||||
"""Validate region format. Returns error message or None."""
|
||||
if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
|
||||
return "Region must match format like us-east-1"
|
||||
return None
|
||||
|
||||
|
||||
def _validate_site_id(site_id: str) -> Optional[str]:
|
||||
"""Validate site_id format. Returns error message or None."""
|
||||
if not site_id or len(site_id) > 63:
|
||||
return "site_id must be 1-63 characters"
|
||||
if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
|
||||
return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
|
||||
return None
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin")
|
||||
|
||||
|
||||
def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
|
||||
from .s3_api import _require_principal as s3_require_principal
|
||||
return s3_require_principal()
|
||||
|
||||
|
||||
def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return None, error
|
||||
|
||||
try:
|
||||
_iam().authorize(principal, None, "iam:*")
|
||||
return principal, None
|
||||
except IamError:
|
||||
return None, _json_error("AccessDenied", "Admin access required", 403)
|
||||
|
||||
|
||||
def _site_registry() -> SiteRegistry:
|
||||
return current_app.extensions["site_registry"]
|
||||
|
||||
|
||||
def _connections() -> ConnectionStore:
|
||||
return current_app.extensions["connections"]
|
||||
|
||||
|
||||
def _replication() -> ReplicationManager:
|
||||
return current_app.extensions["replication"]
|
||||
|
||||
|
||||
def _iam():
|
||||
return current_app.extensions["iam"]
|
||||
|
||||
|
||||
def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
|
||||
return {"error": {"code": code, "message": message}}, status
|
||||
|
||||
|
||||
def _get_admin_rate_limit() -> str:
|
||||
return current_app.config.get("RATE_LIMIT_ADMIN", "60 per minute")
|
||||
|
||||
|
||||
@admin_api_bp.route("/site", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_local_site():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
local_site = registry.get_local_site()
|
||||
|
||||
if local_site:
|
||||
return jsonify(local_site.to_dict())
|
||||
|
||||
config_site_id = current_app.config.get("SITE_ID")
|
||||
config_endpoint = current_app.config.get("SITE_ENDPOINT")
|
||||
|
||||
if config_site_id:
|
||||
return jsonify({
|
||||
"site_id": config_site_id,
|
||||
"endpoint": config_endpoint or "",
|
||||
"region": current_app.config.get("SITE_REGION", "us-east-1"),
|
||||
"priority": current_app.config.get("SITE_PRIORITY", 100),
|
||||
"display_name": config_site_id,
|
||||
"source": "environment",
|
||||
})
|
||||
|
||||
return _json_error("NotFound", "Local site not configured", 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/site", methods=["PUT"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def update_local_site():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
|
||||
site_id = payload.get("site_id")
|
||||
endpoint = payload.get("endpoint")
|
||||
|
||||
if not site_id:
|
||||
return _json_error("ValidationError", "site_id is required", 400)
|
||||
|
||||
site_id_error = _validate_site_id(site_id)
|
||||
if site_id_error:
|
||||
return _json_error("ValidationError", site_id_error, 400)
|
||||
|
||||
if endpoint:
|
||||
endpoint_error = _validate_endpoint(endpoint)
|
||||
if endpoint_error:
|
||||
return _json_error("ValidationError", endpoint_error, 400)
|
||||
|
||||
if "priority" in payload:
|
||||
priority_error = _validate_priority(payload["priority"])
|
||||
if priority_error:
|
||||
return _json_error("ValidationError", priority_error, 400)
|
||||
|
||||
if "region" in payload:
|
||||
region_error = _validate_region(payload["region"])
|
||||
if region_error:
|
||||
return _json_error("ValidationError", region_error, 400)
|
||||
|
||||
registry = _site_registry()
|
||||
existing = registry.get_local_site()
|
||||
|
||||
site = SiteInfo(
|
||||
site_id=site_id,
|
||||
endpoint=endpoint or "",
|
||||
region=payload.get("region", "us-east-1"),
|
||||
priority=payload.get("priority", 100),
|
||||
display_name=payload.get("display_name", site_id),
|
||||
created_at=existing.created_at if existing else None,
|
||||
)
|
||||
|
||||
registry.set_local_site(site)
|
||||
|
||||
logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return jsonify(site.to_dict())
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def list_all_sites():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
local = registry.get_local_site()
|
||||
peers = registry.list_peers()
|
||||
|
||||
result = {
|
||||
"local": local.to_dict() if local else None,
|
||||
"peers": [peer.to_dict() for peer in peers],
|
||||
"total_peers": len(peers),
|
||||
}
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def register_peer_site():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
|
||||
site_id = payload.get("site_id")
|
||||
endpoint = payload.get("endpoint")
|
||||
|
||||
if not site_id:
|
||||
return _json_error("ValidationError", "site_id is required", 400)
|
||||
|
||||
site_id_error = _validate_site_id(site_id)
|
||||
if site_id_error:
|
||||
return _json_error("ValidationError", site_id_error, 400)
|
||||
|
||||
if not endpoint:
|
||||
return _json_error("ValidationError", "endpoint is required", 400)
|
||||
|
||||
endpoint_error = _validate_endpoint(endpoint)
|
||||
if endpoint_error:
|
||||
return _json_error("ValidationError", endpoint_error, 400)
|
||||
|
||||
region = payload.get("region", "us-east-1")
|
||||
region_error = _validate_region(region)
|
||||
if region_error:
|
||||
return _json_error("ValidationError", region_error, 400)
|
||||
|
||||
priority = payload.get("priority", 100)
|
||||
priority_error = _validate_priority(priority)
|
||||
if priority_error:
|
||||
return _json_error("ValidationError", priority_error, 400)
|
||||
|
||||
registry = _site_registry()
|
||||
|
||||
if registry.get_peer(site_id):
|
||||
return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)
|
||||
|
||||
connection_id = payload.get("connection_id")
|
||||
if connection_id:
|
||||
if not _connections().get(connection_id):
|
||||
return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
|
||||
|
||||
peer = PeerSite(
|
||||
site_id=site_id,
|
||||
endpoint=endpoint,
|
||||
region=region,
|
||||
priority=int(priority),
|
||||
display_name=payload.get("display_name", site_id),
|
||||
connection_id=connection_id,
|
||||
)
|
||||
|
||||
registry.add_peer(peer)
|
||||
|
||||
logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return jsonify(peer.to_dict()), 201
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_peer_site(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
peer = registry.get_peer(site_id)
|
||||
|
||||
if not peer:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
return jsonify(peer.to_dict())
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>", methods=["PUT"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def update_peer_site(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
existing = registry.get_peer(site_id)
|
||||
|
||||
if not existing:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
payload = request.get_json(silent=True) or {}
|
||||
|
||||
if "endpoint" in payload:
|
||||
endpoint_error = _validate_endpoint(payload["endpoint"])
|
||||
if endpoint_error:
|
||||
return _json_error("ValidationError", endpoint_error, 400)
|
||||
|
||||
if "priority" in payload:
|
||||
priority_error = _validate_priority(payload["priority"])
|
||||
if priority_error:
|
||||
return _json_error("ValidationError", priority_error, 400)
|
||||
|
||||
if "region" in payload:
|
||||
region_error = _validate_region(payload["region"])
|
||||
if region_error:
|
||||
return _json_error("ValidationError", region_error, 400)
|
||||
|
||||
if "connection_id" in payload:
|
||||
if payload["connection_id"] and not _connections().get(payload["connection_id"]):
|
||||
return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)
|
||||
|
||||
peer = PeerSite(
|
||||
site_id=site_id,
|
||||
endpoint=payload.get("endpoint", existing.endpoint),
|
||||
region=payload.get("region", existing.region),
|
||||
priority=payload.get("priority", existing.priority),
|
||||
display_name=payload.get("display_name", existing.display_name),
|
||||
connection_id=payload.get("connection_id", existing.connection_id),
|
||||
created_at=existing.created_at,
|
||||
is_healthy=existing.is_healthy,
|
||||
last_health_check=existing.last_health_check,
|
||||
)
|
||||
|
||||
registry.update_peer(peer)
|
||||
|
||||
logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return jsonify(peer.to_dict())
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def delete_peer_site(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
|
||||
if not registry.delete_peer(site_id):
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
logger.info("Peer site deleted", extra={"site_id": site_id, "principal": principal.access_key})
|
||||
return Response(status=204)
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>/health", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def check_peer_health(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
peer = registry.get_peer(site_id)
|
||||
|
||||
if not peer:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
is_healthy = False
|
||||
error_message = None
|
||||
|
||||
if peer.connection_id:
|
||||
connection = _connections().get(peer.connection_id)
|
||||
if connection:
|
||||
is_healthy = _replication().check_endpoint_health(connection)
|
||||
else:
|
||||
error_message = f"Connection '{peer.connection_id}' not found"
|
||||
else:
|
||||
error_message = "No connection configured for this peer"
|
||||
|
||||
registry.update_health(site_id, is_healthy)
|
||||
|
||||
result = {
|
||||
"site_id": site_id,
|
||||
"is_healthy": is_healthy,
|
||||
"checked_at": time.time(),
|
||||
}
|
||||
if error_message:
|
||||
result["error"] = error_message
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
@admin_api_bp.route("/topology", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_topology():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
local = registry.get_local_site()
|
||||
peers = registry.list_peers()
|
||||
|
||||
sites = []
|
||||
|
||||
if local:
|
||||
sites.append({
|
||||
**local.to_dict(),
|
||||
"is_local": True,
|
||||
"is_healthy": True,
|
||||
})
|
||||
|
||||
for peer in peers:
|
||||
sites.append({
|
||||
**peer.to_dict(),
|
||||
"is_local": False,
|
||||
})
|
||||
|
||||
sites.sort(key=lambda s: s.get("priority", 100))
|
||||
|
||||
return jsonify({
|
||||
"sites": sites,
|
||||
"total": len(sites),
|
||||
"healthy_count": sum(1 for s in sites if s.get("is_healthy")),
|
||||
})
|
||||
|
||||
|
||||
@admin_api_bp.route("/sites/<site_id>/bidirectional-status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def check_bidirectional_status(site_id: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
|
||||
registry = _site_registry()
|
||||
peer = registry.get_peer(site_id)
|
||||
|
||||
if not peer:
|
||||
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
|
||||
|
||||
local_site = registry.get_local_site()
|
||||
replication = _replication()
|
||||
local_rules = replication.list_rules()
|
||||
|
||||
local_bidir_rules = []
|
||||
for rule in local_rules:
|
||||
if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional":
|
||||
local_bidir_rules.append({
|
||||
"bucket_name": rule.bucket_name,
|
||||
"target_bucket": rule.target_bucket,
|
||||
"enabled": rule.enabled,
|
||||
})
|
||||
|
||||
result = {
|
||||
"site_id": site_id,
|
||||
"local_site_id": local_site.site_id if local_site else None,
|
||||
"local_endpoint": local_site.endpoint if local_site else None,
|
||||
"local_bidirectional_rules": local_bidir_rules,
|
||||
"local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
|
||||
"remote_status": None,
|
||||
"issues": [],
|
||||
"is_fully_configured": False,
|
||||
}
|
||||
|
||||
if not local_site or not local_site.site_id:
|
||||
result["issues"].append({
|
||||
"code": "NO_LOCAL_SITE_ID",
|
||||
"message": "Local site identity not configured",
|
||||
"severity": "error",
|
||||
})
|
||||
|
||||
if not local_site or not local_site.endpoint:
|
||||
result["issues"].append({
|
||||
"code": "NO_LOCAL_ENDPOINT",
|
||||
"message": "Local site endpoint not configured (remote site cannot reach back)",
|
||||
"severity": "error",
|
||||
})
|
||||
|
||||
if not peer.connection_id:
|
||||
result["issues"].append({
|
||||
"code": "NO_CONNECTION",
|
||||
"message": "No connection configured for this peer",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
connection = _connections().get(peer.connection_id)
|
||||
if not connection:
|
||||
result["issues"].append({
|
||||
"code": "CONNECTION_NOT_FOUND",
|
||||
"message": f"Connection '{peer.connection_id}' not found",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
if not local_bidir_rules:
|
||||
result["issues"].append({
|
||||
"code": "NO_LOCAL_BIDIRECTIONAL_RULES",
|
||||
"message": "No bidirectional replication rules configured on this site",
|
||||
"severity": "warning",
|
||||
})
|
||||
|
||||
if not result["local_site_sync_enabled"]:
|
||||
result["issues"].append({
|
||||
"code": "SITE_SYNC_DISABLED",
|
||||
"message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
|
||||
"severity": "warning",
|
||||
})
|
||||
|
||||
if not replication.check_endpoint_health(connection):
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_UNREACHABLE",
|
||||
"message": "Remote endpoint is not reachable",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
|
||||
if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
|
||||
result["issues"].append({
|
||||
"code": "ENDPOINT_NOT_ALLOWED",
|
||||
"message": "Peer endpoint points to cloud metadata service (SSRF protection)",
|
||||
"severity": "error",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
try:
|
||||
admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
|
||||
resp = requests.get(
|
||||
admin_url,
|
||||
timeout=10,
|
||||
headers={
|
||||
"Accept": "application/json",
|
||||
"X-Access-Key": connection.access_key,
|
||||
"X-Secret-Key": connection.secret_key,
|
||||
},
|
||||
)
|
||||
|
||||
if resp.status_code == 200:
|
||||
try:
|
||||
remote_data = resp.json()
|
||||
if not isinstance(remote_data, dict):
|
||||
raise ValueError("Expected JSON object")
|
||||
remote_local = remote_data.get("local")
|
||||
if remote_local is not None and not isinstance(remote_local, dict):
|
||||
raise ValueError("Expected 'local' to be an object")
|
||||
remote_peers = remote_data.get("peers", [])
|
||||
if not isinstance(remote_peers, list):
|
||||
raise ValueError("Expected 'peers' to be a list")
|
||||
except (ValueError, json.JSONDecodeError) as e:
|
||||
logger.warning("Invalid JSON from remote admin API: %s", e)
|
||||
result["remote_status"] = {"reachable": True, "invalid_response": True}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_INVALID_RESPONSE",
|
||||
"message": "Remote admin API returned invalid JSON",
|
||||
"severity": "warning",
|
||||
})
|
||||
return jsonify(result)
|
||||
|
||||
result["remote_status"] = {
|
||||
"reachable": True,
|
||||
"local_site": remote_local,
|
||||
"site_sync_enabled": None,
|
||||
"has_peer_for_us": False,
|
||||
"peer_connection_configured": False,
|
||||
"has_bidirectional_rules_for_us": False,
|
||||
}
|
||||
|
||||
for rp in remote_peers:
|
||||
if not isinstance(rp, dict):
|
||||
continue
|
||||
if local_site and (
|
||||
rp.get("site_id") == local_site.site_id or
|
||||
rp.get("endpoint") == local_site.endpoint
|
||||
):
|
||||
result["remote_status"]["has_peer_for_us"] = True
|
||||
result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
|
||||
break
|
||||
|
||||
if not result["remote_status"]["has_peer_for_us"]:
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_NO_PEER_FOR_US",
|
||||
"message": "Remote site does not have this site registered as a peer",
|
||||
"severity": "error",
|
||||
})
|
||||
elif not result["remote_status"]["peer_connection_configured"]:
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_NO_CONNECTION_FOR_US",
|
||||
"message": "Remote site has us as peer but no connection configured (cannot push back)",
|
||||
"severity": "error",
|
||||
})
|
||||
elif resp.status_code == 401 or resp.status_code == 403:
|
||||
result["remote_status"] = {
|
||||
"reachable": True,
|
||||
"admin_access_denied": True,
|
||||
}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_ADMIN_ACCESS_DENIED",
|
||||
"message": "Cannot verify remote configuration (admin access denied)",
|
||||
"severity": "warning",
|
||||
})
|
||||
else:
|
||||
result["remote_status"] = {
|
||||
"reachable": True,
|
||||
"admin_api_error": resp.status_code,
|
||||
}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_ADMIN_API_ERROR",
|
||||
"message": f"Remote admin API returned status {resp.status_code}",
|
||||
"severity": "warning",
|
||||
})
|
||||
except requests.RequestException as e:
|
||||
logger.warning("Remote admin API unreachable: %s", e)
|
||||
result["remote_status"] = {
|
||||
"reachable": False,
|
||||
"error": "Connection failed",
|
||||
}
|
||||
result["issues"].append({
|
||||
"code": "REMOTE_ADMIN_UNREACHABLE",
|
||||
"message": "Could not reach remote admin API",
|
||||
"severity": "warning",
|
||||
})
|
||||
except Exception as e:
|
||||
logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
|
||||
result["issues"].append({
|
||||
"code": "VERIFICATION_ERROR",
|
||||
"message": "Internal error during verification",
|
||||
"severity": "warning",
|
||||
})
|
||||
|
||||
error_issues = [i for i in result["issues"] if i["severity"] == "error"]
|
||||
result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
|
||||
|
||||
return jsonify(result)
|
||||
|
||||
|
||||
def _website_domains() -> WebsiteDomainStore:
|
||||
return current_app.extensions["website_domains"]
|
||||
|
||||
|
||||
def _storage():
|
||||
return current_app.extensions["object_storage"]
|
||||
|
||||
|
||||
def _require_iam_action(action: str):
|
||||
principal, error = _require_principal()
|
||||
if error:
|
||||
return None, error
|
||||
try:
|
||||
_iam().authorize(principal, None, action)
|
||||
return principal, None
|
||||
except IamError:
|
||||
return None, _json_error("AccessDenied", f"Requires {action} permission", 403)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_list_users():
|
||||
principal, error = _require_iam_action("iam:list_users")
|
||||
if error:
|
||||
return error
|
||||
return jsonify({"users": _iam().list_users()})
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_get_user(identifier):
|
||||
principal, error = _require_iam_action("iam:get_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
user_id = _iam().resolve_user_id(identifier)
|
||||
return jsonify(_iam().get_user_by_id(user_id))
|
||||
except IamError as exc:
|
||||
return _json_error("NotFound", str(exc), 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/policies", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_get_user_policies(identifier):
|
||||
principal, error = _require_iam_action("iam:get_policy")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
return jsonify({"policies": _iam().get_user_policies(identifier)})
|
||||
except IamError as exc:
|
||||
return _json_error("NotFound", str(exc), 404)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/keys", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_create_access_key(identifier):
|
||||
principal, error = _require_iam_action("iam:create_key")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
result = _iam().create_access_key(identifier)
|
||||
logger.info("Access key created for %s by %s", identifier, principal.access_key)
|
||||
return jsonify(result), 201
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/keys/<access_key>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_delete_access_key(identifier, access_key):
|
||||
principal, error = _require_iam_action("iam:delete_key")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().delete_access_key(access_key)
|
||||
logger.info("Access key %s deleted by %s", access_key, principal.access_key)
|
||||
return "", 204
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/disable", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_disable_user(identifier):
|
||||
principal, error = _require_iam_action("iam:disable_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().disable_user(identifier)
|
||||
logger.info("User %s disabled by %s", identifier, principal.access_key)
|
||||
return jsonify({"status": "disabled"})
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/iam/users/<identifier>/enable", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def iam_enable_user(identifier):
|
||||
principal, error = _require_iam_action("iam:disable_user")
|
||||
if error:
|
||||
return error
|
||||
try:
|
||||
_iam().enable_user(identifier)
|
||||
logger.info("User %s enabled by %s", identifier, principal.access_key)
|
||||
return jsonify({"status": "enabled"})
|
||||
except IamError as exc:
|
||||
return _json_error("InvalidRequest", str(exc), 400)
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def list_website_domains():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
return jsonify(_website_domains().list_all())
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def create_website_domain():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
domain = normalize_domain(payload.get("domain") or "")
|
||||
bucket = (payload.get("bucket") or "").strip()
|
||||
if not domain:
|
||||
return _json_error("ValidationError", "domain is required", 400)
|
||||
if not is_valid_domain(domain):
|
||||
return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
|
||||
if not bucket:
|
||||
return _json_error("ValidationError", "bucket is required", 400)
|
||||
storage = _storage()
|
||||
if not storage.bucket_exists(bucket):
|
||||
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
|
||||
store = _website_domains()
|
||||
existing = store.get_bucket(domain)
|
||||
if existing:
|
||||
return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)
|
||||
store.set_mapping(domain, bucket)
|
||||
logger.info("Website domain mapping created: %s -> %s", domain, bucket)
|
||||
return jsonify({"domain": domain, "bucket": bucket}), 201
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains/<domain>", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def get_website_domain(domain: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
domain = normalize_domain(domain)
|
||||
bucket = _website_domains().get_bucket(domain)
|
||||
if not bucket:
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
return jsonify({"domain": domain, "bucket": bucket})
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains/<domain>", methods=["PUT"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def update_website_domain(domain: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
domain = normalize_domain(domain)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
bucket = (payload.get("bucket") or "").strip()
|
||||
if not bucket:
|
||||
return _json_error("ValidationError", "bucket is required", 400)
|
||||
storage = _storage()
|
||||
if not storage.bucket_exists(bucket):
|
||||
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
|
||||
store = _website_domains()
|
||||
if not store.get_bucket(domain):
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
store.set_mapping(domain, bucket)
|
||||
logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
|
||||
return jsonify({"domain": domain, "bucket": bucket})
|
||||
|
||||
|
||||
@admin_api_bp.route("/website-domains/<domain>", methods=["DELETE"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def delete_website_domain(domain: str):
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
|
||||
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
|
||||
domain = normalize_domain(domain)
|
||||
if not _website_domains().delete_mapping(domain):
|
||||
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
|
||||
logger.info("Website domain mapping deleted: %s", domain)
|
||||
return Response(status=204)
|
||||
|
||||
|
||||
def _gc() -> Optional[GarbageCollector]:
|
||||
return current_app.extensions.get("gc")
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_status():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return jsonify({"enabled": False, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})
|
||||
return jsonify(gc.get_status())
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/run", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_run_now():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return _json_error("InvalidRequest", "GC is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
started = gc.run_async(dry_run=payload.get("dry_run"))
|
||||
logger.info("GC manual run by %s", principal.access_key)
|
||||
if not started:
|
||||
return _json_error("Conflict", "GC is already in progress", 409)
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@admin_api_bp.route("/gc/history", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def gc_history():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
gc = _gc()
|
||||
if not gc:
|
||||
return jsonify({"executions": []})
|
||||
limit = min(int(request.args.get("limit", 50)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = gc.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
def _integrity() -> Optional[IntegrityChecker]:
|
||||
return current_app.extensions.get("integrity")
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/status", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_status():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return jsonify({"enabled": False, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})
|
||||
return jsonify(checker.get_status())
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/run", methods=["POST"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_run_now():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return _json_error("InvalidRequest", "Integrity checker is not enabled", 400)
|
||||
payload = request.get_json(silent=True) or {}
|
||||
override_dry_run = payload.get("dry_run")
|
||||
override_auto_heal = payload.get("auto_heal")
|
||||
started = checker.run_async(
|
||||
auto_heal=override_auto_heal if override_auto_heal is not None else None,
|
||||
dry_run=override_dry_run if override_dry_run is not None else None,
|
||||
)
|
||||
logger.info("Integrity manual run by %s", principal.access_key)
|
||||
if not started:
|
||||
return _json_error("Conflict", "A scan is already in progress", 409)
|
||||
return jsonify({"status": "started"})
|
||||
|
||||
|
||||
@admin_api_bp.route("/integrity/history", methods=["GET"])
|
||||
@limiter.limit(lambda: _get_admin_rate_limit())
|
||||
def integrity_history():
|
||||
principal, error = _require_admin()
|
||||
if error:
|
||||
return error
|
||||
checker = _integrity()
|
||||
if not checker:
|
||||
return jsonify({"executions": []})
|
||||
limit = min(int(request.args.get("limit", 50)), 200)
|
||||
offset = int(request.args.get("offset", 0))
|
||||
records = checker.get_history(limit=limit, offset=offset)
|
||||
return jsonify({"executions": records})
|
||||
|
||||
|
||||
@@ -1,23 +1,89 @@
|
||||
"""Bucket policy loader/enforcer with a subset of AWS semantics."""
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from fnmatch import fnmatch
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from fnmatch import fnmatch, translate
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence
|
||||
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple
|
||||
|
||||
|
||||
RESOURCE_PREFIX = "arn:aws:s3:::"
|
||||
|
||||
|
||||
@lru_cache(maxsize=256)
|
||||
def _compile_pattern(pattern: str) -> Pattern[str]:
|
||||
return re.compile(translate(pattern), re.IGNORECASE)
|
||||
|
||||
|
||||
def _match_string_like(value: str, pattern: str) -> bool:
|
||||
compiled = _compile_pattern(pattern)
|
||||
return bool(compiled.match(value))
|
||||
|
||||
|
||||
def _ip_in_cidr(ip_str: str, cidr: str) -> bool:
|
||||
try:
|
||||
ip = ipaddress.ip_address(ip_str)
|
||||
network = ipaddress.ip_network(cidr, strict=False)
|
||||
return ip in network
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
|
||||
def _evaluate_condition_operator(
|
||||
operator: str,
|
||||
condition_key: str,
|
||||
condition_values: List[str],
|
||||
context: Dict[str, Any],
|
||||
) -> bool:
|
||||
context_value = context.get(condition_key)
|
||||
op_lower = operator.lower()
|
||||
if_exists = op_lower.endswith("ifexists")
|
||||
if if_exists:
|
||||
op_lower = op_lower[:-8]
|
||||
|
||||
if context_value is None:
|
||||
return if_exists
|
||||
|
||||
context_value_str = str(context_value)
|
||||
context_value_lower = context_value_str.lower()
|
||||
|
||||
if op_lower == "stringequals":
|
||||
return context_value_str in condition_values
|
||||
elif op_lower == "stringnotequals":
|
||||
return context_value_str not in condition_values
|
||||
elif op_lower == "stringequalsignorecase":
|
||||
return context_value_lower in [v.lower() for v in condition_values]
|
||||
elif op_lower == "stringnotequalsignorecase":
|
||||
return context_value_lower not in [v.lower() for v in condition_values]
|
||||
elif op_lower == "stringlike":
|
||||
return any(_match_string_like(context_value_str, p) for p in condition_values)
|
||||
elif op_lower == "stringnotlike":
|
||||
return not any(_match_string_like(context_value_str, p) for p in condition_values)
|
||||
elif op_lower == "ipaddress":
|
||||
return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
|
||||
elif op_lower == "notipaddress":
|
||||
return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
|
||||
elif op_lower == "bool":
|
||||
bool_val = context_value_lower in ("true", "1", "yes")
|
||||
return str(bool_val).lower() in [v.lower() for v in condition_values]
|
||||
elif op_lower == "null":
|
||||
is_null = context_value is None or context_value == ""
|
||||
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
|
||||
return is_null == expected_null
|
||||
|
||||
return False
|
||||
|
||||
ACTION_ALIASES = {
|
||||
# List actions
|
||||
"s3:listbucket": "list",
|
||||
"s3:listallmybuckets": "list",
|
||||
"s3:listbucketversions": "list",
|
||||
"s3:listmultipartuploads": "list",
|
||||
"s3:listparts": "list",
|
||||
# Read actions
|
||||
"s3:getobject": "read",
|
||||
"s3:getobjectversion": "read",
|
||||
"s3:getobjecttagging": "read",
|
||||
@@ -26,7 +92,6 @@ ACTION_ALIASES = {
|
||||
"s3:getbucketversioning": "read",
|
||||
"s3:headobject": "read",
|
||||
"s3:headbucket": "read",
|
||||
# Write actions
|
||||
"s3:putobject": "write",
|
||||
"s3:createbucket": "write",
|
||||
"s3:putobjecttagging": "write",
|
||||
@@ -36,26 +101,30 @@ ACTION_ALIASES = {
|
||||
"s3:completemultipartupload": "write",
|
||||
"s3:abortmultipartupload": "write",
|
||||
"s3:copyobject": "write",
|
||||
# Delete actions
|
||||
"s3:deleteobject": "delete",
|
||||
"s3:deleteobjectversion": "delete",
|
||||
"s3:deletebucket": "delete",
|
||||
"s3:deleteobjecttagging": "delete",
|
||||
# Share actions (ACL)
|
||||
"s3:putobjectacl": "share",
|
||||
"s3:putbucketacl": "share",
|
||||
"s3:getbucketacl": "share",
|
||||
# Policy actions
|
||||
"s3:putbucketpolicy": "policy",
|
||||
"s3:getbucketpolicy": "policy",
|
||||
"s3:deletebucketpolicy": "policy",
|
||||
# Replication actions
|
||||
"s3:getreplicationconfiguration": "replication",
|
||||
"s3:putreplicationconfiguration": "replication",
|
||||
"s3:deletereplicationconfiguration": "replication",
|
||||
"s3:replicateobject": "replication",
|
||||
"s3:replicatetags": "replication",
|
||||
"s3:replicatedelete": "replication",
|
||||
"s3:getlifecycleconfiguration": "lifecycle",
|
||||
"s3:putlifecycleconfiguration": "lifecycle",
|
||||
"s3:deletelifecycleconfiguration": "lifecycle",
|
||||
"s3:getbucketlifecycle": "lifecycle",
|
||||
"s3:putbucketlifecycle": "lifecycle",
|
||||
"s3:getbucketcors": "cors",
|
||||
"s3:putbucketcors": "cors",
|
||||
"s3:deletebucketcors": "cors",
|
||||
}
|
||||
|
||||
|
||||
@@ -133,7 +202,20 @@ class BucketPolicyStatement:
|
||||
effect: str
|
||||
principals: List[str] | str
|
||||
actions: List[str]
|
||||
resources: List[tuple[str | None, str | None]]
|
||||
resources: List[Tuple[str | None, str | None]]
|
||||
conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
|
||||
_compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
|
||||
|
||||
def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
|
||||
if self._compiled_patterns is None:
|
||||
self._compiled_patterns = []
|
||||
for resource_bucket, key_pattern in self.resources:
|
||||
if key_pattern is None:
|
||||
self._compiled_patterns.append((resource_bucket, None))
|
||||
else:
|
||||
regex_pattern = translate(key_pattern)
|
||||
self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
|
||||
return self._compiled_patterns
|
||||
|
||||
def matches_principal(self, access_key: Optional[str]) -> bool:
|
||||
if self.principals == "*":
|
||||
@@ -149,18 +231,29 @@ class BucketPolicyStatement:
|
||||
def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
|
||||
bucket = (bucket or "*").lower()
|
||||
key = object_key or ""
|
||||
for resource_bucket, key_pattern in self.resources:
|
||||
for resource_bucket, compiled_pattern in self._get_compiled_patterns():
|
||||
resource_bucket = (resource_bucket or "*").lower()
|
||||
if resource_bucket not in {"*", bucket}:
|
||||
continue
|
||||
if key_pattern is None:
|
||||
if compiled_pattern is None:
|
||||
if not key:
|
||||
return True
|
||||
continue
|
||||
if fnmatch(key, key_pattern):
|
||||
if compiled_pattern.match(key):
|
||||
return True
|
||||
return False
|
||||
|
||||
def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool:
|
||||
if not self.conditions:
|
||||
return True
|
||||
if context is None:
|
||||
context = {}
|
||||
for operator, key_values in self.conditions.items():
|
||||
for condition_key, condition_values in key_values.items():
|
||||
if not _evaluate_condition_operator(operator, condition_key, condition_values, context):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
class BucketPolicyStore:
|
||||
"""Loads bucket policies from disk and evaluates statements."""
|
||||
@@ -174,8 +267,16 @@ class BucketPolicyStore:
|
||||
self._policies: Dict[str, List[BucketPolicyStatement]] = {}
|
||||
self._load()
|
||||
self._last_mtime = self._current_mtime()
|
||||
# Performance: Avoid stat() on every request
|
||||
self._last_stat_check = 0.0
|
||||
self._stat_check_interval = float(os.environ.get("BUCKET_POLICY_STAT_CHECK_INTERVAL_SECONDS", "2.0"))
|
||||
|
||||
def maybe_reload(self) -> None:
|
||||
# Performance: Skip stat check if we checked recently
|
||||
now = time.time()
|
||||
if now - self._last_stat_check < self._stat_check_interval:
|
||||
return
|
||||
self._last_stat_check = now
|
||||
current = self._current_mtime()
|
||||
if current is None or current == self._last_mtime:
|
||||
return
|
||||
@@ -194,6 +295,7 @@ class BucketPolicyStore:
|
||||
bucket: Optional[str],
|
||||
object_key: Optional[str],
|
||||
action: str,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
) -> str | None:
|
||||
bucket = (bucket or "").lower()
|
||||
statements = self._policies.get(bucket) or []
|
||||
@@ -205,6 +307,8 @@ class BucketPolicyStore:
|
||||
continue
|
||||
if not statement.matches_resource(bucket, object_key):
|
||||
continue
|
||||
if not statement.matches_condition(context):
|
||||
continue
|
||||
if statement.effect == "deny":
|
||||
return "deny"
|
||||
decision = "allow"
|
||||
@@ -269,6 +373,7 @@ class BucketPolicyStore:
|
||||
if not resources:
|
||||
continue
|
||||
effect = statement.get("Effect", "Allow").lower()
|
||||
conditions = self._normalize_conditions(statement.get("Condition", {}))
|
||||
statements.append(
|
||||
BucketPolicyStatement(
|
||||
sid=statement.get("Sid"),
|
||||
@@ -276,6 +381,24 @@ class BucketPolicyStore:
|
||||
principals=principals,
|
||||
actions=actions or ["*"],
|
||||
resources=resources,
|
||||
conditions=conditions,
|
||||
)
|
||||
)
|
||||
return statements
|
||||
return statements
|
||||
|
||||
def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]:
|
||||
if not condition_block or not isinstance(condition_block, dict):
|
||||
return {}
|
||||
normalized: Dict[str, Dict[str, List[str]]] = {}
|
||||
for operator, key_values in condition_block.items():
|
||||
if not isinstance(key_values, dict):
|
||||
continue
|
||||
normalized[operator] = {}
|
||||
for cond_key, cond_values in key_values.items():
|
||||
if isinstance(cond_values, str):
|
||||
normalized[operator][cond_key] = [cond_values]
|
||||
elif isinstance(cond_values, list):
|
||||
normalized[operator][cond_key] = [str(v) for v in cond_values]
|
||||
else:
|
||||
normalized[operator][cond_key] = [str(cond_values)]
|
||||
return normalized
|
||||
109
app/compression.py
Normal file
109
app/compression.py
Normal file
@@ -0,0 +1,109 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import gzip
|
||||
import io
|
||||
from typing import Callable, Iterable, List, Tuple
|
||||
|
||||
COMPRESSIBLE_MIMES = frozenset([
|
||||
'application/json',
|
||||
'application/javascript',
|
||||
'application/xml',
|
||||
'text/html',
|
||||
'text/css',
|
||||
'text/plain',
|
||||
'text/xml',
|
||||
'text/javascript',
|
||||
'application/x-ndjson',
|
||||
])
|
||||
|
||||
MIN_SIZE_FOR_COMPRESSION = 500
|
||||
|
||||
|
||||
class GzipMiddleware:
|
||||
def __init__(self, app: Callable, compression_level: int = 6, min_size: int = MIN_SIZE_FOR_COMPRESSION):
|
||||
self.app = app
|
||||
self.compression_level = compression_level
|
||||
self.min_size = min_size
|
||||
|
||||
def __call__(self, environ: dict, start_response: Callable) -> Iterable[bytes]:
|
||||
accept_encoding = environ.get('HTTP_ACCEPT_ENCODING', '')
|
||||
if 'gzip' not in accept_encoding.lower():
|
||||
return self.app(environ, start_response)
|
||||
|
||||
response_started = False
|
||||
status_code = None
|
||||
response_headers: List[Tuple[str, str]] = []
|
||||
content_type = None
|
||||
content_length = None
|
||||
should_compress = False
|
||||
passthrough = False
|
||||
exc_info_holder = [None]
|
||||
|
||||
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
|
||||
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
|
||||
response_started = True
|
||||
status_code = int(status.split(' ', 1)[0])
|
||||
response_headers = list(headers)
|
||||
exc_info_holder[0] = exc_info
|
||||
|
||||
for name, value in headers:
|
||||
name_lower = name.lower()
|
||||
if name_lower == 'content-type':
|
||||
content_type = value.split(';')[0].strip().lower()
|
||||
elif name_lower == 'content-length':
|
||||
try:
|
||||
content_length = int(value)
|
||||
except (ValueError, TypeError):
|
||||
pass
|
||||
elif name_lower == 'content-encoding':
|
||||
passthrough = True
|
||||
return start_response(status, headers, exc_info)
|
||||
elif name_lower == 'x-stream-response':
|
||||
passthrough = True
|
||||
return start_response(status, headers, exc_info)
|
||||
|
||||
if content_type and content_type in COMPRESSIBLE_MIMES:
|
||||
if content_length is None or content_length >= self.min_size:
|
||||
should_compress = True
|
||||
else:
|
||||
passthrough = True
|
||||
return start_response(status, headers, exc_info)
|
||||
|
||||
return None
|
||||
|
||||
app_iter = self.app(environ, custom_start_response)
|
||||
|
||||
if passthrough:
|
||||
return app_iter
|
||||
|
||||
response_body = b''.join(app_iter)
|
||||
|
||||
if not response_started:
|
||||
return [response_body]
|
||||
|
||||
if should_compress and len(response_body) >= self.min_size:
|
||||
buf = io.BytesIO()
|
||||
with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=self.compression_level) as gz:
|
||||
gz.write(response_body)
|
||||
compressed = buf.getvalue()
|
||||
|
||||
if len(compressed) < len(response_body):
|
||||
response_body = compressed
|
||||
new_headers = []
|
||||
for name, value in response_headers:
|
||||
if name.lower() not in ('content-length', 'content-encoding'):
|
||||
new_headers.append((name, value))
|
||||
new_headers.append(('Content-Encoding', 'gzip'))
|
||||
new_headers.append(('Content-Length', str(len(response_body))))
|
||||
new_headers.append(('Vary', 'Accept-Encoding'))
|
||||
response_headers = new_headers
|
||||
|
||||
status_str = f"{status_code} " + {
|
||||
200: "OK", 201: "Created", 204: "No Content", 206: "Partial Content",
|
||||
301: "Moved Permanently", 302: "Found", 304: "Not Modified",
|
||||
400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found",
|
||||
405: "Method Not Allowed", 409: "Conflict", 500: "Internal Server Error",
|
||||
}.get(status_code, "Unknown")
|
||||
|
||||
start_response(status_str, response_headers, exc_info_holder[0])
|
||||
return [response_body]
|
||||
370
app/config.py
370
app/config.py
@@ -1,7 +1,7 @@
|
||||
"""Configuration helpers for the S3 clone application."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import secrets
|
||||
import shutil
|
||||
import sys
|
||||
@@ -10,6 +10,30 @@ from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import psutil
|
||||
|
||||
|
||||
def _calculate_auto_threads() -> int:
|
||||
cpu_count = psutil.cpu_count(logical=True) or 4
|
||||
return max(1, min(cpu_count * 2, 64))
|
||||
|
||||
|
||||
def _calculate_auto_connection_limit() -> int:
|
||||
available_mb = psutil.virtual_memory().available / (1024 * 1024)
|
||||
calculated = int(available_mb / 5)
|
||||
return max(20, min(calculated, 1000))
|
||||
|
||||
|
||||
def _calculate_auto_backlog(connection_limit: int) -> int:
|
||||
return max(128, min(connection_limit * 2, 4096))
|
||||
|
||||
|
||||
def _validate_rate_limit(value: str) -> str:
|
||||
pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
|
||||
if not re.match(pattern, value):
|
||||
raise ValueError(f"Invalid rate limit format: {value}. Expected format: '200 per minute'")
|
||||
return value
|
||||
|
||||
if getattr(sys, "frozen", False):
|
||||
# Running in a PyInstaller bundle
|
||||
PROJECT_ROOT = Path(sys._MEIPASS)
|
||||
@@ -56,6 +80,10 @@ class AppConfig:
|
||||
log_backup_count: int
|
||||
ratelimit_default: str
|
||||
ratelimit_storage_uri: str
|
||||
ratelimit_list_buckets: str
|
||||
ratelimit_bucket_ops: str
|
||||
ratelimit_object_ops: str
|
||||
ratelimit_head_ops: str
|
||||
cors_origins: list[str]
|
||||
cors_methods: list[str]
|
||||
cors_allow_headers: list[str]
|
||||
@@ -68,12 +96,75 @@ class AppConfig:
|
||||
stream_chunk_size: int
|
||||
multipart_min_part_size: int
|
||||
bucket_stats_cache_ttl: int
|
||||
object_cache_ttl: int
|
||||
encryption_enabled: bool
|
||||
encryption_master_key_path: Path
|
||||
kms_enabled: bool
|
||||
kms_keys_path: Path
|
||||
default_encryption_algorithm: str
|
||||
display_timezone: str
|
||||
lifecycle_enabled: bool
|
||||
lifecycle_interval_seconds: int
|
||||
metrics_history_enabled: bool
|
||||
metrics_history_retention_hours: int
|
||||
metrics_history_interval_minutes: int
|
||||
operation_metrics_enabled: bool
|
||||
operation_metrics_interval_minutes: int
|
||||
operation_metrics_retention_hours: int
|
||||
server_threads: int
|
||||
server_connection_limit: int
|
||||
server_backlog: int
|
||||
server_channel_timeout: int
|
||||
server_max_buffer_size: int
|
||||
server_threads_auto: bool
|
||||
server_connection_limit_auto: bool
|
||||
server_backlog_auto: bool
|
||||
site_sync_enabled: bool
|
||||
site_sync_interval_seconds: int
|
||||
site_sync_batch_size: int
|
||||
sigv4_timestamp_tolerance_seconds: int
|
||||
presigned_url_min_expiry_seconds: int
|
||||
presigned_url_max_expiry_seconds: int
|
||||
replication_connect_timeout_seconds: int
|
||||
replication_read_timeout_seconds: int
|
||||
replication_max_retries: int
|
||||
replication_streaming_threshold_bytes: int
|
||||
replication_max_failures_per_bucket: int
|
||||
site_sync_connect_timeout_seconds: int
|
||||
site_sync_read_timeout_seconds: int
|
||||
site_sync_max_retries: int
|
||||
site_sync_clock_skew_tolerance_seconds: float
|
||||
object_key_max_length_bytes: int
|
||||
object_cache_max_size: int
|
||||
meta_read_cache_max: int
|
||||
bucket_config_cache_ttl_seconds: float
|
||||
object_tag_limit: int
|
||||
encryption_chunk_size_bytes: int
|
||||
kms_generate_data_key_min_bytes: int
|
||||
kms_generate_data_key_max_bytes: int
|
||||
lifecycle_max_history_per_bucket: int
|
||||
site_id: Optional[str]
|
||||
site_endpoint: Optional[str]
|
||||
site_region: str
|
||||
site_priority: int
|
||||
ratelimit_admin: str
|
||||
num_trusted_proxies: int
|
||||
allowed_redirect_hosts: list[str]
|
||||
allow_internal_endpoints: bool
|
||||
website_hosting_enabled: bool
|
||||
gc_enabled: bool
|
||||
gc_interval_hours: float
|
||||
gc_temp_file_max_age_hours: float
|
||||
gc_multipart_max_age_days: int
|
||||
gc_lock_file_max_age_hours: float
|
||||
gc_dry_run: bool
|
||||
gc_io_throttle_ms: int
|
||||
integrity_enabled: bool
|
||||
integrity_interval_hours: float
|
||||
integrity_batch_size: int
|
||||
integrity_auto_heal: bool
|
||||
integrity_dry_run: bool
|
||||
integrity_io_throttle_ms: int
|
||||
|
||||
@classmethod
|
||||
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
|
||||
@@ -83,7 +174,7 @@ class AppConfig:
|
||||
return overrides.get(name, os.getenv(name, default))
|
||||
|
||||
storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
|
||||
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) # 1 GiB default
|
||||
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024))
|
||||
ui_page_size = int(_get("UI_PAGE_SIZE", 100))
|
||||
auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
|
||||
auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
|
||||
@@ -91,6 +182,8 @@ class AppConfig:
|
||||
secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300))
|
||||
stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024))
|
||||
multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024))
|
||||
lifecycle_enabled = _get("LIFECYCLE_ENABLED", "false").lower() in ("true", "1", "yes")
|
||||
lifecycle_interval_seconds = int(_get("LIFECYCLE_INTERVAL_SECONDS", 3600))
|
||||
default_secret = "dev-secret-key"
|
||||
secret_key = str(_get("SECRET_KEY", default_secret))
|
||||
|
||||
@@ -105,6 +198,10 @@ class AppConfig:
|
||||
try:
|
||||
secret_file.parent.mkdir(parents=True, exist_ok=True)
|
||||
secret_file.write_text(generated)
|
||||
try:
|
||||
os.chmod(secret_file, 0o600)
|
||||
except OSError:
|
||||
pass
|
||||
secret_key = generated
|
||||
except OSError:
|
||||
secret_key = generated
|
||||
@@ -140,8 +237,12 @@ class AppConfig:
|
||||
log_path = log_dir / str(_get("LOG_FILE", "app.log"))
|
||||
log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
|
||||
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
|
||||
ratelimit_default = str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))
|
||||
ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute")))
|
||||
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
|
||||
ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
|
||||
ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
|
||||
ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
|
||||
ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))
|
||||
|
||||
def _csv(value: str, default: list[str]) -> list[str]:
|
||||
if not value:
|
||||
@@ -154,8 +255,9 @@ class AppConfig:
|
||||
cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"])
|
||||
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
|
||||
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
|
||||
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
||||
|
||||
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60))
|
||||
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 60))
|
||||
|
||||
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
|
||||
encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve()
|
||||
@@ -163,6 +265,90 @@ class AppConfig:
|
||||
kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
|
||||
default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
|
||||
display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))
|
||||
metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
|
||||
metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
|
||||
operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
|
||||
operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
|
||||
|
||||
_raw_threads = int(_get("SERVER_THREADS", 0))
|
||||
if _raw_threads == 0:
|
||||
server_threads = _calculate_auto_threads()
|
||||
server_threads_auto = True
|
||||
else:
|
||||
server_threads = _raw_threads
|
||||
server_threads_auto = False
|
||||
|
||||
_raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
|
||||
if _raw_conn_limit == 0:
|
||||
server_connection_limit = _calculate_auto_connection_limit()
|
||||
server_connection_limit_auto = True
|
||||
else:
|
||||
server_connection_limit = _raw_conn_limit
|
||||
server_connection_limit_auto = False
|
||||
|
||||
_raw_backlog = int(_get("SERVER_BACKLOG", 0))
|
||||
if _raw_backlog == 0:
|
||||
server_backlog = _calculate_auto_backlog(server_connection_limit)
|
||||
server_backlog_auto = True
|
||||
else:
|
||||
server_backlog = _raw_backlog
|
||||
server_backlog_auto = False
|
||||
|
||||
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
|
||||
server_max_buffer_size = int(_get("SERVER_MAX_BUFFER_SIZE", 1024 * 1024 * 128))
|
||||
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
|
||||
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
|
||||
|
||||
sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
|
||||
presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
|
||||
presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
|
||||
replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
|
||||
replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
|
||||
replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
|
||||
replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
|
||||
replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
|
||||
site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
|
||||
site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
|
||||
site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
|
||||
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
|
||||
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
|
||||
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
|
||||
meta_read_cache_max = int(_get("META_READ_CACHE_MAX", 2048))
|
||||
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
|
||||
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
|
||||
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
|
||||
kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
|
||||
kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
|
||||
lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
|
||||
|
||||
site_id_raw = _get("SITE_ID", None)
|
||||
site_id = str(site_id_raw).strip() if site_id_raw else None
|
||||
site_endpoint_raw = _get("SITE_ENDPOINT", None)
|
||||
site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
|
||||
site_region = str(_get("SITE_REGION", "us-east-1"))
|
||||
site_priority = int(_get("SITE_PRIORITY", 100))
|
||||
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
|
||||
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 1))
|
||||
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
|
||||
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
|
||||
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_enabled = str(_get("GC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_interval_hours = float(_get("GC_INTERVAL_HOURS", 6.0))
|
||||
gc_temp_file_max_age_hours = float(_get("GC_TEMP_FILE_MAX_AGE_HOURS", 24.0))
|
||||
gc_multipart_max_age_days = int(_get("GC_MULTIPART_MAX_AGE_DAYS", 7))
|
||||
gc_lock_file_max_age_hours = float(_get("GC_LOCK_FILE_MAX_AGE_HOURS", 1.0))
|
||||
gc_dry_run = str(_get("GC_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
gc_io_throttle_ms = int(_get("GC_IO_THROTTLE_MS", 10))
|
||||
integrity_enabled = str(_get("INTEGRITY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_interval_hours = float(_get("INTEGRITY_INTERVAL_HOURS", 24.0))
|
||||
integrity_batch_size = int(_get("INTEGRITY_BATCH_SIZE", 1000))
|
||||
integrity_auto_heal = str(_get("INTEGRITY_AUTO_HEAL", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_dry_run = str(_get("INTEGRITY_DRY_RUN", "0")).lower() in {"1", "true", "yes", "on"}
|
||||
integrity_io_throttle_ms = int(_get("INTEGRITY_IO_THROTTLE_MS", 10))
|
||||
|
||||
return cls(storage_root=storage_root,
|
||||
max_upload_size=max_upload_size,
|
||||
@@ -181,6 +367,10 @@ class AppConfig:
|
||||
log_backup_count=log_backup_count,
|
||||
ratelimit_default=ratelimit_default,
|
||||
ratelimit_storage_uri=ratelimit_storage_uri,
|
||||
ratelimit_list_buckets=ratelimit_list_buckets,
|
||||
ratelimit_bucket_ops=ratelimit_bucket_ops,
|
||||
ratelimit_object_ops=ratelimit_object_ops,
|
||||
ratelimit_head_ops=ratelimit_head_ops,
|
||||
cors_origins=cors_origins,
|
||||
cors_methods=cors_methods,
|
||||
cors_allow_headers=cors_allow_headers,
|
||||
@@ -193,12 +383,75 @@ class AppConfig:
|
||||
stream_chunk_size=stream_chunk_size,
|
||||
multipart_min_part_size=multipart_min_part_size,
|
||||
bucket_stats_cache_ttl=bucket_stats_cache_ttl,
|
||||
object_cache_ttl=object_cache_ttl,
|
||||
encryption_enabled=encryption_enabled,
|
||||
encryption_master_key_path=encryption_master_key_path,
|
||||
kms_enabled=kms_enabled,
|
||||
kms_keys_path=kms_keys_path,
|
||||
default_encryption_algorithm=default_encryption_algorithm,
|
||||
display_timezone=display_timezone)
|
||||
display_timezone=display_timezone,
|
||||
lifecycle_enabled=lifecycle_enabled,
|
||||
lifecycle_interval_seconds=lifecycle_interval_seconds,
|
||||
metrics_history_enabled=metrics_history_enabled,
|
||||
metrics_history_retention_hours=metrics_history_retention_hours,
|
||||
metrics_history_interval_minutes=metrics_history_interval_minutes,
|
||||
operation_metrics_enabled=operation_metrics_enabled,
|
||||
operation_metrics_interval_minutes=operation_metrics_interval_minutes,
|
||||
operation_metrics_retention_hours=operation_metrics_retention_hours,
|
||||
server_threads=server_threads,
|
||||
server_connection_limit=server_connection_limit,
|
||||
server_backlog=server_backlog,
|
||||
server_channel_timeout=server_channel_timeout,
|
||||
server_max_buffer_size=server_max_buffer_size,
|
||||
server_threads_auto=server_threads_auto,
|
||||
server_connection_limit_auto=server_connection_limit_auto,
|
||||
server_backlog_auto=server_backlog_auto,
|
||||
site_sync_enabled=site_sync_enabled,
|
||||
site_sync_interval_seconds=site_sync_interval_seconds,
|
||||
site_sync_batch_size=site_sync_batch_size,
|
||||
sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
|
||||
presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
|
||||
presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
|
||||
replication_connect_timeout_seconds=replication_connect_timeout_seconds,
|
||||
replication_read_timeout_seconds=replication_read_timeout_seconds,
|
||||
replication_max_retries=replication_max_retries,
|
||||
replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
|
||||
replication_max_failures_per_bucket=replication_max_failures_per_bucket,
|
||||
site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
|
||||
site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
|
||||
site_sync_max_retries=site_sync_max_retries,
|
||||
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
|
||||
object_key_max_length_bytes=object_key_max_length_bytes,
|
||||
object_cache_max_size=object_cache_max_size,
|
||||
meta_read_cache_max=meta_read_cache_max,
|
||||
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
|
||||
object_tag_limit=object_tag_limit,
|
||||
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
|
||||
kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
|
||||
kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
|
||||
lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
|
||||
site_id=site_id,
|
||||
site_endpoint=site_endpoint,
|
||||
site_region=site_region,
|
||||
site_priority=site_priority,
|
||||
ratelimit_admin=ratelimit_admin,
|
||||
num_trusted_proxies=num_trusted_proxies,
|
||||
allowed_redirect_hosts=allowed_redirect_hosts,
|
||||
allow_internal_endpoints=allow_internal_endpoints,
|
||||
website_hosting_enabled=website_hosting_enabled,
|
||||
gc_enabled=gc_enabled,
|
||||
gc_interval_hours=gc_interval_hours,
|
||||
gc_temp_file_max_age_hours=gc_temp_file_max_age_hours,
|
||||
gc_multipart_max_age_days=gc_multipart_max_age_days,
|
||||
gc_lock_file_max_age_hours=gc_lock_file_max_age_hours,
|
||||
gc_dry_run=gc_dry_run,
|
||||
gc_io_throttle_ms=gc_io_throttle_ms,
|
||||
integrity_enabled=integrity_enabled,
|
||||
integrity_interval_hours=integrity_interval_hours,
|
||||
integrity_batch_size=integrity_batch_size,
|
||||
integrity_auto_heal=integrity_auto_heal,
|
||||
integrity_dry_run=integrity_dry_run,
|
||||
integrity_io_throttle_ms=integrity_io_throttle_ms)
|
||||
|
||||
def validate_and_report(self) -> list[str]:
|
||||
"""Validate configuration and return a list of warnings/issues.
|
||||
@@ -258,7 +511,37 @@ class AppConfig:
|
||||
|
||||
if "*" in self.cors_origins:
|
||||
issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
|
||||
|
||||
|
||||
if not (1 <= self.server_threads <= 64):
|
||||
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
|
||||
if not (10 <= self.server_connection_limit <= 1000):
|
||||
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
|
||||
if not (128 <= self.server_backlog <= 4096):
|
||||
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (128-4096). Server cannot start.")
|
||||
if not (10 <= self.server_channel_timeout <= 300):
|
||||
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
|
||||
if self.server_max_buffer_size < 1024 * 1024:
|
||||
issues.append(f"WARNING: SERVER_MAX_BUFFER_SIZE={self.server_max_buffer_size} is less than 1MB. Large uploads will fail.")
|
||||
|
||||
if sys.platform != "win32":
|
||||
try:
|
||||
import resource
|
||||
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
|
||||
threshold = int(soft_limit * 0.8)
|
||||
if self.server_connection_limit > threshold:
|
||||
issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.")
|
||||
except (ImportError, OSError):
|
||||
pass
|
||||
|
||||
try:
|
||||
import psutil
|
||||
available_mb = psutil.virtual_memory().available / (1024 * 1024)
|
||||
estimated_mb = self.server_threads * 50
|
||||
if estimated_mb > available_mb * 0.5:
|
||||
issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).")
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
return issues
|
||||
|
||||
def print_startup_summary(self) -> None:
|
||||
@@ -276,6 +559,15 @@ class AppConfig:
|
||||
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
|
||||
if self.kms_enabled:
|
||||
print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
|
||||
if self.website_hosting_enabled:
|
||||
print(f" WEBSITE_HOSTING: Enabled")
|
||||
def _auto(flag: bool) -> str:
|
||||
return " (auto)" if flag else ""
|
||||
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
|
||||
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
|
||||
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
|
||||
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
|
||||
print(f" MAX_BUFFER_SIZE: {self.server_max_buffer_size // (1024 * 1024)}MB")
|
||||
print("=" * 60)
|
||||
|
||||
issues = self.validate_and_report()
|
||||
@@ -306,6 +598,7 @@ class AppConfig:
|
||||
"STREAM_CHUNK_SIZE": self.stream_chunk_size,
|
||||
"MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
|
||||
"BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl,
|
||||
"OBJECT_CACHE_TTL": self.object_cache_ttl,
|
||||
"LOG_LEVEL": self.log_level,
|
||||
"LOG_TO_FILE": self.log_to_file,
|
||||
"LOG_FILE": str(self.log_path),
|
||||
@@ -313,6 +606,10 @@ class AppConfig:
|
||||
"LOG_BACKUP_COUNT": self.log_backup_count,
|
||||
"RATELIMIT_DEFAULT": self.ratelimit_default,
|
||||
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
|
||||
"RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
|
||||
"RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
|
||||
"RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
|
||||
"RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
|
||||
"CORS_ORIGINS": self.cors_origins,
|
||||
"CORS_METHODS": self.cors_methods,
|
||||
"CORS_ALLOW_HEADERS": self.cors_allow_headers,
|
||||
@@ -324,4 +621,63 @@ class AppConfig:
|
||||
"KMS_KEYS_PATH": str(self.kms_keys_path),
|
||||
"DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
|
||||
"DISPLAY_TIMEZONE": self.display_timezone,
|
||||
"LIFECYCLE_ENABLED": self.lifecycle_enabled,
|
||||
"LIFECYCLE_INTERVAL_SECONDS": self.lifecycle_interval_seconds,
|
||||
"METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
|
||||
"METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
|
||||
"METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
|
||||
"OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
|
||||
"OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
|
||||
"OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
|
||||
"SERVER_THREADS": self.server_threads,
|
||||
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
|
||||
"SERVER_BACKLOG": self.server_backlog,
|
||||
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
|
||||
"SERVER_MAX_BUFFER_SIZE": self.server_max_buffer_size,
|
||||
"SITE_SYNC_ENABLED": self.site_sync_enabled,
|
||||
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
|
||||
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
|
||||
"SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
|
||||
"PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
|
||||
"PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
|
||||
"REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
|
||||
"REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
|
||||
"REPLICATION_MAX_RETRIES": self.replication_max_retries,
|
||||
"REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
|
||||
"REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
|
||||
"SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
|
||||
"SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
|
||||
"SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
|
||||
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
|
||||
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
|
||||
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
|
||||
"META_READ_CACHE_MAX": self.meta_read_cache_max,
|
||||
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
|
||||
"OBJECT_TAG_LIMIT": self.object_tag_limit,
|
||||
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
|
||||
"KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
|
||||
"KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
|
||||
"LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
|
||||
"SITE_ID": self.site_id,
|
||||
"SITE_ENDPOINT": self.site_endpoint,
|
||||
"SITE_REGION": self.site_region,
|
||||
"SITE_PRIORITY": self.site_priority,
|
||||
"RATE_LIMIT_ADMIN": self.ratelimit_admin,
|
||||
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
|
||||
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
|
||||
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
|
||||
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
|
||||
"GC_ENABLED": self.gc_enabled,
|
||||
"GC_INTERVAL_HOURS": self.gc_interval_hours,
|
||||
"GC_TEMP_FILE_MAX_AGE_HOURS": self.gc_temp_file_max_age_hours,
|
||||
"GC_MULTIPART_MAX_AGE_DAYS": self.gc_multipart_max_age_days,
|
||||
"GC_LOCK_FILE_MAX_AGE_HOURS": self.gc_lock_file_max_age_hours,
|
||||
"GC_DRY_RUN": self.gc_dry_run,
|
||||
"GC_IO_THROTTLE_MS": self.gc_io_throttle_ms,
|
||||
"INTEGRITY_ENABLED": self.integrity_enabled,
|
||||
"INTEGRITY_INTERVAL_HOURS": self.integrity_interval_hours,
|
||||
"INTEGRITY_BATCH_SIZE": self.integrity_batch_size,
|
||||
"INTEGRITY_AUTO_HEAL": self.integrity_auto_heal,
|
||||
"INTEGRITY_DRY_RUN": self.integrity_dry_run,
|
||||
"INTEGRITY_IO_THROTTLE_MS": self.integrity_io_throttle_ms,
|
||||
}
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Manage remote S3 connections."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Encrypted storage layer that wraps ObjectStorage with encryption support."""
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
@@ -79,7 +78,7 @@ class EncryptedObjectStorage:
|
||||
kms_key_id: Optional[str] = None,
|
||||
) -> ObjectMeta:
|
||||
"""Store an object, optionally with encryption.
|
||||
|
||||
|
||||
Args:
|
||||
bucket_name: Name of the bucket
|
||||
object_key: Key for the object
|
||||
@@ -87,42 +86,41 @@ class EncryptedObjectStorage:
|
||||
metadata: Optional user metadata
|
||||
server_side_encryption: Encryption algorithm ("AES256" or "aws:kms")
|
||||
kms_key_id: KMS key ID (for aws:kms encryption)
|
||||
|
||||
|
||||
Returns:
|
||||
ObjectMeta with object information
|
||||
|
||||
Performance: Uses streaming encryption for large files to reduce memory usage.
|
||||
"""
|
||||
should_encrypt, algorithm, detected_kms_key = self._should_encrypt(
|
||||
bucket_name, server_side_encryption
|
||||
)
|
||||
|
||||
|
||||
if kms_key_id is None:
|
||||
kms_key_id = detected_kms_key
|
||||
|
||||
|
||||
if should_encrypt:
|
||||
data = stream.read()
|
||||
|
||||
try:
|
||||
ciphertext, enc_metadata = self.encryption.encrypt_object(
|
||||
data,
|
||||
# Performance: Use streaming encryption to avoid loading entire file into memory
|
||||
encrypted_stream, enc_metadata = self.encryption.encrypt_stream(
|
||||
stream,
|
||||
algorithm=algorithm,
|
||||
kms_key_id=kms_key_id,
|
||||
context={"bucket": bucket_name, "key": object_key},
|
||||
)
|
||||
|
||||
|
||||
combined_metadata = metadata.copy() if metadata else {}
|
||||
combined_metadata.update(enc_metadata.to_dict())
|
||||
|
||||
encrypted_stream = io.BytesIO(ciphertext)
|
||||
|
||||
result = self.storage.put_object(
|
||||
bucket_name,
|
||||
object_key,
|
||||
encrypted_stream,
|
||||
metadata=combined_metadata,
|
||||
)
|
||||
|
||||
|
||||
result.metadata = combined_metadata
|
||||
return result
|
||||
|
||||
|
||||
except EncryptionError as exc:
|
||||
raise StorageError(f"Encryption failed: {exc}") from exc
|
||||
else:
|
||||
@@ -135,33 +133,34 @@ class EncryptedObjectStorage:
|
||||
|
||||
def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]:
|
||||
"""Get object data, decrypting if necessary.
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple of (data, metadata)
|
||||
|
||||
Performance: Uses streaming decryption to reduce memory usage.
|
||||
"""
|
||||
path = self.storage.get_object_path(bucket_name, object_key)
|
||||
metadata = self.storage.get_object_metadata(bucket_name, object_key)
|
||||
|
||||
with path.open("rb") as f:
|
||||
data = f.read()
|
||||
|
||||
|
||||
enc_metadata = EncryptionMetadata.from_dict(metadata)
|
||||
if enc_metadata:
|
||||
try:
|
||||
data = self.encryption.decrypt_object(
|
||||
data,
|
||||
enc_metadata,
|
||||
context={"bucket": bucket_name, "key": object_key},
|
||||
)
|
||||
# Performance: Use streaming decryption to avoid loading entire file into memory
|
||||
with path.open("rb") as f:
|
||||
decrypted_stream = self.encryption.decrypt_stream(f, enc_metadata)
|
||||
data = decrypted_stream.read()
|
||||
except EncryptionError as exc:
|
||||
raise StorageError(f"Decryption failed: {exc}") from exc
|
||||
|
||||
else:
|
||||
with path.open("rb") as f:
|
||||
data = f.read()
|
||||
|
||||
clean_metadata = {
|
||||
k: v for k, v in metadata.items()
|
||||
if not k.startswith("x-amz-encryption")
|
||||
if not k.startswith("x-amz-encryption")
|
||||
and k != "x-amz-encrypted-data-key"
|
||||
}
|
||||
|
||||
|
||||
return data, clean_metadata
|
||||
|
||||
def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]:
|
||||
@@ -190,7 +189,16 @@ class EncryptedObjectStorage:
|
||||
|
||||
def list_objects(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects(bucket_name, **kwargs)
|
||||
|
||||
|
||||
def list_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.list_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def iter_objects_shallow(self, bucket_name: str, **kwargs):
|
||||
return self.storage.iter_objects_shallow(bucket_name, **kwargs)
|
||||
|
||||
def search_objects(self, bucket_name: str, query: str, **kwargs):
|
||||
return self.storage.search_objects(bucket_name, query, **kwargs)
|
||||
|
||||
def list_objects_all(self, bucket_name: str):
|
||||
return self.storage.list_objects_all(bucket_name)
|
||||
|
||||
@@ -271,9 +279,15 @@ class EncryptedObjectStorage:
|
||||
|
||||
def get_bucket_quota(self, bucket_name: str):
|
||||
return self.storage.get_bucket_quota(bucket_name)
|
||||
|
||||
|
||||
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
|
||||
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
|
||||
|
||||
def get_bucket_website(self, bucket_name: str):
|
||||
return self.storage.get_bucket_website(bucket_name)
|
||||
|
||||
def set_bucket_website(self, bucket_name: str, website_config):
|
||||
return self.storage.set_bucket_website(bucket_name, website_config)
|
||||
|
||||
def _compute_etag(self, path: Path) -> str:
|
||||
return self.storage._compute_etag(path)
|
||||
|
||||
@@ -1,15 +1,55 @@
|
||||
"""Encryption providers for server-side and client-side encryption."""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Any, BinaryIO, Dict, Generator, Optional
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
|
||||
from cryptography.hazmat.primitives import hashes
|
||||
|
||||
if sys.platform != "win32":
|
||||
import fcntl
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
if not all(hasattr(_rc, f) for f in (
|
||||
"encrypt_stream_chunked", "decrypt_stream_chunked",
|
||||
)):
|
||||
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
|
||||
_HAS_RUST = True
|
||||
except ImportError:
|
||||
_rc = None
|
||||
_HAS_RUST = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _set_secure_file_permissions(file_path: Path) -> None:
|
||||
"""Set restrictive file permissions (owner read/write only)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
username = os.environ.get("USERNAME", "")
|
||||
if username:
|
||||
subprocess.run(
|
||||
["icacls", str(file_path), "/inheritance:r",
|
||||
"/grant:r", f"{username}:F"],
|
||||
check=True, capture_output=True
|
||||
)
|
||||
else:
|
||||
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
|
||||
except (subprocess.SubprocessError, OSError) as exc:
|
||||
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
|
||||
else:
|
||||
os.chmod(file_path, 0o600)
|
||||
|
||||
|
||||
class EncryptionError(Exception):
|
||||
@@ -59,22 +99,34 @@ class EncryptionMetadata:
|
||||
|
||||
class EncryptionProvider:
|
||||
"""Base class for encryption providers."""
|
||||
|
||||
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
"""Generate a data key and its encrypted form.
|
||||
|
||||
|
||||
Returns:
|
||||
Tuple of (plaintext_key, encrypted_key)
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
||||
"""Decrypt an encrypted data key.
|
||||
|
||||
Args:
|
||||
encrypted_data_key: The encrypted data key bytes
|
||||
key_id: Optional key identifier (used by KMS providers)
|
||||
|
||||
Returns:
|
||||
The decrypted data key
|
||||
"""
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class LocalKeyEncryption(EncryptionProvider):
|
||||
"""SSE-S3 style encryption using a local master key.
|
||||
@@ -99,28 +151,48 @@ class LocalKeyEncryption(EncryptionProvider):
|
||||
return self._master_key
|
||||
|
||||
def _load_or_create_master_key(self) -> bytes:
|
||||
"""Load master key from file or generate a new one."""
|
||||
if self.master_key_path.exists():
|
||||
try:
|
||||
return base64.b64decode(self.master_key_path.read_text().strip())
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to load master key: {exc}") from exc
|
||||
|
||||
key = secrets.token_bytes(32)
|
||||
"""Load master key from file or generate a new one (with file locking)."""
|
||||
lock_path = self.master_key_path.with_suffix(".lock")
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self.master_key_path.write_text(base64.b64encode(key).decode())
|
||||
with open(lock_path, "w") as lock_file:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
||||
try:
|
||||
if self.master_key_path.exists():
|
||||
try:
|
||||
return base64.b64decode(self.master_key_path.read_text().strip())
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to load master key: {exc}") from exc
|
||||
key = secrets.token_bytes(32)
|
||||
try:
|
||||
self.master_key_path.write_text(base64.b64encode(key).decode())
|
||||
_set_secure_file_permissions(self.master_key_path)
|
||||
except OSError as exc:
|
||||
raise EncryptionError(f"Failed to save master key: {exc}") from exc
|
||||
return key
|
||||
finally:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
except OSError as exc:
|
||||
raise EncryptionError(f"Failed to save master key: {exc}") from exc
|
||||
return key
|
||||
raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc
|
||||
|
||||
DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
|
||||
|
||||
def _encrypt_data_key(self, data_key: bytes) -> bytes:
|
||||
"""Encrypt the data key with the master key."""
|
||||
aesgcm = AESGCM(self.master_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
encrypted = aesgcm.encrypt(nonce, data_key, None)
|
||||
encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD)
|
||||
return nonce + encrypted
|
||||
|
||||
|
||||
def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
|
||||
"""Decrypt the data key using the master key."""
|
||||
if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag
|
||||
@@ -129,10 +201,17 @@ class LocalKeyEncryption(EncryptionProvider):
|
||||
nonce = encrypted_data_key[:12]
|
||||
ciphertext = encrypted_data_key[12:]
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
|
||||
|
||||
return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD)
|
||||
except Exception:
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
|
||||
|
||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
||||
"""Decrypt an encrypted data key (key_id ignored for local encryption)."""
|
||||
return self._decrypt_data_key(encrypted_data_key)
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
"""Generate a data key and its encrypted form."""
|
||||
plaintext_key = secrets.token_bytes(32)
|
||||
@@ -142,11 +221,12 @@ class LocalKeyEncryption(EncryptionProvider):
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
"""Encrypt data using envelope encryption."""
|
||||
data_key, encrypted_data_key = self.generate_data_key()
|
||||
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
|
||||
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
return EncryptionResult(
|
||||
ciphertext=ciphertext,
|
||||
nonce=nonce,
|
||||
@@ -157,15 +237,13 @@ class LocalKeyEncryption(EncryptionProvider):
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt data using envelope encryption."""
|
||||
# Decrypt the data key
|
||||
data_key = self._decrypt_data_key(encrypted_data_key)
|
||||
|
||||
# Decrypt the data
|
||||
aesgcm = AESGCM(data_key)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
|
||||
raise EncryptionError("Failed to decrypt data") from exc
|
||||
|
||||
|
||||
class StreamingEncryptor:
|
||||
@@ -183,81 +261,156 @@ class StreamingEncryptor:
|
||||
self.chunk_size = chunk_size
|
||||
|
||||
def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
|
||||
"""Derive a unique nonce for each chunk."""
|
||||
# XOR the base nonce with the chunk index
|
||||
nonce_int = int.from_bytes(base_nonce, "big")
|
||||
derived = nonce_int ^ chunk_index
|
||||
return derived.to_bytes(12, "big")
|
||||
|
||||
def encrypt_stream(self, stream: BinaryIO,
|
||||
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
|
||||
"""Encrypt a stream and return encrypted stream + metadata."""
|
||||
"""Derive a unique nonce for each chunk using HKDF."""
|
||||
hkdf = HKDF(
|
||||
algorithm=hashes.SHA256(),
|
||||
length=12,
|
||||
salt=base_nonce,
|
||||
info=chunk_index.to_bytes(4, "big"),
|
||||
)
|
||||
return hkdf.derive(b"chunk_nonce")
|
||||
|
||||
def encrypt_stream(self, stream: BinaryIO,
|
||||
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
|
||||
"""Encrypt a stream and return encrypted stream + metadata.
|
||||
|
||||
Performance: Writes chunks directly to output buffer instead of accumulating in list.
|
||||
"""
|
||||
data_key, encrypted_data_key = self.provider.generate_data_key()
|
||||
base_nonce = secrets.token_bytes(12)
|
||||
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
encrypted_chunks = []
|
||||
# Performance: Write directly to BytesIO instead of accumulating chunks
|
||||
output = io.BytesIO()
|
||||
output.write(b"\x00\x00\x00\x00") # Placeholder for chunk count
|
||||
chunk_index = 0
|
||||
|
||||
|
||||
while True:
|
||||
chunk = stream.read(self.chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
|
||||
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
|
||||
|
||||
size_prefix = len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big")
|
||||
encrypted_chunks.append(size_prefix + encrypted_chunk)
|
||||
|
||||
# Write size prefix + encrypted chunk directly
|
||||
output.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
|
||||
output.write(encrypted_chunk)
|
||||
chunk_index += 1
|
||||
|
||||
header = chunk_index.to_bytes(4, "big")
|
||||
encrypted_data = header + b"".join(encrypted_chunks)
|
||||
|
||||
|
||||
# Write actual chunk count to header
|
||||
output.seek(0)
|
||||
output.write(chunk_index.to_bytes(4, "big"))
|
||||
output.seek(0)
|
||||
|
||||
metadata = EncryptionMetadata(
|
||||
algorithm="AES256",
|
||||
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
|
||||
nonce=base_nonce,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
return io.BytesIO(encrypted_data), metadata
|
||||
|
||||
|
||||
return output, metadata
|
||||
|
||||
def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
|
||||
"""Decrypt a stream using the provided metadata."""
|
||||
if isinstance(self.provider, LocalKeyEncryption):
|
||||
data_key = self.provider._decrypt_data_key(metadata.encrypted_data_key)
|
||||
else:
|
||||
raise EncryptionError("Unsupported provider for streaming decryption")
|
||||
|
||||
"""Decrypt a stream using the provided metadata.
|
||||
|
||||
Performance: Writes chunks directly to output buffer instead of accumulating in list.
|
||||
"""
|
||||
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
base_nonce = metadata.nonce
|
||||
|
||||
|
||||
chunk_count_bytes = stream.read(4)
|
||||
if len(chunk_count_bytes) < 4:
|
||||
raise EncryptionError("Invalid encrypted stream: missing header")
|
||||
chunk_count = int.from_bytes(chunk_count_bytes, "big")
|
||||
|
||||
decrypted_chunks = []
|
||||
|
||||
# Performance: Write directly to BytesIO instead of accumulating chunks
|
||||
output = io.BytesIO()
|
||||
for chunk_index in range(chunk_count):
|
||||
size_bytes = stream.read(self.HEADER_SIZE)
|
||||
if len(size_bytes) < self.HEADER_SIZE:
|
||||
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
|
||||
chunk_size = int.from_bytes(size_bytes, "big")
|
||||
|
||||
|
||||
encrypted_chunk = stream.read(chunk_size)
|
||||
if len(encrypted_chunk) < chunk_size:
|
||||
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
|
||||
|
||||
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
try:
|
||||
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
|
||||
decrypted_chunks.append(decrypted_chunk)
|
||||
output.write(decrypted_chunk) # Write directly instead of appending to list
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
|
||||
|
||||
return io.BytesIO(b"".join(decrypted_chunks))
|
||||
|
||||
output.seek(0)
|
||||
return output
|
||||
|
||||
def encrypt_file(self, input_path: str, output_path: str) -> EncryptionMetadata:
|
||||
data_key, encrypted_data_key = self.provider.generate_data_key()
|
||||
base_nonce = secrets.token_bytes(12)
|
||||
|
||||
if _HAS_RUST:
|
||||
_rc.encrypt_stream_chunked(
|
||||
input_path, output_path, data_key, base_nonce, self.chunk_size
|
||||
)
|
||||
else:
|
||||
with open(input_path, "rb") as stream:
|
||||
aesgcm = AESGCM(data_key)
|
||||
with open(output_path, "wb") as out:
|
||||
out.write(b"\x00\x00\x00\x00")
|
||||
chunk_index = 0
|
||||
while True:
|
||||
chunk = stream.read(self.chunk_size)
|
||||
if not chunk:
|
||||
break
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
|
||||
out.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big"))
|
||||
out.write(encrypted_chunk)
|
||||
chunk_index += 1
|
||||
out.seek(0)
|
||||
out.write(chunk_index.to_bytes(4, "big"))
|
||||
|
||||
return EncryptionMetadata(
|
||||
algorithm="AES256",
|
||||
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
|
||||
nonce=base_nonce,
|
||||
encrypted_data_key=encrypted_data_key,
|
||||
)
|
||||
|
||||
def decrypt_file(self, input_path: str, output_path: str,
|
||||
metadata: EncryptionMetadata) -> None:
|
||||
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id)
|
||||
base_nonce = metadata.nonce
|
||||
|
||||
if _HAS_RUST:
|
||||
_rc.decrypt_stream_chunked(input_path, output_path, data_key, base_nonce)
|
||||
else:
|
||||
with open(input_path, "rb") as stream:
|
||||
chunk_count_bytes = stream.read(4)
|
||||
if len(chunk_count_bytes) < 4:
|
||||
raise EncryptionError("Invalid encrypted stream: missing header")
|
||||
chunk_count = int.from_bytes(chunk_count_bytes, "big")
|
||||
aesgcm = AESGCM(data_key)
|
||||
with open(output_path, "wb") as out:
|
||||
for chunk_index in range(chunk_count):
|
||||
size_bytes = stream.read(self.HEADER_SIZE)
|
||||
if len(size_bytes) < self.HEADER_SIZE:
|
||||
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
|
||||
chunk_size = int.from_bytes(size_bytes, "big")
|
||||
encrypted_chunk = stream.read(chunk_size)
|
||||
if len(encrypted_chunk) < chunk_size:
|
||||
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
|
||||
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
|
||||
try:
|
||||
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
|
||||
out.write(decrypted_chunk)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
|
||||
|
||||
|
||||
class EncryptionManager:
|
||||
@@ -300,7 +453,8 @@ class EncryptionManager:
|
||||
|
||||
def get_streaming_encryptor(self) -> StreamingEncryptor:
|
||||
if self._streaming_encryptor is None:
|
||||
self._streaming_encryptor = StreamingEncryptor(self.get_local_provider())
|
||||
chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024)
|
||||
self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
|
||||
return self._streaming_encryptor
|
||||
|
||||
def encrypt_object(self, data: bytes, algorithm: str = "AES256",
|
||||
@@ -343,13 +497,115 @@ class EncryptionManager:
|
||||
return encryptor.decrypt_stream(stream, metadata)
|
||||
|
||||
|
||||
class SSECEncryption(EncryptionProvider):
|
||||
"""SSE-C: Server-Side Encryption with Customer-Provided Keys.
|
||||
|
||||
The client provides the encryption key with each request.
|
||||
Server encrypts/decrypts but never stores the key.
|
||||
|
||||
Required headers for PUT:
|
||||
- x-amz-server-side-encryption-customer-algorithm: AES256
|
||||
- x-amz-server-side-encryption-customer-key: Base64-encoded 256-bit key
|
||||
- x-amz-server-side-encryption-customer-key-MD5: Base64-encoded MD5 of key
|
||||
"""
|
||||
|
||||
KEY_ID = "customer-provided"
|
||||
|
||||
def __init__(self, customer_key: bytes):
|
||||
if len(customer_key) != 32:
|
||||
raise EncryptionError("Customer key must be exactly 256 bits (32 bytes)")
|
||||
self.customer_key = customer_key
|
||||
|
||||
@classmethod
|
||||
def from_headers(cls, headers: Dict[str, str]) -> "SSECEncryption":
|
||||
algorithm = headers.get("x-amz-server-side-encryption-customer-algorithm", "")
|
||||
if algorithm.upper() != "AES256":
|
||||
raise EncryptionError(f"Unsupported SSE-C algorithm: {algorithm}. Only AES256 is supported.")
|
||||
|
||||
key_b64 = headers.get("x-amz-server-side-encryption-customer-key", "")
|
||||
if not key_b64:
|
||||
raise EncryptionError("Missing x-amz-server-side-encryption-customer-key header")
|
||||
|
||||
key_md5_b64 = headers.get("x-amz-server-side-encryption-customer-key-md5", "")
|
||||
|
||||
try:
|
||||
customer_key = base64.b64decode(key_b64)
|
||||
except Exception as e:
|
||||
raise EncryptionError(f"Invalid base64 in customer key: {e}") from e
|
||||
|
||||
if len(customer_key) != 32:
|
||||
raise EncryptionError(f"Customer key must be 256 bits, got {len(customer_key) * 8} bits")
|
||||
|
||||
if key_md5_b64:
|
||||
import hashlib
|
||||
expected_md5 = base64.b64encode(hashlib.md5(customer_key).digest()).decode()
|
||||
if key_md5_b64 != expected_md5:
|
||||
raise EncryptionError("Customer key MD5 mismatch")
|
||||
|
||||
return cls(customer_key)
|
||||
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
aesgcm = AESGCM(self.customer_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
return EncryptionResult(
|
||||
ciphertext=ciphertext,
|
||||
nonce=nonce,
|
||||
key_id=self.KEY_ID,
|
||||
encrypted_data_key=b"",
|
||||
)
|
||||
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
aesgcm = AESGCM(self.customer_key)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
||||
except Exception as exc:
|
||||
raise EncryptionError("SSE-C decryption failed") from exc
|
||||
|
||||
def generate_data_key(self) -> tuple[bytes, bytes]:
|
||||
return self.customer_key, b""
|
||||
|
||||
|
||||
@dataclass
|
||||
class SSECMetadata:
|
||||
algorithm: str = "AES256"
|
||||
nonce: bytes = b""
|
||||
key_md5: str = ""
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {
|
||||
"x-amz-server-side-encryption-customer-algorithm": self.algorithm,
|
||||
"x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(),
|
||||
"x-amz-server-side-encryption-customer-key-MD5": self.key_md5,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, str]) -> Optional["SSECMetadata"]:
|
||||
algorithm = data.get("x-amz-server-side-encryption-customer-algorithm")
|
||||
if not algorithm:
|
||||
return None
|
||||
try:
|
||||
nonce = base64.b64decode(data.get("x-amz-encryption-nonce", ""))
|
||||
return cls(
|
||||
algorithm=algorithm,
|
||||
nonce=nonce,
|
||||
key_md5=data.get("x-amz-server-side-encryption-customer-key-MD5", ""),
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
class ClientEncryptionHelper:
|
||||
"""Helpers for client-side encryption.
|
||||
|
||||
|
||||
Client-side encryption is performed by the client, but this helper
|
||||
provides key generation and materials for clients that need them.
|
||||
"""
|
||||
|
||||
|
||||
@staticmethod
|
||||
def generate_client_key() -> Dict[str, str]:
|
||||
"""Generate a new client encryption key."""
|
||||
@@ -362,34 +618,36 @@ class ClientEncryptionHelper:
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def encrypt_with_key(plaintext: bytes, key_b64: str) -> Dict[str, str]:
|
||||
def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]:
|
||||
"""Encrypt data with a client-provided key."""
|
||||
key = base64.b64decode(key_b64)
|
||||
if len(key) != 32:
|
||||
raise EncryptionError("Key must be 256 bits (32 bytes)")
|
||||
|
||||
|
||||
aesgcm = AESGCM(key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, None)
|
||||
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
return {
|
||||
"ciphertext": base64.b64encode(ciphertext).decode(),
|
||||
"nonce": base64.b64encode(nonce).decode(),
|
||||
"algorithm": "AES-256-GCM",
|
||||
}
|
||||
|
||||
|
||||
@staticmethod
|
||||
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str) -> bytes:
|
||||
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt data with a client-provided key."""
|
||||
key = base64.b64decode(key_b64)
|
||||
nonce = base64.b64decode(nonce_b64)
|
||||
ciphertext = base64.b64decode(ciphertext_b64)
|
||||
|
||||
|
||||
if len(key) != 32:
|
||||
raise EncryptionError("Key must be 256 bits (32 bytes)")
|
||||
|
||||
|
||||
aesgcm = AESGCM(key)
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||
return aesgcm.decrypt(nonce, ciphertext, aad)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Decryption failed: {exc}") from exc
|
||||
raise EncryptionError("Decryption failed") from exc
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Standardized error handling for API and UI responses."""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
@@ -7,6 +6,7 @@ from typing import Optional, Dict, Any
|
||||
from xml.etree.ElementTree import Element, SubElement, tostring
|
||||
|
||||
from flask import Response, jsonify, request, flash, redirect, url_for, g
|
||||
from flask_limiter import RateLimitExceeded
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -173,10 +173,30 @@ def handle_app_error(error: AppError) -> Response:
|
||||
return error.to_xml_response()
|
||||
|
||||
|
||||
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
|
||||
g.s3_error_code = "SlowDown"
|
||||
if request.path.startswith("/ui") or request.path.startswith("/buckets"):
|
||||
wants_json = (
|
||||
request.is_json or
|
||||
request.headers.get("X-Requested-With") == "XMLHttpRequest" or
|
||||
"application/json" in request.accept_mimetypes.values()
|
||||
)
|
||||
if wants_json:
|
||||
return jsonify({"success": False, "error": {"code": "SlowDown", "message": "Please reduce your request rate."}}), 429
|
||||
error = Element("Error")
|
||||
SubElement(error, "Code").text = "SlowDown"
|
||||
SubElement(error, "Message").text = "Please reduce your request rate."
|
||||
SubElement(error, "Resource").text = request.path
|
||||
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
|
||||
xml_bytes = tostring(error, encoding="utf-8")
|
||||
return Response(xml_bytes, status="429 Too Many Requests", mimetype="application/xml")
|
||||
|
||||
|
||||
def register_error_handlers(app):
|
||||
"""Register error handlers with a Flask app."""
|
||||
app.register_error_handler(AppError, handle_app_error)
|
||||
|
||||
app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
|
||||
|
||||
for error_class in [
|
||||
BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
|
||||
ObjectNotFoundError, InvalidObjectKeyError,
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Application-wide extension instances."""
|
||||
from flask import g
|
||||
from flask_limiter import Limiter
|
||||
from flask_limiter.util import get_remote_address
|
||||
|
||||
596
app/gc.py
Normal file
596
app/gc.py
Normal file
@@ -0,0 +1,596 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCResult:
|
||||
temp_files_deleted: int = 0
|
||||
temp_bytes_freed: int = 0
|
||||
multipart_uploads_deleted: int = 0
|
||||
multipart_bytes_freed: int = 0
|
||||
lock_files_deleted: int = 0
|
||||
orphaned_metadata_deleted: int = 0
|
||||
orphaned_versions_deleted: int = 0
|
||||
orphaned_version_bytes_freed: int = 0
|
||||
empty_dirs_removed: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"temp_files_deleted": self.temp_files_deleted,
|
||||
"temp_bytes_freed": self.temp_bytes_freed,
|
||||
"multipart_uploads_deleted": self.multipart_uploads_deleted,
|
||||
"multipart_bytes_freed": self.multipart_bytes_freed,
|
||||
"lock_files_deleted": self.lock_files_deleted,
|
||||
"orphaned_metadata_deleted": self.orphaned_metadata_deleted,
|
||||
"orphaned_versions_deleted": self.orphaned_versions_deleted,
|
||||
"orphaned_version_bytes_freed": self.orphaned_version_bytes_freed,
|
||||
"empty_dirs_removed": self.empty_dirs_removed,
|
||||
"errors": self.errors,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@property
|
||||
def total_bytes_freed(self) -> int:
|
||||
return self.temp_bytes_freed + self.multipart_bytes_freed + self.orphaned_version_bytes_freed
|
||||
|
||||
@property
|
||||
def has_work(self) -> bool:
|
||||
return (
|
||||
self.temp_files_deleted > 0
|
||||
or self.multipart_uploads_deleted > 0
|
||||
or self.lock_files_deleted > 0
|
||||
or self.orphaned_metadata_deleted > 0
|
||||
or self.orphaned_versions_deleted > 0
|
||||
or self.empty_dirs_removed > 0
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class GCExecutionRecord:
|
||||
timestamp: float
|
||||
result: dict
|
||||
dry_run: bool
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"result": self.result,
|
||||
"dry_run": self.dry_run,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> GCExecutionRecord:
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
result=data["result"],
|
||||
dry_run=data.get("dry_run", False),
|
||||
)
|
||||
|
||||
|
||||
class GCHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_records = max_records
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "gc_history.json"
|
||||
|
||||
def load(self) -> List[GCExecutionRecord]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return [GCExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error("Failed to load GC history: %s", e)
|
||||
return []
|
||||
|
||||
def save(self, records: List[GCExecutionRecord]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save GC history: %s", e)
|
||||
|
||||
def add(self, record: GCExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load()
|
||||
records.insert(0, record)
|
||||
self.save(records)
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[GCExecutionRecord]:
|
||||
return self.load()[offset : offset + limit]
|
||||
|
||||
|
||||
def _dir_size(path: Path) -> int:
|
||||
total = 0
|
||||
try:
|
||||
for f in path.rglob("*"):
|
||||
if f.is_file():
|
||||
try:
|
||||
total += f.stat().st_size
|
||||
except OSError:
|
||||
pass
|
||||
except OSError:
|
||||
pass
|
||||
return total
|
||||
|
||||
|
||||
def _file_age_hours(path: Path) -> float:
|
||||
try:
|
||||
mtime = path.stat().st_mtime
|
||||
return (time.time() - mtime) / 3600.0
|
||||
except OSError:
|
||||
return 0.0
|
||||
|
||||
|
||||
class GarbageCollector:
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_TMP_DIR = "tmp"
|
||||
SYSTEM_MULTIPART_DIR = "multipart"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_hours: float = 6.0,
|
||||
temp_file_max_age_hours: float = 24.0,
|
||||
multipart_max_age_days: int = 7,
|
||||
lock_file_max_age_hours: float = 1.0,
|
||||
dry_run: bool = False,
|
||||
max_history: int = 50,
|
||||
io_throttle_ms: int = 10,
|
||||
) -> None:
|
||||
self.storage_root = Path(storage_root)
|
||||
self.interval_seconds = interval_hours * 3600.0
|
||||
self.temp_file_max_age_hours = temp_file_max_age_hours
|
||||
self.multipart_max_age_days = multipart_max_age_days
|
||||
self.lock_file_max_age_hours = lock_file_max_age_hours
|
||||
self.dry_run = dry_run
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._scanning = False
|
||||
self._scan_start_time: Optional[float] = None
|
||||
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||
self.history_store = GCHistoryStore(storage_root, max_records=max_history)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(
|
||||
"GC started: interval=%.1fh, temp_max_age=%.1fh, multipart_max_age=%dd, lock_max_age=%.1fh, dry_run=%s",
|
||||
self.interval_seconds / 3600.0,
|
||||
self.temp_file_max_age_hours,
|
||||
self.multipart_max_age_days,
|
||||
self.lock_file_max_age_hours,
|
||||
self.dry_run,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("GC stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_cycle(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.run_now()
|
||||
except Exception as e:
|
||||
logger.error("GC cycle failed: %s", e)
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def run_now(self, dry_run: Optional[bool] = None) -> GCResult:
|
||||
if not self._lock.acquire(blocking=False):
|
||||
raise RuntimeError("GC is already in progress")
|
||||
|
||||
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||
|
||||
try:
|
||||
self._scanning = True
|
||||
self._scan_start_time = time.time()
|
||||
|
||||
start = self._scan_start_time
|
||||
result = GCResult()
|
||||
|
||||
original_dry_run = self.dry_run
|
||||
self.dry_run = effective_dry_run
|
||||
try:
|
||||
self._clean_temp_files(result)
|
||||
self._clean_orphaned_multipart(result)
|
||||
self._clean_stale_locks(result)
|
||||
self._clean_orphaned_metadata(result)
|
||||
self._clean_orphaned_versions(result)
|
||||
self._clean_empty_dirs(result)
|
||||
finally:
|
||||
self.dry_run = original_dry_run
|
||||
|
||||
result.execution_time_seconds = time.time() - start
|
||||
|
||||
if result.has_work or result.errors:
|
||||
logger.info(
|
||||
"GC completed in %.2fs: temp=%d (%.1f MB), multipart=%d (%.1f MB), "
|
||||
"locks=%d, meta=%d, versions=%d (%.1f MB), dirs=%d, errors=%d%s",
|
||||
result.execution_time_seconds,
|
||||
result.temp_files_deleted,
|
||||
result.temp_bytes_freed / (1024 * 1024),
|
||||
result.multipart_uploads_deleted,
|
||||
result.multipart_bytes_freed / (1024 * 1024),
|
||||
result.lock_files_deleted,
|
||||
result.orphaned_metadata_deleted,
|
||||
result.orphaned_versions_deleted,
|
||||
result.orphaned_version_bytes_freed / (1024 * 1024),
|
||||
result.empty_dirs_removed,
|
||||
len(result.errors),
|
||||
" (dry run)" if effective_dry_run else "",
|
||||
)
|
||||
|
||||
record = GCExecutionRecord(
|
||||
timestamp=time.time(),
|
||||
result=result.to_dict(),
|
||||
dry_run=effective_dry_run,
|
||||
)
|
||||
self.history_store.add(record)
|
||||
|
||||
return result
|
||||
finally:
|
||||
self._scanning = False
|
||||
self._scan_start_time = None
|
||||
self._lock.release()
|
||||
|
||||
def run_async(self, dry_run: Optional[bool] = None) -> bool:
|
||||
if self._scanning:
|
||||
return False
|
||||
t = threading.Thread(target=self.run_now, args=(dry_run,), daemon=True)
|
||||
t.start()
|
||||
return True
|
||||
|
||||
def _system_path(self) -> Path:
|
||||
return self.storage_root / self.SYSTEM_ROOT
|
||||
|
||||
def _throttle(self) -> bool:
|
||||
if self._shutdown:
|
||||
return True
|
||||
if self._io_throttle > 0:
|
||||
time.sleep(self._io_throttle)
|
||||
return self._shutdown
|
||||
|
||||
def _list_bucket_names(self) -> List[str]:
|
||||
names = []
|
||||
try:
|
||||
for entry in self.storage_root.iterdir():
|
||||
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||
names.append(entry.name)
|
||||
except OSError:
|
||||
pass
|
||||
return names
|
||||
|
||||
def _clean_temp_files(self, result: GCResult) -> None:
|
||||
tmp_dir = self._system_path() / self.SYSTEM_TMP_DIR
|
||||
if not tmp_dir.exists():
|
||||
return
|
||||
try:
|
||||
for entry in tmp_dir.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not entry.is_file():
|
||||
continue
|
||||
age = _file_age_hours(entry)
|
||||
if age < self.temp_file_max_age_hours:
|
||||
continue
|
||||
try:
|
||||
size = entry.stat().st_size
|
||||
if not self.dry_run:
|
||||
entry.unlink()
|
||||
result.temp_files_deleted += 1
|
||||
result.temp_bytes_freed += size
|
||||
except OSError as e:
|
||||
result.errors.append(f"temp file {entry.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan tmp dir: {e}")
|
||||
|
||||
def _clean_orphaned_multipart(self, result: GCResult) -> None:
|
||||
cutoff_hours = self.multipart_max_age_days * 24.0
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
for multipart_root in (
|
||||
self._system_path() / self.SYSTEM_MULTIPART_DIR / bucket_name,
|
||||
self.storage_root / bucket_name / ".multipart",
|
||||
):
|
||||
if not multipart_root.exists():
|
||||
continue
|
||||
try:
|
||||
for upload_dir in multipart_root.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not upload_dir.is_dir():
|
||||
continue
|
||||
self._maybe_clean_upload(upload_dir, cutoff_hours, result)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan multipart {bucket_name}: {e}")
|
||||
|
||||
def _maybe_clean_upload(self, upload_dir: Path, cutoff_hours: float, result: GCResult) -> None:
|
||||
manifest_path = upload_dir / "manifest.json"
|
||||
age = _file_age_hours(manifest_path) if manifest_path.exists() else _file_age_hours(upload_dir)
|
||||
|
||||
if age < cutoff_hours:
|
||||
return
|
||||
|
||||
dir_bytes = _dir_size(upload_dir)
|
||||
try:
|
||||
if not self.dry_run:
|
||||
shutil.rmtree(upload_dir, ignore_errors=True)
|
||||
result.multipart_uploads_deleted += 1
|
||||
result.multipart_bytes_freed += dir_bytes
|
||||
except OSError as e:
|
||||
result.errors.append(f"multipart {upload_dir.name}: {e}")
|
||||
|
||||
def _clean_stale_locks(self, result: GCResult) -> None:
|
||||
buckets_root = self._system_path() / self.SYSTEM_BUCKETS_DIR
|
||||
if not buckets_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
for bucket_dir in buckets_root.iterdir():
|
||||
if self._shutdown:
|
||||
return
|
||||
if not bucket_dir.is_dir():
|
||||
continue
|
||||
locks_dir = bucket_dir / "locks"
|
||||
if not locks_dir.exists():
|
||||
continue
|
||||
try:
|
||||
for lock_file in locks_dir.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not lock_file.is_file() or not lock_file.name.endswith(".lock"):
|
||||
continue
|
||||
age = _file_age_hours(lock_file)
|
||||
if age < self.lock_file_max_age_hours:
|
||||
continue
|
||||
try:
|
||||
if not self.dry_run:
|
||||
lock_file.unlink(missing_ok=True)
|
||||
result.lock_files_deleted += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"lock {lock_file.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan locks {bucket_dir.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan buckets for locks: {e}")
|
||||
|
||||
def _clean_orphaned_metadata(self, result: GCResult) -> None:
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
legacy_meta = self.storage_root / bucket_name / ".meta"
|
||||
if legacy_meta.exists():
|
||||
self._clean_legacy_metadata(bucket_name, legacy_meta, result)
|
||||
|
||||
new_meta = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
if new_meta.exists():
|
||||
self._clean_index_metadata(bucket_name, new_meta, result)
|
||||
|
||||
def _clean_legacy_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
try:
|
||||
for meta_file in meta_root.rglob("*.meta.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if not meta_file.is_file():
|
||||
continue
|
||||
try:
|
||||
rel = meta_file.relative_to(meta_root)
|
||||
object_key = rel.as_posix().removesuffix(".meta.json")
|
||||
object_path = bucket_path / object_key
|
||||
if not object_path.exists():
|
||||
if not self.dry_run:
|
||||
meta_file.unlink(missing_ok=True)
|
||||
result.orphaned_metadata_deleted += 1
|
||||
except (OSError, ValueError) as e:
|
||||
result.errors.append(f"legacy meta {bucket_name}/{meta_file.name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan legacy meta {bucket_name}: {e}")
|
||||
|
||||
def _clean_index_metadata(self, bucket_name: str, meta_root: Path, result: GCResult) -> None:
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
try:
|
||||
for index_file in meta_root.rglob("_index.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if not index_file.is_file():
|
||||
continue
|
||||
try:
|
||||
with open(index_file, "r", encoding="utf-8") as f:
|
||||
index_data = json.load(f)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
keys_to_remove = []
|
||||
for key in index_data:
|
||||
rel_dir = index_file.parent.relative_to(meta_root)
|
||||
if rel_dir == Path("."):
|
||||
full_key = key
|
||||
else:
|
||||
full_key = rel_dir.as_posix() + "/" + key
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
keys_to_remove.append(key)
|
||||
|
||||
if keys_to_remove:
|
||||
if not self.dry_run:
|
||||
for k in keys_to_remove:
|
||||
index_data.pop(k, None)
|
||||
if index_data:
|
||||
try:
|
||||
with open(index_file, "w", encoding="utf-8") as f:
|
||||
json.dump(index_data, f)
|
||||
except OSError as e:
|
||||
result.errors.append(f"write index {bucket_name}: {e}")
|
||||
continue
|
||||
else:
|
||||
try:
|
||||
index_file.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
result.orphaned_metadata_deleted += len(keys_to_remove)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan index meta {bucket_name}: {e}")
|
||||
|
||||
def _clean_orphaned_versions(self, result: GCResult) -> None:
|
||||
bucket_names = self._list_bucket_names()
|
||||
|
||||
for bucket_name in bucket_names:
|
||||
if self._shutdown:
|
||||
return
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
for versions_root in (
|
||||
self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR,
|
||||
self.storage_root / bucket_name / ".versions",
|
||||
):
|
||||
if not versions_root.exists():
|
||||
continue
|
||||
try:
|
||||
for key_dir in versions_root.iterdir():
|
||||
if self._throttle():
|
||||
return
|
||||
if not key_dir.is_dir():
|
||||
continue
|
||||
self._clean_versions_for_key(bucket_path, versions_root, key_dir, result)
|
||||
except OSError as e:
|
||||
result.errors.append(f"scan versions {bucket_name}: {e}")
|
||||
|
||||
def _clean_versions_for_key(
|
||||
self, bucket_path: Path, versions_root: Path, key_dir: Path, result: GCResult
|
||||
) -> None:
|
||||
try:
|
||||
rel = key_dir.relative_to(versions_root)
|
||||
except ValueError:
|
||||
return
|
||||
|
||||
object_path = bucket_path / rel
|
||||
if object_path.exists():
|
||||
return
|
||||
|
||||
version_files = list(key_dir.glob("*.bin")) + list(key_dir.glob("*.json"))
|
||||
if not version_files:
|
||||
return
|
||||
|
||||
for vf in version_files:
|
||||
try:
|
||||
size = vf.stat().st_size if vf.suffix == ".bin" else 0
|
||||
if not self.dry_run:
|
||||
vf.unlink(missing_ok=True)
|
||||
if vf.suffix == ".bin":
|
||||
result.orphaned_version_bytes_freed += size
|
||||
result.orphaned_versions_deleted += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"version file {vf.name}: {e}")
|
||||
|
||||
def _clean_empty_dirs(self, result: GCResult) -> None:
|
||||
targets = [
|
||||
self._system_path() / self.SYSTEM_TMP_DIR,
|
||||
self._system_path() / self.SYSTEM_MULTIPART_DIR,
|
||||
self._system_path() / self.SYSTEM_BUCKETS_DIR,
|
||||
]
|
||||
for bucket_name in self._list_bucket_names():
|
||||
targets.append(self.storage_root / bucket_name / ".meta")
|
||||
targets.append(self.storage_root / bucket_name / ".versions")
|
||||
targets.append(self.storage_root / bucket_name / ".multipart")
|
||||
|
||||
for root in targets:
|
||||
if not root.exists():
|
||||
continue
|
||||
self._remove_empty_dirs_recursive(root, root, result)
|
||||
|
||||
def _remove_empty_dirs_recursive(self, path: Path, stop_at: Path, result: GCResult) -> bool:
|
||||
if self._shutdown:
|
||||
return False
|
||||
if not path.is_dir():
|
||||
return False
|
||||
|
||||
try:
|
||||
children = list(path.iterdir())
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
all_empty = True
|
||||
for child in children:
|
||||
if self._throttle():
|
||||
return False
|
||||
if child.is_dir():
|
||||
if not self._remove_empty_dirs_recursive(child, stop_at, result):
|
||||
all_empty = False
|
||||
else:
|
||||
all_empty = False
|
||||
|
||||
if all_empty and path != stop_at:
|
||||
try:
|
||||
if not self.dry_run:
|
||||
path.rmdir()
|
||||
result.empty_dirs_removed += 1
|
||||
return True
|
||||
except OSError:
|
||||
return False
|
||||
return all_empty
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||
records = self.history_store.get_history(limit, offset)
|
||||
return [r.to_dict() for r in records]
|
||||
|
||||
def get_status(self) -> dict:
|
||||
status: Dict[str, Any] = {
|
||||
"enabled": not self._shutdown or self._timer is not None,
|
||||
"running": self._timer is not None and not self._shutdown,
|
||||
"scanning": self._scanning,
|
||||
"interval_hours": self.interval_seconds / 3600.0,
|
||||
"temp_file_max_age_hours": self.temp_file_max_age_hours,
|
||||
"multipart_max_age_days": self.multipart_max_age_days,
|
||||
"lock_file_max_age_hours": self.lock_file_max_age_hours,
|
||||
"dry_run": self.dry_run,
|
||||
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||
}
|
||||
if self._scanning and self._scan_start_time:
|
||||
status["scan_elapsed_seconds"] = time.time() - self._scan_start_time
|
||||
return status
|
||||
837
app/iam.py
837
app/iam.py
File diff suppressed because it is too large
Load Diff
995
app/integrity.py
Normal file
995
app/integrity.py
Normal file
@@ -0,0 +1,995 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
import myfsio_core as _rc
|
||||
if not hasattr(_rc, "md5_file"):
|
||||
raise ImportError("myfsio_core is outdated, rebuild with: cd myfsio_core && maturin develop --release")
|
||||
_HAS_RUST = True
|
||||
except ImportError:
|
||||
_HAS_RUST = False
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _compute_etag(path: Path) -> str:
|
||||
if _HAS_RUST:
|
||||
return _rc.md5_file(str(path))
|
||||
checksum = hashlib.md5()
|
||||
with path.open("rb") as handle:
|
||||
for chunk in iter(lambda: handle.read(8192), b""):
|
||||
checksum.update(chunk)
|
||||
return checksum.hexdigest()
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityIssue:
|
||||
issue_type: str
|
||||
bucket: str
|
||||
key: str
|
||||
detail: str
|
||||
healed: bool = False
|
||||
heal_action: str = ""
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"issue_type": self.issue_type,
|
||||
"bucket": self.bucket,
|
||||
"key": self.key,
|
||||
"detail": self.detail,
|
||||
"healed": self.healed,
|
||||
"heal_action": self.heal_action,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityResult:
|
||||
corrupted_objects: int = 0
|
||||
orphaned_objects: int = 0
|
||||
phantom_metadata: int = 0
|
||||
stale_versions: int = 0
|
||||
etag_cache_inconsistencies: int = 0
|
||||
legacy_metadata_drifts: int = 0
|
||||
issues_healed: int = 0
|
||||
issues: List[IntegrityIssue] = field(default_factory=list)
|
||||
errors: List[str] = field(default_factory=list)
|
||||
objects_scanned: int = 0
|
||||
buckets_scanned: int = 0
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"corrupted_objects": self.corrupted_objects,
|
||||
"orphaned_objects": self.orphaned_objects,
|
||||
"phantom_metadata": self.phantom_metadata,
|
||||
"stale_versions": self.stale_versions,
|
||||
"etag_cache_inconsistencies": self.etag_cache_inconsistencies,
|
||||
"legacy_metadata_drifts": self.legacy_metadata_drifts,
|
||||
"issues_healed": self.issues_healed,
|
||||
"issues": [i.to_dict() for i in self.issues],
|
||||
"errors": self.errors,
|
||||
"objects_scanned": self.objects_scanned,
|
||||
"buckets_scanned": self.buckets_scanned,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@property
|
||||
def total_issues(self) -> int:
|
||||
return (
|
||||
self.corrupted_objects
|
||||
+ self.orphaned_objects
|
||||
+ self.phantom_metadata
|
||||
+ self.stale_versions
|
||||
+ self.etag_cache_inconsistencies
|
||||
+ self.legacy_metadata_drifts
|
||||
)
|
||||
|
||||
@property
|
||||
def has_issues(self) -> bool:
|
||||
return self.total_issues > 0
|
||||
|
||||
|
||||
@dataclass
|
||||
class IntegrityExecutionRecord:
|
||||
timestamp: float
|
||||
result: dict
|
||||
dry_run: bool
|
||||
auto_heal: bool
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"result": self.result,
|
||||
"dry_run": self.dry_run,
|
||||
"auto_heal": self.auto_heal,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> IntegrityExecutionRecord:
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
result=data["result"],
|
||||
dry_run=data.get("dry_run", False),
|
||||
auto_heal=data.get("auto_heal", False),
|
||||
)
|
||||
|
||||
|
||||
class IntegrityHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_records: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_records = max_records
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "integrity_history.json"
|
||||
|
||||
def load(self) -> List[IntegrityExecutionRecord]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
return [IntegrityExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error("Failed to load integrity history: %s", e)
|
||||
return []
|
||||
|
||||
def save(self, records: List[IntegrityExecutionRecord]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[: self.max_records]]}
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save integrity history: %s", e)
|
||||
|
||||
def add(self, record: IntegrityExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load()
|
||||
records.insert(0, record)
|
||||
self.save(records)
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[IntegrityExecutionRecord]:
|
||||
return self.load()[offset : offset + limit]
|
||||
|
||||
|
||||
class IntegrityCursorStore:
|
||||
def __init__(self, storage_root: Path) -> None:
|
||||
self.storage_root = storage_root
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "integrity_cursor.json"
|
||||
|
||||
def load(self) -> Dict[str, Any]:
|
||||
path = self._get_path()
|
||||
if not path.exists():
|
||||
return {"buckets": {}}
|
||||
try:
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if not isinstance(data.get("buckets"), dict):
|
||||
return {"buckets": {}}
|
||||
return data
|
||||
except (OSError, ValueError, KeyError):
|
||||
return {"buckets": {}}
|
||||
|
||||
def save(self, data: Dict[str, Any]) -> None:
|
||||
path = self._get_path()
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
with open(path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error("Failed to save integrity cursor: %s", e)
|
||||
|
||||
def update_bucket(
|
||||
self,
|
||||
bucket_name: str,
|
||||
timestamp: float,
|
||||
last_key: Optional[str] = None,
|
||||
completed: bool = False,
|
||||
) -> None:
|
||||
with self._lock:
|
||||
data = self.load()
|
||||
entry = data["buckets"].get(bucket_name, {})
|
||||
if completed:
|
||||
entry["last_scanned"] = timestamp
|
||||
entry.pop("last_key", None)
|
||||
entry["completed"] = True
|
||||
else:
|
||||
entry["last_scanned"] = timestamp
|
||||
if last_key is not None:
|
||||
entry["last_key"] = last_key
|
||||
entry["completed"] = False
|
||||
data["buckets"][bucket_name] = entry
|
||||
self.save(data)
|
||||
|
||||
def clean_stale(self, existing_buckets: List[str]) -> None:
|
||||
with self._lock:
|
||||
data = self.load()
|
||||
existing_set = set(existing_buckets)
|
||||
stale_keys = [k for k in data["buckets"] if k not in existing_set]
|
||||
if stale_keys:
|
||||
for k in stale_keys:
|
||||
del data["buckets"][k]
|
||||
self.save(data)
|
||||
|
||||
def get_last_key(self, bucket_name: str) -> Optional[str]:
|
||||
data = self.load()
|
||||
entry = data.get("buckets", {}).get(bucket_name)
|
||||
if entry is None:
|
||||
return None
|
||||
return entry.get("last_key")
|
||||
|
||||
def get_bucket_order(self, bucket_names: List[str]) -> List[str]:
|
||||
data = self.load()
|
||||
buckets_info = data.get("buckets", {})
|
||||
|
||||
incomplete = []
|
||||
complete = []
|
||||
for name in bucket_names:
|
||||
entry = buckets_info.get(name)
|
||||
if entry is None:
|
||||
incomplete.append((name, 0.0))
|
||||
elif entry.get("last_key") is not None:
|
||||
incomplete.append((name, entry.get("last_scanned", 0.0)))
|
||||
else:
|
||||
complete.append((name, entry.get("last_scanned", 0.0)))
|
||||
|
||||
incomplete.sort(key=lambda x: x[1])
|
||||
complete.sort(key=lambda x: x[1])
|
||||
|
||||
return [n for n, _ in incomplete] + [n for n, _ in complete]
|
||||
|
||||
def get_info(self) -> Dict[str, Any]:
|
||||
data = self.load()
|
||||
buckets = data.get("buckets", {})
|
||||
return {
|
||||
"tracked_buckets": len(buckets),
|
||||
"buckets": {
|
||||
name: {
|
||||
"last_scanned": info.get("last_scanned"),
|
||||
"last_key": info.get("last_key"),
|
||||
"completed": info.get("completed", False),
|
||||
}
|
||||
for name, info in buckets.items()
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
MAX_ISSUES = 500
|
||||
|
||||
|
||||
class IntegrityChecker:
|
||||
SYSTEM_ROOT = ".myfsio.sys"
|
||||
SYSTEM_BUCKETS_DIR = "buckets"
|
||||
BUCKET_META_DIR = "meta"
|
||||
BUCKET_VERSIONS_DIR = "versions"
|
||||
INTERNAL_FOLDERS = {".meta", ".versions", ".multipart"}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_hours: float = 24.0,
|
||||
batch_size: int = 1000,
|
||||
auto_heal: bool = False,
|
||||
dry_run: bool = False,
|
||||
max_history: int = 50,
|
||||
io_throttle_ms: int = 10,
|
||||
) -> None:
|
||||
self.storage_root = Path(storage_root)
|
||||
self.interval_seconds = interval_hours * 3600.0
|
||||
self.batch_size = batch_size
|
||||
self.auto_heal = auto_heal
|
||||
self.dry_run = dry_run
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self._scanning = False
|
||||
self._scan_start_time: Optional[float] = None
|
||||
self._io_throttle = max(0, io_throttle_ms) / 1000.0
|
||||
self.history_store = IntegrityHistoryStore(storage_root, max_records=max_history)
|
||||
self.cursor_store = IntegrityCursorStore(self.storage_root)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(
|
||||
"Integrity checker started: interval=%.1fh, batch_size=%d, auto_heal=%s, dry_run=%s",
|
||||
self.interval_seconds / 3600.0,
|
||||
self.batch_size,
|
||||
self.auto_heal,
|
||||
self.dry_run,
|
||||
)
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("Integrity checker stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_cycle)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_cycle(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.run_now()
|
||||
except Exception as e:
|
||||
logger.error("Integrity check cycle failed: %s", e)
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def run_now(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> IntegrityResult:
|
||||
if not self._lock.acquire(blocking=False):
|
||||
raise RuntimeError("Integrity scan is already in progress")
|
||||
|
||||
try:
|
||||
self._scanning = True
|
||||
self._scan_start_time = time.time()
|
||||
|
||||
effective_auto_heal = auto_heal if auto_heal is not None else self.auto_heal
|
||||
effective_dry_run = dry_run if dry_run is not None else self.dry_run
|
||||
|
||||
start = self._scan_start_time
|
||||
result = IntegrityResult()
|
||||
|
||||
bucket_names = self._list_bucket_names()
|
||||
self.cursor_store.clean_stale(bucket_names)
|
||||
ordered_buckets = self.cursor_store.get_bucket_order(bucket_names)
|
||||
|
||||
for bucket_name in ordered_buckets:
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
result.buckets_scanned += 1
|
||||
cursor_key = self.cursor_store.get_last_key(bucket_name)
|
||||
key_corrupted = self._check_corrupted_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
key_orphaned = self._check_orphaned_objects(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
key_phantom = self._check_phantom_metadata(bucket_name, result, effective_auto_heal, effective_dry_run, cursor_key)
|
||||
self._check_stale_versions(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
self._check_etag_cache(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
self._check_legacy_metadata(bucket_name, result, effective_auto_heal, effective_dry_run)
|
||||
returned_keys = [k for k in (key_corrupted, key_orphaned, key_phantom) if k is not None]
|
||||
bucket_exhausted = self._batch_exhausted(result)
|
||||
if bucket_exhausted and returned_keys:
|
||||
self.cursor_store.update_bucket(bucket_name, time.time(), last_key=min(returned_keys))
|
||||
else:
|
||||
self.cursor_store.update_bucket(bucket_name, time.time(), completed=True)
|
||||
|
||||
result.execution_time_seconds = time.time() - start
|
||||
|
||||
if result.has_issues or result.errors:
|
||||
logger.info(
|
||||
"Integrity check completed in %.2fs: corrupted=%d, orphaned=%d, phantom=%d, "
|
||||
"stale_versions=%d, etag_cache=%d, legacy_drift=%d, healed=%d, errors=%d%s",
|
||||
result.execution_time_seconds,
|
||||
result.corrupted_objects,
|
||||
result.orphaned_objects,
|
||||
result.phantom_metadata,
|
||||
result.stale_versions,
|
||||
result.etag_cache_inconsistencies,
|
||||
result.legacy_metadata_drifts,
|
||||
result.issues_healed,
|
||||
len(result.errors),
|
||||
" (dry run)" if effective_dry_run else "",
|
||||
)
|
||||
|
||||
record = IntegrityExecutionRecord(
|
||||
timestamp=time.time(),
|
||||
result=result.to_dict(),
|
||||
dry_run=effective_dry_run,
|
||||
auto_heal=effective_auto_heal,
|
||||
)
|
||||
self.history_store.add(record)
|
||||
|
||||
return result
|
||||
finally:
|
||||
self._scanning = False
|
||||
self._scan_start_time = None
|
||||
self._lock.release()
|
||||
|
||||
def run_async(self, auto_heal: Optional[bool] = None, dry_run: Optional[bool] = None) -> bool:
|
||||
if self._scanning:
|
||||
return False
|
||||
t = threading.Thread(target=self.run_now, args=(auto_heal, dry_run), daemon=True)
|
||||
t.start()
|
||||
return True
|
||||
|
||||
def _system_path(self) -> Path:
|
||||
return self.storage_root / self.SYSTEM_ROOT
|
||||
|
||||
def _list_bucket_names(self) -> List[str]:
|
||||
names = []
|
||||
try:
|
||||
for entry in self.storage_root.iterdir():
|
||||
if entry.is_dir() and entry.name != self.SYSTEM_ROOT:
|
||||
names.append(entry.name)
|
||||
except OSError:
|
||||
pass
|
||||
return names
|
||||
|
||||
def _throttle(self) -> bool:
|
||||
if self._shutdown:
|
||||
return True
|
||||
if self._io_throttle > 0:
|
||||
time.sleep(self._io_throttle)
|
||||
return self._shutdown
|
||||
|
||||
def _batch_exhausted(self, result: IntegrityResult) -> bool:
|
||||
return self._shutdown or result.objects_scanned >= self.batch_size
|
||||
|
||||
def _add_issue(self, result: IntegrityResult, issue: IntegrityIssue) -> None:
|
||||
if len(result.issues) < MAX_ISSUES:
|
||||
result.issues.append(issue)
|
||||
|
||||
def _collect_index_keys(
|
||||
self, meta_root: Path, cursor_key: Optional[str] = None,
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
all_keys: Dict[str, Dict[str, Any]] = {}
|
||||
if not meta_root.exists():
|
||||
return all_keys
|
||||
try:
|
||||
for index_file in meta_root.rglob("_index.json"):
|
||||
if not index_file.is_file():
|
||||
continue
|
||||
rel_dir = index_file.parent.relative_to(meta_root)
|
||||
dir_prefix = "" if rel_dir == Path(".") else rel_dir.as_posix()
|
||||
if cursor_key is not None and dir_prefix:
|
||||
full_prefix = dir_prefix + "/"
|
||||
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
|
||||
continue
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
for key_name, entry in index_data.items():
|
||||
full_key = (dir_prefix + "/" + key_name) if dir_prefix else key_name
|
||||
if cursor_key is not None and full_key <= cursor_key:
|
||||
continue
|
||||
all_keys[full_key] = {
|
||||
"entry": entry,
|
||||
"index_file": index_file,
|
||||
"key_name": key_name,
|
||||
}
|
||||
except OSError:
|
||||
pass
|
||||
return all_keys
|
||||
|
||||
def _walk_bucket_files_sorted(
|
||||
self, bucket_path: Path, cursor_key: Optional[str] = None,
|
||||
):
|
||||
def _walk(dir_path: Path, prefix: str):
|
||||
try:
|
||||
entries = list(os.scandir(dir_path))
|
||||
except OSError:
|
||||
return
|
||||
|
||||
def _sort_key(e):
|
||||
if e.is_dir(follow_symlinks=False):
|
||||
return e.name + "/"
|
||||
return e.name
|
||||
|
||||
entries.sort(key=_sort_key)
|
||||
|
||||
for entry in entries:
|
||||
if entry.is_dir(follow_symlinks=False):
|
||||
if not prefix and entry.name in self.INTERNAL_FOLDERS:
|
||||
continue
|
||||
new_prefix = (prefix + "/" + entry.name) if prefix else entry.name
|
||||
if cursor_key is not None:
|
||||
full_prefix = new_prefix + "/"
|
||||
if not cursor_key.startswith(full_prefix) and cursor_key > full_prefix:
|
||||
continue
|
||||
yield from _walk(Path(entry.path), new_prefix)
|
||||
elif entry.is_file(follow_symlinks=False):
|
||||
full_key = (prefix + "/" + entry.name) if prefix else entry.name
|
||||
if cursor_key is not None and full_key <= cursor_key:
|
||||
continue
|
||||
yield full_key
|
||||
|
||||
yield from _walk(bucket_path, "")
|
||||
|
||||
def _check_corrupted_objects(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
if not meta_root.exists():
|
||||
return None
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
all_keys = self._collect_index_keys(meta_root, cursor_key)
|
||||
sorted_keys = sorted(all_keys.keys())
|
||||
|
||||
for full_key in sorted_keys:
|
||||
if self._throttle():
|
||||
return last_key
|
||||
if self._batch_exhausted(result):
|
||||
return last_key
|
||||
|
||||
info = all_keys[full_key]
|
||||
entry = info["entry"]
|
||||
index_file = info["index_file"]
|
||||
key_name = info["key_name"]
|
||||
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
continue
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
|
||||
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||
stored_etag = meta.get("__etag__")
|
||||
if not stored_etag:
|
||||
continue
|
||||
|
||||
try:
|
||||
actual_etag = _compute_etag(object_path)
|
||||
except OSError:
|
||||
continue
|
||||
|
||||
if actual_etag != stored_etag:
|
||||
result.corrupted_objects += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="corrupted_object",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail=f"stored_etag={stored_etag} actual_etag={actual_etag}",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
stat = object_path.stat()
|
||||
meta["__etag__"] = actual_etag
|
||||
meta["__size__"] = str(stat.st_size)
|
||||
meta["__last_modified__"] = str(stat.st_mtime)
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
index_data = {}
|
||||
index_data[key_name] = {"metadata": meta}
|
||||
self._atomic_write_index(index_file, index_data)
|
||||
issue.healed = True
|
||||
issue.heal_action = "updated etag in index"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal corrupted {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check corrupted {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_orphaned_objects(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
for full_key in self._walk_bucket_files_sorted(bucket_path, cursor_key):
|
||||
if self._throttle():
|
||||
return last_key
|
||||
if self._batch_exhausted(result):
|
||||
return last_key
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
has_entry = False
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
has_entry = key_name in index_data
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
if not has_entry:
|
||||
result.orphaned_objects += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="orphaned_object",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="file exists without metadata entry",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
object_path = bucket_path / full_key
|
||||
etag = _compute_etag(object_path)
|
||||
stat = object_path.stat()
|
||||
meta = {
|
||||
"__etag__": etag,
|
||||
"__size__": str(stat.st_size),
|
||||
"__last_modified__": str(stat.st_mtime),
|
||||
}
|
||||
index_data = {}
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
index_data[key_name] = {"metadata": meta}
|
||||
self._atomic_write_index(index_path, index_data)
|
||||
issue.healed = True
|
||||
issue.heal_action = "created metadata entry"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal orphaned {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check orphaned {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_phantom_metadata(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool,
|
||||
cursor_key: Optional[str] = None,
|
||||
) -> Optional[str]:
|
||||
if self._batch_exhausted(result):
|
||||
return None
|
||||
bucket_path = self.storage_root / bucket_name
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
if not meta_root.exists():
|
||||
return None
|
||||
|
||||
last_key = None
|
||||
try:
|
||||
all_keys = self._collect_index_keys(meta_root, cursor_key)
|
||||
sorted_keys = sorted(all_keys.keys())
|
||||
|
||||
heal_by_index: Dict[Path, List[str]] = {}
|
||||
|
||||
for full_key in sorted_keys:
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
|
||||
result.objects_scanned += 1
|
||||
last_key = full_key
|
||||
|
||||
object_path = bucket_path / full_key
|
||||
if not object_path.exists():
|
||||
result.phantom_metadata += 1
|
||||
info = all_keys[full_key]
|
||||
issue = IntegrityIssue(
|
||||
issue_type="phantom_metadata",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="metadata entry without file on disk",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
index_file = info["index_file"]
|
||||
heal_by_index.setdefault(index_file, []).append(info["key_name"])
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed stale index entry"
|
||||
result.issues_healed += 1
|
||||
self._add_issue(result, issue)
|
||||
|
||||
if heal_by_index and auto_heal and not dry_run:
|
||||
for index_file, keys_to_remove in heal_by_index.items():
|
||||
try:
|
||||
index_data = json.loads(index_file.read_text(encoding="utf-8"))
|
||||
for k in keys_to_remove:
|
||||
index_data.pop(k, None)
|
||||
if index_data:
|
||||
self._atomic_write_index(index_file, index_data)
|
||||
else:
|
||||
index_file.unlink(missing_ok=True)
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal phantom {bucket_name}: {e}")
|
||||
except OSError as e:
|
||||
result.errors.append(f"check phantom {bucket_name}: {e}")
|
||||
return last_key
|
||||
|
||||
def _check_stale_versions(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
versions_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_VERSIONS_DIR
|
||||
|
||||
if not versions_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
for key_dir in versions_root.rglob("*"):
|
||||
if self._throttle():
|
||||
return
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
if not key_dir.is_dir():
|
||||
continue
|
||||
|
||||
bin_files = {f.stem: f for f in key_dir.glob("*.bin")}
|
||||
json_files = {f.stem: f for f in key_dir.glob("*.json")}
|
||||
|
||||
for stem, bin_file in bin_files.items():
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
result.objects_scanned += 1
|
||||
if stem not in json_files:
|
||||
result.stale_versions += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="stale_version",
|
||||
bucket=bucket_name,
|
||||
key=f"{key_dir.relative_to(versions_root).as_posix()}/{bin_file.name}",
|
||||
detail="version data without manifest",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
bin_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed orphaned version data"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal stale version {bin_file}: {e}")
|
||||
self._add_issue(result, issue)
|
||||
|
||||
for stem, json_file in json_files.items():
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
result.objects_scanned += 1
|
||||
if stem not in bin_files:
|
||||
result.stale_versions += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="stale_version",
|
||||
bucket=bucket_name,
|
||||
key=f"{key_dir.relative_to(versions_root).as_posix()}/{json_file.name}",
|
||||
detail="version manifest without data",
|
||||
)
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
json_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "removed orphaned version manifest"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal stale version {json_file}: {e}")
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check stale versions {bucket_name}: {e}")
|
||||
|
||||
def _check_etag_cache(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
etag_index_path = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / "etag_index.json"
|
||||
|
||||
if not etag_index_path.exists():
|
||||
return
|
||||
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
if not meta_root.exists():
|
||||
return
|
||||
|
||||
try:
|
||||
etag_cache = json.loads(etag_index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
return
|
||||
|
||||
found_mismatch = False
|
||||
|
||||
for full_key, cached_etag in etag_cache.items():
|
||||
if self._batch_exhausted(result):
|
||||
break
|
||||
result.objects_scanned += 1
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
if not index_path.exists():
|
||||
continue
|
||||
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
entry = index_data.get(key_name)
|
||||
if not entry:
|
||||
continue
|
||||
|
||||
meta = entry.get("metadata", {}) if isinstance(entry, dict) else {}
|
||||
stored_etag = meta.get("__etag__")
|
||||
|
||||
if stored_etag and cached_etag != stored_etag:
|
||||
result.etag_cache_inconsistencies += 1
|
||||
found_mismatch = True
|
||||
issue = IntegrityIssue(
|
||||
issue_type="etag_cache_inconsistency",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail=f"cached_etag={cached_etag} index_etag={stored_etag}",
|
||||
)
|
||||
self._add_issue(result, issue)
|
||||
|
||||
if found_mismatch and auto_heal and not dry_run:
|
||||
try:
|
||||
etag_index_path.unlink(missing_ok=True)
|
||||
for issue in result.issues:
|
||||
if issue.issue_type == "etag_cache_inconsistency" and issue.bucket == bucket_name and not issue.healed:
|
||||
issue.healed = True
|
||||
issue.heal_action = "deleted etag_index.json"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal etag cache {bucket_name}: {e}")
|
||||
|
||||
def _check_legacy_metadata(
|
||||
self, bucket_name: str, result: IntegrityResult, auto_heal: bool, dry_run: bool
|
||||
) -> None:
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
legacy_meta_root = self.storage_root / bucket_name / ".meta"
|
||||
if not legacy_meta_root.exists():
|
||||
return
|
||||
|
||||
meta_root = self._system_path() / self.SYSTEM_BUCKETS_DIR / bucket_name / self.BUCKET_META_DIR
|
||||
|
||||
try:
|
||||
for meta_file in legacy_meta_root.rglob("*.meta.json"):
|
||||
if self._throttle():
|
||||
return
|
||||
if self._batch_exhausted(result):
|
||||
return
|
||||
if not meta_file.is_file():
|
||||
continue
|
||||
|
||||
result.objects_scanned += 1
|
||||
try:
|
||||
rel = meta_file.relative_to(legacy_meta_root)
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
full_key = rel.as_posix().removesuffix(".meta.json")
|
||||
key_path = Path(full_key)
|
||||
key_name = key_path.name
|
||||
parent = key_path.parent
|
||||
|
||||
if parent == Path("."):
|
||||
index_path = meta_root / "_index.json"
|
||||
else:
|
||||
index_path = meta_root / parent / "_index.json"
|
||||
|
||||
try:
|
||||
legacy_data = json.loads(meta_file.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
continue
|
||||
|
||||
index_entry = None
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
index_entry = index_data.get(key_name)
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
if index_entry is None:
|
||||
result.legacy_metadata_drifts += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="legacy_metadata_drift",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="unmigrated legacy .meta.json",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
index_data = {}
|
||||
if index_path.exists():
|
||||
try:
|
||||
index_data = json.loads(index_path.read_text(encoding="utf-8"))
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
index_data[key_name] = {"metadata": legacy_data}
|
||||
self._atomic_write_index(index_path, index_data)
|
||||
meta_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "migrated to index and deleted legacy file"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal legacy {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
else:
|
||||
index_meta = index_entry.get("metadata", {}) if isinstance(index_entry, dict) else {}
|
||||
if legacy_data != index_meta:
|
||||
result.legacy_metadata_drifts += 1
|
||||
issue = IntegrityIssue(
|
||||
issue_type="legacy_metadata_drift",
|
||||
bucket=bucket_name,
|
||||
key=full_key,
|
||||
detail="legacy .meta.json differs from index entry",
|
||||
)
|
||||
|
||||
if auto_heal and not dry_run:
|
||||
try:
|
||||
meta_file.unlink(missing_ok=True)
|
||||
issue.healed = True
|
||||
issue.heal_action = "deleted legacy file (index is authoritative)"
|
||||
result.issues_healed += 1
|
||||
except OSError as e:
|
||||
result.errors.append(f"heal legacy drift {bucket_name}/{full_key}: {e}")
|
||||
|
||||
self._add_issue(result, issue)
|
||||
except OSError as e:
|
||||
result.errors.append(f"check legacy meta {bucket_name}: {e}")
|
||||
|
||||
@staticmethod
|
||||
def _atomic_write_index(index_path: Path, data: Dict[str, Any]) -> None:
|
||||
index_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
tmp_path = index_path.with_suffix(".tmp")
|
||||
try:
|
||||
with open(tmp_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f)
|
||||
os.replace(str(tmp_path), str(index_path))
|
||||
except BaseException:
|
||||
try:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
except OSError:
|
||||
pass
|
||||
raise
|
||||
|
||||
def get_history(self, limit: int = 50, offset: int = 0) -> List[dict]:
|
||||
records = self.history_store.get_history(limit, offset)
|
||||
return [r.to_dict() for r in records]
|
||||
|
||||
def get_status(self) -> dict:
|
||||
status: Dict[str, Any] = {
|
||||
"enabled": not self._shutdown or self._timer is not None,
|
||||
"running": self._timer is not None and not self._shutdown,
|
||||
"scanning": self._scanning,
|
||||
"interval_hours": self.interval_seconds / 3600.0,
|
||||
"batch_size": self.batch_size,
|
||||
"auto_heal": self.auto_heal,
|
||||
"dry_run": self.dry_run,
|
||||
"io_throttle_ms": round(self._io_throttle * 1000),
|
||||
}
|
||||
if self._scanning and self._scan_start_time is not None:
|
||||
status["scan_elapsed_seconds"] = round(time.time() - self._scan_start_time, 1)
|
||||
status["cursor"] = self.cursor_store.get_info()
|
||||
return status
|
||||
192
app/kms.py
192
app/kms.py
@@ -1,9 +1,12 @@
|
||||
"""Key Management Service (KMS) for encryption key management."""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import subprocess
|
||||
import sys
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
@@ -14,6 +17,30 @@ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
||||
|
||||
from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
|
||||
|
||||
if sys.platform != "win32":
|
||||
import fcntl
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _set_secure_file_permissions(file_path: Path) -> None:
|
||||
"""Set restrictive file permissions (owner read/write only)."""
|
||||
if sys.platform == "win32":
|
||||
try:
|
||||
username = os.environ.get("USERNAME", "")
|
||||
if username:
|
||||
subprocess.run(
|
||||
["icacls", str(file_path), "/inheritance:r",
|
||||
"/grant:r", f"{username}:F"],
|
||||
check=True, capture_output=True
|
||||
)
|
||||
else:
|
||||
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
|
||||
except (subprocess.SubprocessError, OSError) as exc:
|
||||
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
|
||||
else:
|
||||
os.chmod(file_path, 0o600)
|
||||
|
||||
|
||||
@dataclass
|
||||
class KMSKey:
|
||||
@@ -75,11 +102,11 @@ class KMSEncryptionProvider(EncryptionProvider):
|
||||
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
|
||||
"""Encrypt data using envelope encryption with KMS."""
|
||||
data_key, encrypted_data_key = self.generate_data_key()
|
||||
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
nonce = secrets.token_bytes(12)
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext,
|
||||
json.dumps(context).encode() if context else None)
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext,
|
||||
json.dumps(context, sort_keys=True).encode() if context else None)
|
||||
|
||||
return EncryptionResult(
|
||||
ciphertext=ciphertext,
|
||||
@@ -91,15 +118,26 @@ class KMSEncryptionProvider(EncryptionProvider):
|
||||
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
|
||||
key_id: str, context: Dict[str, str] | None = None) -> bytes:
|
||||
"""Decrypt data using envelope encryption with KMS."""
|
||||
# Note: Data key is encrypted without context (AAD), so we decrypt without context
|
||||
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
||||
|
||||
if len(data_key) != 32:
|
||||
raise EncryptionError("Invalid data key size")
|
||||
|
||||
aesgcm = AESGCM(data_key)
|
||||
try:
|
||||
return aesgcm.decrypt(nonce, ciphertext,
|
||||
json.dumps(context).encode() if context else None)
|
||||
json.dumps(context, sort_keys=True).encode() if context else None)
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
|
||||
logger.debug("KMS decryption failed: %s", exc)
|
||||
raise EncryptionError("Failed to decrypt data") from exc
|
||||
|
||||
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
|
||||
"""Decrypt an encrypted data key using KMS."""
|
||||
if key_id is None:
|
||||
key_id = self.key_id
|
||||
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
|
||||
if len(data_key) != 32:
|
||||
raise EncryptionError("Invalid data key size")
|
||||
return data_key
|
||||
|
||||
|
||||
class KMSManager:
|
||||
@@ -109,27 +147,52 @@ class KMSManager:
|
||||
Keys are stored encrypted on disk.
|
||||
"""
|
||||
|
||||
def __init__(self, keys_path: Path, master_key_path: Path):
|
||||
def __init__(
|
||||
self,
|
||||
keys_path: Path,
|
||||
master_key_path: Path,
|
||||
generate_data_key_min_bytes: int = 1,
|
||||
generate_data_key_max_bytes: int = 1024,
|
||||
):
|
||||
self.keys_path = keys_path
|
||||
self.master_key_path = master_key_path
|
||||
self.generate_data_key_min_bytes = generate_data_key_min_bytes
|
||||
self.generate_data_key_max_bytes = generate_data_key_max_bytes
|
||||
self._keys: Dict[str, KMSKey] = {}
|
||||
self._master_key: bytes | None = None
|
||||
self._master_aesgcm: AESGCM | None = None
|
||||
self._loaded = False
|
||||
|
||||
@property
|
||||
def master_key(self) -> bytes:
|
||||
"""Load or create the master key for encrypting KMS keys."""
|
||||
"""Load or create the master key for encrypting KMS keys (with file locking)."""
|
||||
if self._master_key is None:
|
||||
if self.master_key_path.exists():
|
||||
self._master_key = base64.b64decode(
|
||||
self.master_key_path.read_text().strip()
|
||||
)
|
||||
else:
|
||||
self._master_key = secrets.token_bytes(32)
|
||||
self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self.master_key_path.write_text(
|
||||
base64.b64encode(self._master_key).decode()
|
||||
)
|
||||
lock_path = self.master_key_path.with_suffix(".lock")
|
||||
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(lock_path, "w") as lock_file:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
|
||||
try:
|
||||
if self.master_key_path.exists():
|
||||
self._master_key = base64.b64decode(
|
||||
self.master_key_path.read_text().strip()
|
||||
)
|
||||
else:
|
||||
self._master_key = secrets.token_bytes(32)
|
||||
self.master_key_path.write_text(
|
||||
base64.b64encode(self._master_key).decode()
|
||||
)
|
||||
_set_secure_file_permissions(self.master_key_path)
|
||||
finally:
|
||||
if sys.platform == "win32":
|
||||
import msvcrt
|
||||
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
|
||||
else:
|
||||
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
|
||||
self._master_aesgcm = AESGCM(self._master_key)
|
||||
return self._master_key
|
||||
|
||||
def _load_keys(self) -> None:
|
||||
@@ -146,8 +209,10 @@ class KMSManager:
|
||||
encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
|
||||
key.key_material = self._decrypt_key_material(encrypted)
|
||||
self._keys[key.key_id] = key
|
||||
except Exception:
|
||||
pass
|
||||
except json.JSONDecodeError as exc:
|
||||
logger.error("Failed to parse KMS keys file: %s", exc)
|
||||
except (ValueError, KeyError) as exc:
|
||||
logger.error("Invalid KMS key data: %s", exc)
|
||||
|
||||
self._loaded = True
|
||||
|
||||
@@ -159,26 +224,25 @@ class KMSManager:
|
||||
encrypted = self._encrypt_key_material(key.key_material)
|
||||
data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode()
|
||||
keys_data.append(data)
|
||||
|
||||
|
||||
self.keys_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
self.keys_path.write_text(
|
||||
json.dumps({"keys": keys_data}, indent=2),
|
||||
encoding="utf-8"
|
||||
)
|
||||
_set_secure_file_permissions(self.keys_path)
|
||||
|
||||
def _encrypt_key_material(self, key_material: bytes) -> bytes:
|
||||
"""Encrypt key material with the master key."""
|
||||
aesgcm = AESGCM(self.master_key)
|
||||
_ = self.master_key
|
||||
nonce = secrets.token_bytes(12)
|
||||
ciphertext = aesgcm.encrypt(nonce, key_material, None)
|
||||
ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None)
|
||||
return nonce + ciphertext
|
||||
|
||||
|
||||
def _decrypt_key_material(self, encrypted: bytes) -> bytes:
|
||||
"""Decrypt key material with the master key."""
|
||||
aesgcm = AESGCM(self.master_key)
|
||||
_ = self.master_key
|
||||
nonce = encrypted[:12]
|
||||
ciphertext = encrypted[12:]
|
||||
return aesgcm.decrypt(nonce, ciphertext, None)
|
||||
return self._master_aesgcm.decrypt(nonce, ciphertext, None)
|
||||
|
||||
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
|
||||
"""Create a new KMS key."""
|
||||
@@ -211,7 +275,27 @@ class KMSManager:
|
||||
"""List all keys."""
|
||||
self._load_keys()
|
||||
return list(self._keys.values())
|
||||
|
||||
|
||||
def get_default_key_id(self) -> str:
|
||||
"""Get the default KMS key ID, creating one if none exist."""
|
||||
self._load_keys()
|
||||
for key in self._keys.values():
|
||||
if key.enabled:
|
||||
return key.key_id
|
||||
default_key = self.create_key(description="Default KMS Key")
|
||||
return default_key.key_id
|
||||
|
||||
def get_provider(self, key_id: str | None = None) -> "KMSEncryptionProvider":
|
||||
"""Get a KMS encryption provider for the specified key."""
|
||||
if key_id is None:
|
||||
key_id = self.get_default_key_id()
|
||||
key = self.get_key(key_id)
|
||||
if not key:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
if not key.enabled:
|
||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||
return KMSEncryptionProvider(self, key_id)
|
||||
|
||||
def enable_key(self, key_id: str) -> None:
|
||||
"""Enable a key."""
|
||||
self._load_keys()
|
||||
@@ -250,7 +334,7 @@ class KMSManager:
|
||||
|
||||
aesgcm = AESGCM(key.key_material)
|
||||
nonce = secrets.token_bytes(12)
|
||||
aad = json.dumps(context).encode() if context else None
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
|
||||
|
||||
key_id_bytes = key_id.encode("utf-8")
|
||||
@@ -279,17 +363,24 @@ class KMSManager:
|
||||
encrypted = rest[12:]
|
||||
|
||||
aesgcm = AESGCM(key.key_material)
|
||||
aad = json.dumps(context).encode() if context else None
|
||||
aad = json.dumps(context, sort_keys=True).encode() if context else None
|
||||
try:
|
||||
plaintext = aesgcm.decrypt(nonce, encrypted, aad)
|
||||
return plaintext, key_id
|
||||
except Exception as exc:
|
||||
raise EncryptionError(f"Decryption failed: {exc}") from exc
|
||||
logger.debug("KMS decrypt operation failed: %s", exc)
|
||||
raise EncryptionError("Decryption failed") from exc
|
||||
|
||||
def generate_data_key(self, key_id: str,
|
||||
context: Dict[str, str] | None = None) -> tuple[bytes, bytes]:
|
||||
context: Dict[str, str] | None = None,
|
||||
key_spec: str = "AES_256") -> tuple[bytes, bytes]:
|
||||
"""Generate a data key and return both plaintext and encrypted versions.
|
||||
|
||||
|
||||
Args:
|
||||
key_id: The KMS key ID to use for encryption
|
||||
context: Optional encryption context
|
||||
key_spec: Key specification - AES_128 or AES_256 (default)
|
||||
|
||||
Returns:
|
||||
Tuple of (plaintext_key, encrypted_key)
|
||||
"""
|
||||
@@ -299,11 +390,12 @@ class KMSManager:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
if not key.enabled:
|
||||
raise EncryptionError(f"Key is disabled: {key_id}")
|
||||
|
||||
plaintext_key = secrets.token_bytes(32)
|
||||
|
||||
key_bytes = 32 if key_spec == "AES_256" else 16
|
||||
plaintext_key = secrets.token_bytes(key_bytes)
|
||||
|
||||
encrypted_key = self.encrypt(key_id, plaintext_key, context)
|
||||
|
||||
|
||||
return plaintext_key, encrypted_key
|
||||
|
||||
def decrypt_data_key(self, key_id: str, encrypted_key: bytes,
|
||||
@@ -312,22 +404,6 @@ class KMSManager:
|
||||
plaintext, _ = self.decrypt(encrypted_key, context)
|
||||
return plaintext
|
||||
|
||||
def get_provider(self, key_id: str | None = None) -> KMSEncryptionProvider:
|
||||
"""Get an encryption provider for a specific key."""
|
||||
self._load_keys()
|
||||
|
||||
if key_id is None:
|
||||
if not self._keys:
|
||||
key = self.create_key("Default KMS Key")
|
||||
key_id = key.key_id
|
||||
else:
|
||||
key_id = next(iter(self._keys.keys()))
|
||||
|
||||
if key_id not in self._keys:
|
||||
raise EncryptionError(f"Key not found: {key_id}")
|
||||
|
||||
return KMSEncryptionProvider(self, key_id)
|
||||
|
||||
def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
|
||||
source_context: Dict[str, str] | None = None,
|
||||
destination_context: Dict[str, str] | None = None) -> bytes:
|
||||
@@ -339,6 +415,8 @@ class KMSManager:
|
||||
|
||||
def generate_random(self, num_bytes: int = 32) -> bytes:
|
||||
"""Generate cryptographically secure random bytes."""
|
||||
if num_bytes < 1 or num_bytes > 1024:
|
||||
raise EncryptionError("Number of bytes must be between 1 and 1024")
|
||||
if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes:
|
||||
raise EncryptionError(
|
||||
f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
|
||||
)
|
||||
return secrets.token_bytes(num_bytes)
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
"""KMS and encryption API endpoints."""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
|
||||
340
app/lifecycle.py
Normal file
340
app/lifecycle.py
Normal file
@@ -0,0 +1,340 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from .storage import ObjectStorage, StorageError
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class LifecycleResult:
|
||||
bucket_name: str
|
||||
objects_deleted: int = 0
|
||||
versions_deleted: int = 0
|
||||
uploads_aborted: int = 0
|
||||
errors: List[str] = field(default_factory=list)
|
||||
execution_time_seconds: float = 0.0
|
||||
|
||||
|
||||
@dataclass
|
||||
class LifecycleExecutionRecord:
|
||||
timestamp: float
|
||||
bucket_name: str
|
||||
objects_deleted: int
|
||||
versions_deleted: int
|
||||
uploads_aborted: int
|
||||
errors: List[str]
|
||||
execution_time_seconds: float
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"timestamp": self.timestamp,
|
||||
"bucket_name": self.bucket_name,
|
||||
"objects_deleted": self.objects_deleted,
|
||||
"versions_deleted": self.versions_deleted,
|
||||
"uploads_aborted": self.uploads_aborted,
|
||||
"errors": self.errors,
|
||||
"execution_time_seconds": self.execution_time_seconds,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "LifecycleExecutionRecord":
|
||||
return cls(
|
||||
timestamp=data["timestamp"],
|
||||
bucket_name=data["bucket_name"],
|
||||
objects_deleted=data["objects_deleted"],
|
||||
versions_deleted=data["versions_deleted"],
|
||||
uploads_aborted=data["uploads_aborted"],
|
||||
errors=data.get("errors", []),
|
||||
execution_time_seconds=data["execution_time_seconds"],
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def from_result(cls, result: LifecycleResult) -> "LifecycleExecutionRecord":
|
||||
return cls(
|
||||
timestamp=time.time(),
|
||||
bucket_name=result.bucket_name,
|
||||
objects_deleted=result.objects_deleted,
|
||||
versions_deleted=result.versions_deleted,
|
||||
uploads_aborted=result.uploads_aborted,
|
||||
errors=result.errors.copy(),
|
||||
execution_time_seconds=result.execution_time_seconds,
|
||||
)
|
||||
|
||||
|
||||
class LifecycleHistoryStore:
|
||||
def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_history_per_bucket = max_history_per_bucket
|
||||
self._lock = threading.Lock()
|
||||
|
||||
def _get_history_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json"
|
||||
|
||||
def load_history(self, bucket_name: str) -> List[LifecycleExecutionRecord]:
|
||||
path = self._get_history_path(bucket_name)
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
data = json.load(f)
|
||||
return [LifecycleExecutionRecord.from_dict(d) for d in data.get("executions", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}")
|
||||
return []
|
||||
|
||||
def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
|
||||
path = self._get_history_path(bucket_name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
|
||||
try:
|
||||
with open(path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}")
|
||||
|
||||
def add_record(self, bucket_name: str, record: LifecycleExecutionRecord) -> None:
|
||||
with self._lock:
|
||||
records = self.load_history(bucket_name)
|
||||
records.insert(0, record)
|
||||
self.save_history(bucket_name, records)
|
||||
|
||||
def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
|
||||
records = self.load_history(bucket_name)
|
||||
return records[offset:offset + limit]
|
||||
|
||||
|
||||
class LifecycleManager:
|
||||
def __init__(
|
||||
self,
|
||||
storage: ObjectStorage,
|
||||
interval_seconds: int = 3600,
|
||||
storage_root: Optional[Path] = None,
|
||||
max_history_per_bucket: int = 50,
|
||||
):
|
||||
self.storage = storage
|
||||
self.interval_seconds = interval_seconds
|
||||
self.storage_root = storage_root
|
||||
self._timer: Optional[threading.Timer] = None
|
||||
self._shutdown = False
|
||||
self._lock = threading.Lock()
|
||||
self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
|
||||
|
||||
def start(self) -> None:
|
||||
if self._timer is not None:
|
||||
return
|
||||
self._shutdown = False
|
||||
self._schedule_next()
|
||||
logger.info(f"Lifecycle manager started with interval {self.interval_seconds}s")
|
||||
|
||||
def stop(self) -> None:
|
||||
self._shutdown = True
|
||||
if self._timer:
|
||||
self._timer.cancel()
|
||||
self._timer = None
|
||||
logger.info("Lifecycle manager stopped")
|
||||
|
||||
def _schedule_next(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
self._timer = threading.Timer(self.interval_seconds, self._run_enforcement)
|
||||
self._timer.daemon = True
|
||||
self._timer.start()
|
||||
|
||||
def _run_enforcement(self) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
try:
|
||||
self.enforce_all_buckets()
|
||||
except Exception as e:
|
||||
logger.error(f"Lifecycle enforcement failed: {e}")
|
||||
finally:
|
||||
self._schedule_next()
|
||||
|
||||
def enforce_all_buckets(self) -> Dict[str, LifecycleResult]:
|
||||
results = {}
|
||||
try:
|
||||
buckets = self.storage.list_buckets()
|
||||
for bucket in buckets:
|
||||
result = self.enforce_rules(bucket.name)
|
||||
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0:
|
||||
results[bucket.name] = result
|
||||
except StorageError as e:
|
||||
logger.error(f"Failed to list buckets for lifecycle: {e}")
|
||||
return results
|
||||
|
||||
def enforce_rules(self, bucket_name: str) -> LifecycleResult:
|
||||
start_time = time.time()
|
||||
result = LifecycleResult(bucket_name=bucket_name)
|
||||
|
||||
try:
|
||||
lifecycle = self.storage.get_bucket_lifecycle(bucket_name)
|
||||
if not lifecycle:
|
||||
return result
|
||||
|
||||
for rule in lifecycle:
|
||||
if rule.get("Status") != "Enabled":
|
||||
continue
|
||||
rule_id = rule.get("ID", "unknown")
|
||||
prefix = rule.get("Prefix", rule.get("Filter", {}).get("Prefix", ""))
|
||||
|
||||
self._enforce_expiration(bucket_name, rule, prefix, result)
|
||||
self._enforce_noncurrent_expiration(bucket_name, rule, prefix, result)
|
||||
self._enforce_abort_multipart(bucket_name, rule, result)
|
||||
|
||||
except StorageError as e:
|
||||
result.errors.append(str(e))
|
||||
logger.error(f"Lifecycle enforcement error for {bucket_name}: {e}")
|
||||
|
||||
result.execution_time_seconds = time.time() - start_time
|
||||
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors:
|
||||
logger.info(
|
||||
f"Lifecycle enforcement for {bucket_name}: "
|
||||
f"deleted={result.objects_deleted}, versions={result.versions_deleted}, "
|
||||
f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s"
|
||||
)
|
||||
if self.history_store:
|
||||
record = LifecycleExecutionRecord.from_result(result)
|
||||
self.history_store.add_record(bucket_name, record)
|
||||
return result
|
||||
|
||||
def _enforce_expiration(
|
||||
self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
|
||||
) -> None:
|
||||
expiration = rule.get("Expiration", {})
|
||||
if not expiration:
|
||||
return
|
||||
|
||||
days = expiration.get("Days")
|
||||
date_str = expiration.get("Date")
|
||||
|
||||
if days:
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
|
||||
elif date_str:
|
||||
try:
|
||||
cutoff = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return
|
||||
else:
|
||||
return
|
||||
|
||||
try:
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
for obj in objects:
|
||||
if prefix and not obj.key.startswith(prefix):
|
||||
continue
|
||||
if obj.last_modified < cutoff:
|
||||
try:
|
||||
self.storage.delete_object(bucket_name, obj.key)
|
||||
result.objects_deleted += 1
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to delete {obj.key}: {e}")
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list objects: {e}")
|
||||
|
||||
def _enforce_noncurrent_expiration(
|
||||
self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
|
||||
) -> None:
|
||||
noncurrent = rule.get("NoncurrentVersionExpiration", {})
|
||||
noncurrent_days = noncurrent.get("NoncurrentDays")
|
||||
if not noncurrent_days:
|
||||
return
|
||||
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=noncurrent_days)
|
||||
|
||||
try:
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
for obj in objects:
|
||||
if prefix and not obj.key.startswith(prefix):
|
||||
continue
|
||||
try:
|
||||
versions = self.storage.list_object_versions(bucket_name, obj.key)
|
||||
for version in versions:
|
||||
archived_at_str = version.get("archived_at", "")
|
||||
if not archived_at_str:
|
||||
continue
|
||||
try:
|
||||
archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
|
||||
if archived_at < cutoff:
|
||||
version_id = version.get("version_id")
|
||||
if version_id:
|
||||
self.storage.delete_object_version(bucket_name, obj.key, version_id)
|
||||
result.versions_deleted += 1
|
||||
except (ValueError, StorageError) as e:
|
||||
result.errors.append(f"Failed to process version: {e}")
|
||||
except StorageError:
|
||||
pass
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list objects: {e}")
|
||||
|
||||
try:
|
||||
orphaned = self.storage.list_orphaned_objects(bucket_name)
|
||||
for item in orphaned:
|
||||
obj_key = item.get("key", "")
|
||||
if prefix and not obj_key.startswith(prefix):
|
||||
continue
|
||||
try:
|
||||
versions = self.storage.list_object_versions(bucket_name, obj_key)
|
||||
for version in versions:
|
||||
archived_at_str = version.get("archived_at", "")
|
||||
if not archived_at_str:
|
||||
continue
|
||||
try:
|
||||
archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
|
||||
if archived_at < cutoff:
|
||||
version_id = version.get("version_id")
|
||||
if version_id:
|
||||
self.storage.delete_object_version(bucket_name, obj_key, version_id)
|
||||
result.versions_deleted += 1
|
||||
except (ValueError, StorageError) as e:
|
||||
result.errors.append(f"Failed to process orphaned version: {e}")
|
||||
except StorageError:
|
||||
pass
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list orphaned objects: {e}")
|
||||
|
||||
def _enforce_abort_multipart(
|
||||
self, bucket_name: str, rule: Dict[str, Any], result: LifecycleResult
|
||||
) -> None:
|
||||
abort_config = rule.get("AbortIncompleteMultipartUpload", {})
|
||||
days_after = abort_config.get("DaysAfterInitiation")
|
||||
if not days_after:
|
||||
return
|
||||
|
||||
cutoff = datetime.now(timezone.utc) - timedelta(days=days_after)
|
||||
|
||||
try:
|
||||
uploads = self.storage.list_multipart_uploads(bucket_name)
|
||||
for upload in uploads:
|
||||
created_at_str = upload.get("created_at", "")
|
||||
if not created_at_str:
|
||||
continue
|
||||
try:
|
||||
created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
|
||||
if created_at < cutoff:
|
||||
upload_id = upload.get("upload_id")
|
||||
if upload_id:
|
||||
self.storage.abort_multipart_upload(bucket_name, upload_id)
|
||||
result.uploads_aborted += 1
|
||||
except (ValueError, StorageError) as e:
|
||||
result.errors.append(f"Failed to abort upload: {e}")
|
||||
except StorageError as e:
|
||||
result.errors.append(f"Failed to list multipart uploads: {e}")
|
||||
|
||||
def run_now(self, bucket_name: Optional[str] = None) -> Dict[str, LifecycleResult]:
|
||||
if bucket_name:
|
||||
return {bucket_name: self.enforce_rules(bucket_name)}
|
||||
return self.enforce_all_buckets()
|
||||
|
||||
def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
|
||||
if not self.history_store:
|
||||
return []
|
||||
return self.history_store.get_history(bucket_name, limit, offset)
|
||||
406
app/notifications.py
Normal file
406
app/notifications.py
Normal file
@@ -0,0 +1,406 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import json
|
||||
import logging
|
||||
import queue
|
||||
import socket
|
||||
import threading
|
||||
import time
|
||||
import uuid
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
from urllib3.util.connection import create_connection as _urllib3_create_connection
|
||||
|
||||
|
||||
def _resolve_and_check_url(url: str, allow_internal: bool = False) -> Optional[str]:
|
||||
try:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname
|
||||
if not hostname:
|
||||
return None
|
||||
cloud_metadata_hosts = {
|
||||
"metadata.google.internal",
|
||||
"169.254.169.254",
|
||||
}
|
||||
if hostname.lower() in cloud_metadata_hosts:
|
||||
return None
|
||||
if allow_internal:
|
||||
return hostname
|
||||
blocked_hosts = {
|
||||
"localhost",
|
||||
"127.0.0.1",
|
||||
"0.0.0.0",
|
||||
"::1",
|
||||
"[::1]",
|
||||
}
|
||||
if hostname.lower() in blocked_hosts:
|
||||
return None
|
||||
try:
|
||||
resolved_ip = socket.gethostbyname(hostname)
|
||||
ip = ipaddress.ip_address(resolved_ip)
|
||||
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
|
||||
return None
|
||||
return resolved_ip
|
||||
except (socket.gaierror, ValueError):
|
||||
return None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
|
||||
return _resolve_and_check_url(url, allow_internal) is not None
|
||||
|
||||
|
||||
_dns_pin_lock = threading.Lock()
|
||||
|
||||
|
||||
def _pinned_post(url: str, pinned_ip: str, **kwargs: Any) -> requests.Response:
|
||||
parsed = urlparse(url)
|
||||
hostname = parsed.hostname or ""
|
||||
session = requests.Session()
|
||||
original_create = _urllib3_create_connection
|
||||
|
||||
def _create_pinned(address: Any, *args: Any, **kw: Any) -> Any:
|
||||
host, req_port = address
|
||||
if host == hostname:
|
||||
return original_create((pinned_ip, req_port), *args, **kw)
|
||||
return original_create(address, *args, **kw)
|
||||
|
||||
import urllib3.util.connection as _conn_mod
|
||||
with _dns_pin_lock:
|
||||
_conn_mod.create_connection = _create_pinned
|
||||
try:
|
||||
return session.post(url, **kwargs)
|
||||
finally:
|
||||
_conn_mod.create_connection = original_create
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class NotificationEvent:
|
||||
event_name: str
|
||||
bucket_name: str
|
||||
object_key: str
|
||||
object_size: int = 0
|
||||
etag: str = ""
|
||||
version_id: Optional[str] = None
|
||||
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
|
||||
request_id: str = field(default_factory=lambda: uuid.uuid4().hex)
|
||||
source_ip: str = ""
|
||||
user_identity: str = ""
|
||||
|
||||
def to_s3_event(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"Records": [
|
||||
{
|
||||
"eventVersion": "2.1",
|
||||
"eventSource": "myfsio:s3",
|
||||
"awsRegion": "local",
|
||||
"eventTime": self.timestamp.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
|
||||
"eventName": self.event_name,
|
||||
"userIdentity": {
|
||||
"principalId": self.user_identity or "ANONYMOUS",
|
||||
},
|
||||
"requestParameters": {
|
||||
"sourceIPAddress": self.source_ip or "127.0.0.1",
|
||||
},
|
||||
"responseElements": {
|
||||
"x-amz-request-id": self.request_id,
|
||||
"x-amz-id-2": self.request_id,
|
||||
},
|
||||
"s3": {
|
||||
"s3SchemaVersion": "1.0",
|
||||
"configurationId": "notification",
|
||||
"bucket": {
|
||||
"name": self.bucket_name,
|
||||
"ownerIdentity": {"principalId": "local"},
|
||||
"arn": f"arn:aws:s3:::{self.bucket_name}",
|
||||
},
|
||||
"object": {
|
||||
"key": self.object_key,
|
||||
"size": self.object_size,
|
||||
"eTag": self.etag,
|
||||
"versionId": self.version_id or "null",
|
||||
"sequencer": f"{int(time.time() * 1000):016X}",
|
||||
},
|
||||
},
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class WebhookDestination:
|
||||
url: str
|
||||
headers: Dict[str, str] = field(default_factory=dict)
|
||||
timeout_seconds: int = 30
|
||||
retry_count: int = 3
|
||||
retry_delay_seconds: int = 1
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"url": self.url,
|
||||
"headers": self.headers,
|
||||
"timeout_seconds": self.timeout_seconds,
|
||||
"retry_count": self.retry_count,
|
||||
"retry_delay_seconds": self.retry_delay_seconds,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "WebhookDestination":
|
||||
return cls(
|
||||
url=data.get("url", ""),
|
||||
headers=data.get("headers", {}),
|
||||
timeout_seconds=data.get("timeout_seconds", 30),
|
||||
retry_count=data.get("retry_count", 3),
|
||||
retry_delay_seconds=data.get("retry_delay_seconds", 1),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class NotificationConfiguration:
|
||||
id: str
|
||||
events: List[str]
|
||||
destination: WebhookDestination
|
||||
prefix_filter: str = ""
|
||||
suffix_filter: str = ""
|
||||
|
||||
def matches_event(self, event_name: str, object_key: str) -> bool:
|
||||
event_match = False
|
||||
for pattern in self.events:
|
||||
if pattern.endswith("*"):
|
||||
base = pattern[:-1]
|
||||
if event_name.startswith(base):
|
||||
event_match = True
|
||||
break
|
||||
elif pattern == event_name:
|
||||
event_match = True
|
||||
break
|
||||
|
||||
if not event_match:
|
||||
return False
|
||||
|
||||
if self.prefix_filter and not object_key.startswith(self.prefix_filter):
|
||||
return False
|
||||
if self.suffix_filter and not object_key.endswith(self.suffix_filter):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"Id": self.id,
|
||||
"Events": self.events,
|
||||
"Destination": self.destination.to_dict(),
|
||||
"Filter": {
|
||||
"Key": {
|
||||
"FilterRules": [
|
||||
{"Name": "prefix", "Value": self.prefix_filter},
|
||||
{"Name": "suffix", "Value": self.suffix_filter},
|
||||
]
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "NotificationConfiguration":
|
||||
prefix = ""
|
||||
suffix = ""
|
||||
filter_data = data.get("Filter", {})
|
||||
key_filter = filter_data.get("Key", {})
|
||||
for rule in key_filter.get("FilterRules", []):
|
||||
if rule.get("Name") == "prefix":
|
||||
prefix = rule.get("Value", "")
|
||||
elif rule.get("Name") == "suffix":
|
||||
suffix = rule.get("Value", "")
|
||||
|
||||
return cls(
|
||||
id=data.get("Id", uuid.uuid4().hex),
|
||||
events=data.get("Events", []),
|
||||
destination=WebhookDestination.from_dict(data.get("Destination", {})),
|
||||
prefix_filter=prefix,
|
||||
suffix_filter=suffix,
|
||||
)
|
||||
|
||||
|
||||
class NotificationService:
|
||||
def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
|
||||
self.storage_root = storage_root
|
||||
self._allow_internal_endpoints = allow_internal_endpoints
|
||||
self._configs: Dict[str, List[NotificationConfiguration]] = {}
|
||||
self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
|
||||
self._workers: List[threading.Thread] = []
|
||||
self._shutdown = threading.Event()
|
||||
self._stats = {
|
||||
"events_queued": 0,
|
||||
"events_sent": 0,
|
||||
"events_failed": 0,
|
||||
}
|
||||
|
||||
for i in range(worker_count):
|
||||
worker = threading.Thread(target=self._worker_loop, name=f"notification-worker-{i}", daemon=True)
|
||||
worker.start()
|
||||
self._workers.append(worker)
|
||||
|
||||
def _config_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "notifications.json"
|
||||
|
||||
def get_bucket_notifications(self, bucket_name: str) -> List[NotificationConfiguration]:
|
||||
if bucket_name in self._configs:
|
||||
return self._configs[bucket_name]
|
||||
|
||||
config_path = self._config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
return []
|
||||
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
configs = [NotificationConfiguration.from_dict(c) for c in data.get("configurations", [])]
|
||||
self._configs[bucket_name] = configs
|
||||
return configs
|
||||
except (json.JSONDecodeError, OSError) as e:
|
||||
logger.warning(f"Failed to load notification config for {bucket_name}: {e}")
|
||||
return []
|
||||
|
||||
def set_bucket_notifications(
|
||||
self, bucket_name: str, configurations: List[NotificationConfiguration]
|
||||
) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
data = {"configurations": [c.to_dict() for c in configurations]}
|
||||
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
self._configs[bucket_name] = configurations
|
||||
|
||||
def delete_bucket_notifications(self, bucket_name: str) -> None:
|
||||
config_path = self._config_path(bucket_name)
|
||||
try:
|
||||
if config_path.exists():
|
||||
config_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
self._configs.pop(bucket_name, None)
|
||||
|
||||
def emit_event(self, event: NotificationEvent) -> None:
|
||||
configurations = self.get_bucket_notifications(event.bucket_name)
|
||||
if not configurations:
|
||||
return
|
||||
|
||||
for config in configurations:
|
||||
if config.matches_event(event.event_name, event.object_key):
|
||||
self._queue.put((event, config.destination))
|
||||
self._stats["events_queued"] += 1
|
||||
logger.debug(
|
||||
f"Queued notification for {event.event_name} on {event.bucket_name}/{event.object_key}"
|
||||
)
|
||||
|
||||
def emit_object_created(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
*,
|
||||
size: int = 0,
|
||||
etag: str = "",
|
||||
version_id: Optional[str] = None,
|
||||
request_id: str = "",
|
||||
source_ip: str = "",
|
||||
user_identity: str = "",
|
||||
operation: str = "Put",
|
||||
) -> None:
|
||||
event = NotificationEvent(
|
||||
event_name=f"s3:ObjectCreated:{operation}",
|
||||
bucket_name=bucket_name,
|
||||
object_key=object_key,
|
||||
object_size=size,
|
||||
etag=etag,
|
||||
version_id=version_id,
|
||||
request_id=request_id or uuid.uuid4().hex,
|
||||
source_ip=source_ip,
|
||||
user_identity=user_identity,
|
||||
)
|
||||
self.emit_event(event)
|
||||
|
||||
def emit_object_removed(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
*,
|
||||
version_id: Optional[str] = None,
|
||||
request_id: str = "",
|
||||
source_ip: str = "",
|
||||
user_identity: str = "",
|
||||
operation: str = "Delete",
|
||||
) -> None:
|
||||
event = NotificationEvent(
|
||||
event_name=f"s3:ObjectRemoved:{operation}",
|
||||
bucket_name=bucket_name,
|
||||
object_key=object_key,
|
||||
version_id=version_id,
|
||||
request_id=request_id or uuid.uuid4().hex,
|
||||
source_ip=source_ip,
|
||||
user_identity=user_identity,
|
||||
)
|
||||
self.emit_event(event)
|
||||
|
||||
def _worker_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
try:
|
||||
event, destination = self._queue.get(timeout=1.0)
|
||||
except queue.Empty:
|
||||
continue
|
||||
|
||||
try:
|
||||
self._send_notification(event, destination)
|
||||
self._stats["events_sent"] += 1
|
||||
except Exception as e:
|
||||
self._stats["events_failed"] += 1
|
||||
logger.error(f"Failed to send notification: {e}")
|
||||
finally:
|
||||
self._queue.task_done()
|
||||
|
||||
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
|
||||
resolved_ip = _resolve_and_check_url(destination.url, allow_internal=self._allow_internal_endpoints)
|
||||
if not resolved_ip:
|
||||
raise RuntimeError(f"Blocked request (SSRF protection): {destination.url}")
|
||||
payload = event.to_s3_event()
|
||||
headers = {"Content-Type": "application/json", **destination.headers}
|
||||
|
||||
last_error = None
|
||||
for attempt in range(destination.retry_count):
|
||||
try:
|
||||
response = _pinned_post(
|
||||
destination.url,
|
||||
resolved_ip,
|
||||
json=payload,
|
||||
headers=headers,
|
||||
timeout=destination.timeout_seconds,
|
||||
)
|
||||
if response.status_code < 400:
|
||||
logger.info(
|
||||
f"Notification sent: {event.event_name} -> {destination.url} (status={response.status_code})"
|
||||
)
|
||||
return
|
||||
last_error = f"HTTP {response.status_code}: {response.text[:200]}"
|
||||
except requests.RequestException as e:
|
||||
last_error = str(e)
|
||||
|
||||
if attempt < destination.retry_count - 1:
|
||||
time.sleep(destination.retry_delay_seconds * (attempt + 1))
|
||||
|
||||
raise RuntimeError(f"Failed after {destination.retry_count} attempts: {last_error}")
|
||||
|
||||
def get_stats(self) -> Dict[str, int]:
|
||||
return dict(self._stats)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
for worker in self._workers:
|
||||
worker.join(timeout=5.0)
|
||||
234
app/object_lock.py
Normal file
234
app/object_lock.py
Normal file
@@ -0,0 +1,234 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
class RetentionMode(Enum):
|
||||
GOVERNANCE = "GOVERNANCE"
|
||||
COMPLIANCE = "COMPLIANCE"
|
||||
|
||||
|
||||
class ObjectLockError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
@dataclass
|
||||
class ObjectLockRetention:
|
||||
mode: RetentionMode
|
||||
retain_until_date: datetime
|
||||
|
||||
def to_dict(self) -> Dict[str, str]:
|
||||
return {
|
||||
"Mode": self.mode.value,
|
||||
"RetainUntilDate": self.retain_until_date.isoformat(),
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> Optional["ObjectLockRetention"]:
|
||||
if not data:
|
||||
return None
|
||||
mode_str = data.get("Mode")
|
||||
date_str = data.get("RetainUntilDate")
|
||||
if not mode_str or not date_str:
|
||||
return None
|
||||
try:
|
||||
mode = RetentionMode(mode_str)
|
||||
retain_until = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
|
||||
return cls(mode=mode, retain_until_date=retain_until)
|
||||
except (ValueError, KeyError):
|
||||
return None
|
||||
|
||||
def is_expired(self) -> bool:
|
||||
return datetime.now(timezone.utc) > self.retain_until_date
|
||||
|
||||
|
||||
@dataclass
|
||||
class ObjectLockConfig:
|
||||
enabled: bool = False
|
||||
default_retention: Optional[ObjectLockRetention] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
result: Dict[str, Any] = {"ObjectLockEnabled": "Enabled" if self.enabled else "Disabled"}
|
||||
if self.default_retention:
|
||||
result["Rule"] = {
|
||||
"DefaultRetention": {
|
||||
"Mode": self.default_retention.mode.value,
|
||||
"Days": None,
|
||||
"Years": None,
|
||||
}
|
||||
}
|
||||
return result
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "ObjectLockConfig":
|
||||
enabled = data.get("ObjectLockEnabled") == "Enabled"
|
||||
default_retention = None
|
||||
rule = data.get("Rule")
|
||||
if rule and "DefaultRetention" in rule:
|
||||
dr = rule["DefaultRetention"]
|
||||
mode_str = dr.get("Mode", "GOVERNANCE")
|
||||
days = dr.get("Days")
|
||||
years = dr.get("Years")
|
||||
if days or years:
|
||||
from datetime import timedelta
|
||||
now = datetime.now(timezone.utc)
|
||||
if years:
|
||||
delta = timedelta(days=int(years) * 365)
|
||||
else:
|
||||
delta = timedelta(days=int(days))
|
||||
default_retention = ObjectLockRetention(
|
||||
mode=RetentionMode(mode_str),
|
||||
retain_until_date=now + delta,
|
||||
)
|
||||
return cls(enabled=enabled, default_retention=default_retention)
|
||||
|
||||
|
||||
class ObjectLockService:
|
||||
def __init__(self, storage_root: Path):
|
||||
self.storage_root = storage_root
|
||||
self._config_cache: Dict[str, ObjectLockConfig] = {}
|
||||
|
||||
def _bucket_lock_config_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "object_lock.json"
|
||||
|
||||
def _object_lock_meta_path(self, bucket_name: str, object_key: str) -> Path:
|
||||
safe_key = object_key.replace("/", "_").replace("\\", "_")
|
||||
return (
|
||||
self.storage_root / ".myfsio.sys" / "buckets" / bucket_name /
|
||||
"locks" / f"{safe_key}.lock.json"
|
||||
)
|
||||
|
||||
def get_bucket_lock_config(self, bucket_name: str) -> ObjectLockConfig:
|
||||
if bucket_name in self._config_cache:
|
||||
return self._config_cache[bucket_name]
|
||||
|
||||
config_path = self._bucket_lock_config_path(bucket_name)
|
||||
if not config_path.exists():
|
||||
return ObjectLockConfig(enabled=False)
|
||||
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
config = ObjectLockConfig.from_dict(data)
|
||||
self._config_cache[bucket_name] = config
|
||||
return config
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return ObjectLockConfig(enabled=False)
|
||||
|
||||
def set_bucket_lock_config(self, bucket_name: str, config: ObjectLockConfig) -> None:
|
||||
config_path = self._bucket_lock_config_path(bucket_name)
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
config_path.write_text(json.dumps(config.to_dict()), encoding="utf-8")
|
||||
self._config_cache[bucket_name] = config
|
||||
|
||||
def enable_bucket_lock(self, bucket_name: str) -> None:
|
||||
config = self.get_bucket_lock_config(bucket_name)
|
||||
config.enabled = True
|
||||
self.set_bucket_lock_config(bucket_name, config)
|
||||
|
||||
def is_bucket_lock_enabled(self, bucket_name: str) -> bool:
|
||||
return self.get_bucket_lock_config(bucket_name).enabled
|
||||
|
||||
def get_object_retention(self, bucket_name: str, object_key: str) -> Optional[ObjectLockRetention]:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
if not meta_path.exists():
|
||||
return None
|
||||
try:
|
||||
data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
return ObjectLockRetention.from_dict(data.get("retention", {}))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return None
|
||||
|
||||
def set_object_retention(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
retention: ObjectLockRetention,
|
||||
bypass_governance: bool = False,
|
||||
) -> None:
|
||||
existing = self.get_object_retention(bucket_name, object_key)
|
||||
if existing and not existing.is_expired():
|
||||
if existing.mode == RetentionMode.COMPLIANCE:
|
||||
raise ObjectLockError(
|
||||
"Cannot modify retention on object with COMPLIANCE mode until retention expires"
|
||||
)
|
||||
if existing.mode == RetentionMode.GOVERNANCE and not bypass_governance:
|
||||
raise ObjectLockError(
|
||||
"Cannot modify GOVERNANCE retention without bypass-governance permission"
|
||||
)
|
||||
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
meta_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
existing_data: Dict[str, Any] = {}
|
||||
if meta_path.exists():
|
||||
try:
|
||||
existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass
|
||||
|
||||
existing_data["retention"] = retention.to_dict()
|
||||
meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
|
||||
|
||||
def get_legal_hold(self, bucket_name: str, object_key: str) -> bool:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
if not meta_path.exists():
|
||||
return False
|
||||
try:
|
||||
data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
return data.get("legal_hold", False)
|
||||
except (json.JSONDecodeError, OSError):
|
||||
return False
|
||||
|
||||
def set_legal_hold(self, bucket_name: str, object_key: str, enabled: bool) -> None:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
meta_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
existing_data: Dict[str, Any] = {}
|
||||
if meta_path.exists():
|
||||
try:
|
||||
existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
|
||||
except (json.JSONDecodeError, OSError):
|
||||
pass
|
||||
|
||||
existing_data["legal_hold"] = enabled
|
||||
meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
|
||||
|
||||
def can_delete_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
bypass_governance: bool = False,
|
||||
) -> tuple[bool, str]:
|
||||
if self.get_legal_hold(bucket_name, object_key):
|
||||
return False, "Object is under legal hold"
|
||||
|
||||
retention = self.get_object_retention(bucket_name, object_key)
|
||||
if retention and not retention.is_expired():
|
||||
if retention.mode == RetentionMode.COMPLIANCE:
|
||||
return False, f"Object is locked in COMPLIANCE mode until {retention.retain_until_date.isoformat()}"
|
||||
if retention.mode == RetentionMode.GOVERNANCE:
|
||||
if not bypass_governance:
|
||||
return False, f"Object is locked in GOVERNANCE mode until {retention.retain_until_date.isoformat()}"
|
||||
|
||||
return True, ""
|
||||
|
||||
def can_overwrite_object(
|
||||
self,
|
||||
bucket_name: str,
|
||||
object_key: str,
|
||||
bypass_governance: bool = False,
|
||||
) -> tuple[bool, str]:
|
||||
return self.can_delete_object(bucket_name, object_key, bypass_governance)
|
||||
|
||||
def delete_object_lock_metadata(self, bucket_name: str, object_key: str) -> None:
|
||||
meta_path = self._object_lock_meta_path(bucket_name, object_key)
|
||||
try:
|
||||
if meta_path.exists():
|
||||
meta_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
296
app/operation_metrics.py
Normal file
296
app/operation_metrics.py
Normal file
@@ -0,0 +1,296 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
MAX_LATENCY_SAMPLES = 5000
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class OperationStats:
|
||||
count: int = 0
|
||||
success_count: int = 0
|
||||
error_count: int = 0
|
||||
latency_sum_ms: float = 0.0
|
||||
latency_min_ms: float = float("inf")
|
||||
latency_max_ms: float = 0.0
|
||||
bytes_in: int = 0
|
||||
bytes_out: int = 0
|
||||
latency_samples: List[float] = field(default_factory=list)
|
||||
|
||||
@staticmethod
|
||||
def _compute_percentile(sorted_data: List[float], p: float) -> float:
|
||||
if not sorted_data:
|
||||
return 0.0
|
||||
k = (len(sorted_data) - 1) * (p / 100.0)
|
||||
f = int(k)
|
||||
c = min(f + 1, len(sorted_data) - 1)
|
||||
d = k - f
|
||||
return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
|
||||
|
||||
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
|
||||
self.count += 1
|
||||
if success:
|
||||
self.success_count += 1
|
||||
else:
|
||||
self.error_count += 1
|
||||
self.latency_sum_ms += latency_ms
|
||||
if latency_ms < self.latency_min_ms:
|
||||
self.latency_min_ms = latency_ms
|
||||
if latency_ms > self.latency_max_ms:
|
||||
self.latency_max_ms = latency_ms
|
||||
self.bytes_in += bytes_in
|
||||
self.bytes_out += bytes_out
|
||||
if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
|
||||
self.latency_samples.append(latency_ms)
|
||||
else:
|
||||
j = random.randint(0, self.count - 1)
|
||||
if j < MAX_LATENCY_SAMPLES:
|
||||
self.latency_samples[j] = latency_ms
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
|
||||
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
|
||||
sorted_latencies = sorted(self.latency_samples)
|
||||
return {
|
||||
"count": self.count,
|
||||
"success_count": self.success_count,
|
||||
"error_count": self.error_count,
|
||||
"latency_avg_ms": round(avg_latency, 2),
|
||||
"latency_min_ms": round(min_latency, 2),
|
||||
"latency_max_ms": round(self.latency_max_ms, 2),
|
||||
"latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
|
||||
"latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
|
||||
"latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
|
||||
"bytes_in": self.bytes_in,
|
||||
"bytes_out": self.bytes_out,
|
||||
}
|
||||
|
||||
def merge(self, other: "OperationStats") -> None:
|
||||
self.count += other.count
|
||||
self.success_count += other.success_count
|
||||
self.error_count += other.error_count
|
||||
self.latency_sum_ms += other.latency_sum_ms
|
||||
if other.latency_min_ms < self.latency_min_ms:
|
||||
self.latency_min_ms = other.latency_min_ms
|
||||
if other.latency_max_ms > self.latency_max_ms:
|
||||
self.latency_max_ms = other.latency_max_ms
|
||||
self.bytes_in += other.bytes_in
|
||||
self.bytes_out += other.bytes_out
|
||||
combined = self.latency_samples + other.latency_samples
|
||||
if len(combined) > MAX_LATENCY_SAMPLES:
|
||||
random.shuffle(combined)
|
||||
combined = combined[:MAX_LATENCY_SAMPLES]
|
||||
self.latency_samples = combined
|
||||
|
||||
|
||||
@dataclass
|
||||
class MetricsSnapshot:
|
||||
timestamp: datetime
|
||||
window_seconds: int
|
||||
by_method: Dict[str, Dict[str, Any]]
|
||||
by_endpoint: Dict[str, Dict[str, Any]]
|
||||
by_status_class: Dict[str, int]
|
||||
error_codes: Dict[str, int]
|
||||
totals: Dict[str, Any]
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"timestamp": self.timestamp.isoformat(),
|
||||
"window_seconds": self.window_seconds,
|
||||
"by_method": self.by_method,
|
||||
"by_endpoint": self.by_endpoint,
|
||||
"by_status_class": self.by_status_class,
|
||||
"error_codes": self.error_codes,
|
||||
"totals": self.totals,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot":
|
||||
return cls(
|
||||
timestamp=datetime.fromisoformat(data["timestamp"]),
|
||||
window_seconds=data.get("window_seconds", 300),
|
||||
by_method=data.get("by_method", {}),
|
||||
by_endpoint=data.get("by_endpoint", {}),
|
||||
by_status_class=data.get("by_status_class", {}),
|
||||
error_codes=data.get("error_codes", {}),
|
||||
totals=data.get("totals", {}),
|
||||
)
|
||||
|
||||
|
||||
class OperationMetricsCollector:
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_minutes: int = 5,
|
||||
retention_hours: int = 24,
|
||||
):
|
||||
self.storage_root = storage_root
|
||||
self.interval_seconds = interval_minutes * 60
|
||||
self.retention_hours = retention_hours
|
||||
self._lock = threading.Lock()
|
||||
self._by_method: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||
self._by_endpoint: Dict[str, OperationStats] = defaultdict(OperationStats)
|
||||
self._by_status_class: Dict[str, int] = {}
|
||||
self._error_codes: Dict[str, int] = {}
|
||||
self._totals = OperationStats()
|
||||
self._window_start = time.time()
|
||||
self._shutdown = threading.Event()
|
||||
self._snapshots: List[MetricsSnapshot] = []
|
||||
|
||||
self._load_history()
|
||||
|
||||
self._snapshot_thread = threading.Thread(
|
||||
target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True
|
||||
)
|
||||
self._snapshot_thread.start()
|
||||
|
||||
def _config_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json"
|
||||
|
||||
def _load_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
if not config_path.exists():
|
||||
return
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
snapshots_data = data.get("snapshots", [])
|
||||
self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data]
|
||||
self._prune_old_snapshots()
|
||||
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||
logger.warning(f"Failed to load operation metrics history: {e}")
|
||||
|
||||
def _save_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
data = {"snapshots": [s.to_dict() for s in self._snapshots]}
|
||||
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to save operation metrics history: {e}")
|
||||
|
||||
def _prune_old_snapshots(self) -> None:
|
||||
if not self._snapshots:
|
||||
return
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
|
||||
self._snapshots = [
|
||||
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
|
||||
]
|
||||
|
||||
def _snapshot_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.interval_seconds)
|
||||
if not self._shutdown.is_set():
|
||||
self._take_snapshot()
|
||||
|
||||
def _take_snapshot(self) -> None:
|
||||
with self._lock:
|
||||
now = datetime.now(timezone.utc)
|
||||
window_seconds = int(time.time() - self._window_start)
|
||||
|
||||
snapshot = MetricsSnapshot(
|
||||
timestamp=now,
|
||||
window_seconds=window_seconds,
|
||||
by_method={k: v.to_dict() for k, v in self._by_method.items()},
|
||||
by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()},
|
||||
by_status_class=dict(self._by_status_class),
|
||||
error_codes=dict(self._error_codes),
|
||||
totals=self._totals.to_dict(),
|
||||
)
|
||||
|
||||
self._snapshots.append(snapshot)
|
||||
self._prune_old_snapshots()
|
||||
self._save_history()
|
||||
|
||||
self._by_method = defaultdict(OperationStats)
|
||||
self._by_endpoint = defaultdict(OperationStats)
|
||||
self._by_status_class.clear()
|
||||
self._error_codes.clear()
|
||||
self._totals = OperationStats()
|
||||
self._window_start = time.time()
|
||||
|
||||
def record_request(
|
||||
self,
|
||||
method: str,
|
||||
endpoint_type: str,
|
||||
status_code: int,
|
||||
latency_ms: float,
|
||||
bytes_in: int = 0,
|
||||
bytes_out: int = 0,
|
||||
error_code: Optional[str] = None,
|
||||
) -> None:
|
||||
success = 200 <= status_code < 400
|
||||
status_class = f"{status_code // 100}xx"
|
||||
|
||||
with self._lock:
|
||||
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
|
||||
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
|
||||
|
||||
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
|
||||
|
||||
if error_code:
|
||||
self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1
|
||||
|
||||
self._totals.record(latency_ms, success, bytes_in, bytes_out)
|
||||
|
||||
def get_current_stats(self) -> Dict[str, Any]:
|
||||
with self._lock:
|
||||
window_seconds = int(time.time() - self._window_start)
|
||||
return {
|
||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
||||
"window_seconds": window_seconds,
|
||||
"by_method": {k: v.to_dict() for k, v in self._by_method.items()},
|
||||
"by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()},
|
||||
"by_status_class": dict(self._by_status_class),
|
||||
"error_codes": dict(self._error_codes),
|
||||
"totals": self._totals.to_dict(),
|
||||
}
|
||||
|
||||
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
snapshots = list(self._snapshots)
|
||||
|
||||
if hours:
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
|
||||
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
|
||||
|
||||
return [s.to_dict() for s in snapshots]
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
self._take_snapshot()
|
||||
self._snapshot_thread.join(timeout=5.0)
|
||||
|
||||
|
||||
def classify_endpoint(path: str) -> str:
|
||||
if not path or path == "/":
|
||||
return "service"
|
||||
|
||||
path = path.rstrip("/")
|
||||
|
||||
if path.startswith("/ui"):
|
||||
return "ui"
|
||||
|
||||
if path.startswith("/kms"):
|
||||
return "kms"
|
||||
|
||||
if path.startswith("/myfsio"):
|
||||
return "service"
|
||||
|
||||
parts = path.lstrip("/").split("/")
|
||||
if len(parts) == 0:
|
||||
return "service"
|
||||
elif len(parts) == 1:
|
||||
return "bucket"
|
||||
else:
|
||||
return "object"
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Background replication worker."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
@@ -9,7 +8,7 @@ import time
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Dict, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
@@ -22,11 +21,43 @@ from .storage import ObjectStorage, StorageError
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
|
||||
REPLICATION_CONNECT_TIMEOUT = 5
|
||||
REPLICATION_READ_TIMEOUT = 30
|
||||
|
||||
REPLICATION_MODE_NEW_ONLY = "new_only"
|
||||
REPLICATION_MODE_ALL = "all"
|
||||
REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
|
||||
|
||||
|
||||
def _create_s3_client(
|
||||
connection: RemoteConnection,
|
||||
*,
|
||||
health_check: bool = False,
|
||||
connect_timeout: int = 5,
|
||||
read_timeout: int = 30,
|
||||
max_retries: int = 2,
|
||||
) -> Any:
|
||||
"""Create a boto3 S3 client for the given connection.
|
||||
Args:
|
||||
connection: Remote S3 connection configuration
|
||||
health_check: If True, use minimal retries for quick health checks
|
||||
"""
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=connect_timeout,
|
||||
read_timeout=read_timeout,
|
||||
retries={'max_attempts': 1 if health_check else max_retries},
|
||||
signature_version='s3v4',
|
||||
s3={'addressing_style': 'path'},
|
||||
request_checksum_calculation='when_required',
|
||||
response_checksum_validation='when_required',
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region or 'us-east-1',
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -61,16 +92,53 @@ class ReplicationStats:
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplicationFailure:
|
||||
object_key: str
|
||||
error_message: str
|
||||
timestamp: float
|
||||
failure_count: int
|
||||
bucket_name: str
|
||||
action: str
|
||||
last_error_code: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"object_key": self.object_key,
|
||||
"error_message": self.error_message,
|
||||
"timestamp": self.timestamp,
|
||||
"failure_count": self.failure_count,
|
||||
"bucket_name": self.bucket_name,
|
||||
"action": self.action,
|
||||
"last_error_code": self.last_error_code,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ReplicationFailure":
|
||||
return cls(
|
||||
object_key=data["object_key"],
|
||||
error_message=data["error_message"],
|
||||
timestamp=data["timestamp"],
|
||||
failure_count=data["failure_count"],
|
||||
bucket_name=data["bucket_name"],
|
||||
action=data["action"],
|
||||
last_error_code=data.get("last_error_code"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ReplicationRule:
|
||||
bucket_name: str
|
||||
target_connection_id: str
|
||||
target_bucket: str
|
||||
enabled: bool = True
|
||||
mode: str = REPLICATION_MODE_NEW_ONLY
|
||||
mode: str = REPLICATION_MODE_NEW_ONLY
|
||||
created_at: Optional[float] = None
|
||||
stats: ReplicationStats = field(default_factory=ReplicationStats)
|
||||
|
||||
sync_deletions: bool = True
|
||||
last_pull_at: Optional[float] = None
|
||||
filter_prefix: Optional[str] = None
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
"bucket_name": self.bucket_name,
|
||||
@@ -80,8 +148,11 @@ class ReplicationRule:
|
||||
"mode": self.mode,
|
||||
"created_at": self.created_at,
|
||||
"stats": self.stats.to_dict(),
|
||||
"sync_deletions": self.sync_deletions,
|
||||
"last_pull_at": self.last_pull_at,
|
||||
"filter_prefix": self.filter_prefix,
|
||||
}
|
||||
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: dict) -> "ReplicationRule":
|
||||
stats_data = data.pop("stats", {})
|
||||
@@ -89,21 +160,147 @@ class ReplicationRule:
|
||||
data["mode"] = REPLICATION_MODE_NEW_ONLY
|
||||
if "created_at" not in data:
|
||||
data["created_at"] = None
|
||||
if "sync_deletions" not in data:
|
||||
data["sync_deletions"] = True
|
||||
if "last_pull_at" not in data:
|
||||
data["last_pull_at"] = None
|
||||
if "filter_prefix" not in data:
|
||||
data["filter_prefix"] = None
|
||||
rule = cls(**data)
|
||||
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
|
||||
return rule
|
||||
|
||||
|
||||
class ReplicationFailureStore:
|
||||
def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
|
||||
self.storage_root = storage_root
|
||||
self.max_failures_per_bucket = max_failures_per_bucket
|
||||
self._lock = threading.Lock()
|
||||
self._cache: Dict[str, List[ReplicationFailure]] = {}
|
||||
|
||||
def _get_failures_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
|
||||
|
||||
def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||
path = self._get_failures_path(bucket_name)
|
||||
if not path.exists():
|
||||
return []
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
data = json.load(f)
|
||||
return [ReplicationFailure.from_dict(d) for d in data.get("failures", [])]
|
||||
except (OSError, ValueError, KeyError) as e:
|
||||
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
|
||||
return []
|
||||
|
||||
def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||
path = self._get_failures_path(bucket_name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
|
||||
try:
|
||||
with open(path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
|
||||
|
||||
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
|
||||
if bucket_name in self._cache:
|
||||
return list(self._cache[bucket_name])
|
||||
failures = self._load_from_disk(bucket_name)
|
||||
self._cache[bucket_name] = failures
|
||||
return list(failures)
|
||||
|
||||
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
|
||||
trimmed = failures[:self.max_failures_per_bucket]
|
||||
self._cache[bucket_name] = trimmed
|
||||
self._save_to_disk(bucket_name, trimmed)
|
||||
|
||||
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
|
||||
with self._lock:
|
||||
failures = self.load_failures(bucket_name)
|
||||
existing = next((f for f in failures if f.object_key == failure.object_key), None)
|
||||
if existing:
|
||||
existing.failure_count += 1
|
||||
existing.timestamp = failure.timestamp
|
||||
existing.error_message = failure.error_message
|
||||
existing.last_error_code = failure.last_error_code
|
||||
else:
|
||||
failures.insert(0, failure)
|
||||
self.save_failures(bucket_name, failures)
|
||||
|
||||
def remove_failure(self, bucket_name: str, object_key: str) -> bool:
|
||||
with self._lock:
|
||||
failures = self.load_failures(bucket_name)
|
||||
original_len = len(failures)
|
||||
failures = [f for f in failures if f.object_key != object_key]
|
||||
if len(failures) < original_len:
|
||||
self.save_failures(bucket_name, failures)
|
||||
return True
|
||||
return False
|
||||
|
||||
def clear_failures(self, bucket_name: str) -> None:
|
||||
with self._lock:
|
||||
self._cache.pop(bucket_name, None)
|
||||
path = self._get_failures_path(bucket_name)
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
|
||||
def get_failure(self, bucket_name: str, object_key: str) -> Optional[ReplicationFailure]:
|
||||
failures = self.load_failures(bucket_name)
|
||||
return next((f for f in failures if f.object_key == object_key), None)
|
||||
|
||||
def get_failure_count(self, bucket_name: str) -> int:
|
||||
return len(self.load_failures(bucket_name))
|
||||
|
||||
|
||||
class ReplicationManager:
|
||||
def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path) -> None:
|
||||
def __init__(
|
||||
self,
|
||||
storage: ObjectStorage,
|
||||
connections: ConnectionStore,
|
||||
rules_path: Path,
|
||||
storage_root: Path,
|
||||
connect_timeout: int = 5,
|
||||
read_timeout: int = 30,
|
||||
max_retries: int = 2,
|
||||
streaming_threshold_bytes: int = 10 * 1024 * 1024,
|
||||
max_failures_per_bucket: int = 50,
|
||||
) -> None:
|
||||
self.storage = storage
|
||||
self.connections = connections
|
||||
self.rules_path = rules_path
|
||||
self.storage_root = storage_root
|
||||
self.connect_timeout = connect_timeout
|
||||
self.read_timeout = read_timeout
|
||||
self.max_retries = max_retries
|
||||
self.streaming_threshold_bytes = streaming_threshold_bytes
|
||||
self._rules: Dict[str, ReplicationRule] = {}
|
||||
self._stats_lock = threading.Lock()
|
||||
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
|
||||
self._shutdown = False
|
||||
self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
|
||||
self.reload_rules()
|
||||
|
||||
def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
|
||||
"""Create an S3 client with the manager's configured timeouts."""
|
||||
return _create_s3_client(
|
||||
connection,
|
||||
health_check=health_check,
|
||||
connect_timeout=self.connect_timeout,
|
||||
read_timeout=self.read_timeout,
|
||||
max_retries=self.max_retries,
|
||||
)
|
||||
|
||||
def shutdown(self, wait: bool = True) -> None:
|
||||
"""Shutdown the replication executor gracefully.
|
||||
|
||||
Args:
|
||||
wait: If True, wait for pending tasks to complete
|
||||
"""
|
||||
self._shutdown = True
|
||||
self._executor.shutdown(wait=wait)
|
||||
logger.info("Replication manager shut down")
|
||||
|
||||
def reload_rules(self) -> None:
|
||||
if not self.rules_path.exists():
|
||||
self._rules = {}
|
||||
@@ -124,25 +321,12 @@ class ReplicationManager:
|
||||
|
||||
def check_endpoint_health(self, connection: RemoteConnection) -> bool:
|
||||
"""Check if a remote endpoint is reachable and responsive.
|
||||
|
||||
|
||||
Returns True if endpoint is healthy, False otherwise.
|
||||
Uses short timeouts to prevent blocking.
|
||||
"""
|
||||
try:
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
|
||||
read_timeout=REPLICATION_READ_TIMEOUT,
|
||||
retries={'max_attempts': 1}
|
||||
)
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region,
|
||||
config=config,
|
||||
)
|
||||
s3 = self._create_client(connection, health_check=True)
|
||||
s3.list_buckets()
|
||||
return True
|
||||
except Exception as e:
|
||||
@@ -152,10 +336,19 @@ class ReplicationManager:
|
||||
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
|
||||
return self._rules.get(bucket_name)
|
||||
|
||||
def list_rules(self) -> List[ReplicationRule]:
|
||||
return list(self._rules.values())
|
||||
|
||||
def set_rule(self, rule: ReplicationRule) -> None:
|
||||
old_rule = self._rules.get(rule.bucket_name)
|
||||
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
|
||||
self._rules[rule.bucket_name] = rule
|
||||
self.save_rules()
|
||||
|
||||
if rule.mode == REPLICATION_MODE_ALL and rule.enabled and not was_all_mode:
|
||||
logger.info(f"Replication mode ALL enabled for {rule.bucket_name}, triggering sync of existing objects")
|
||||
self._executor.submit(self.replicate_existing_objects, rule.bucket_name)
|
||||
|
||||
def delete_rule(self, bucket_name: str) -> None:
|
||||
if bucket_name in self._rules:
|
||||
del self._rules[bucket_name]
|
||||
@@ -184,15 +377,9 @@ class ReplicationManager:
|
||||
try:
|
||||
source_objects = self.storage.list_objects_all(bucket_name)
|
||||
source_keys = {obj.key: obj.size for obj in source_objects}
|
||||
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region,
|
||||
)
|
||||
|
||||
|
||||
s3 = self._create_client(connection)
|
||||
|
||||
dest_keys = set()
|
||||
bytes_synced = 0
|
||||
paginator = s3.get_paginator('list_objects_v2')
|
||||
@@ -257,13 +444,7 @@ class ReplicationManager:
|
||||
raise ValueError(f"Connection {connection_id} not found")
|
||||
|
||||
try:
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region,
|
||||
)
|
||||
s3 = self._create_client(connection)
|
||||
s3.create_bucket(Bucket=bucket_name)
|
||||
except ClientError as e:
|
||||
logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
|
||||
@@ -286,49 +467,46 @@ class ReplicationManager:
|
||||
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)
|
||||
|
||||
def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
|
||||
if self._shutdown:
|
||||
return
|
||||
|
||||
current_rule = self.get_rule(bucket_name)
|
||||
if not current_rule or not current_rule.enabled:
|
||||
logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
|
||||
return
|
||||
|
||||
if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"):
|
||||
logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}")
|
||||
return
|
||||
|
||||
|
||||
try:
|
||||
from .storage import ObjectStorage
|
||||
ObjectStorage._sanitize_object_key(object_key)
|
||||
except StorageError as e:
|
||||
logger.error(f"Object key validation failed in replication: {e}")
|
||||
return
|
||||
|
||||
file_size = 0
|
||||
|
||||
try:
|
||||
config = Config(
|
||||
user_agent_extra=REPLICATION_USER_AGENT,
|
||||
connect_timeout=REPLICATION_CONNECT_TIMEOUT,
|
||||
read_timeout=REPLICATION_READ_TIMEOUT,
|
||||
retries={'max_attempts': 2},
|
||||
signature_version='s3v4',
|
||||
s3={
|
||||
'addressing_style': 'path',
|
||||
},
|
||||
# Disable SDK automatic checksums - they cause SignatureDoesNotMatch errors
|
||||
# with S3-compatible servers that don't support CRC32 checksum headers
|
||||
request_checksum_calculation='when_required',
|
||||
response_checksum_validation='when_required',
|
||||
)
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=conn.endpoint_url,
|
||||
aws_access_key_id=conn.access_key,
|
||||
aws_secret_access_key=conn.secret_key,
|
||||
region_name=conn.region or 'us-east-1',
|
||||
config=config,
|
||||
)
|
||||
s3 = self._create_client(conn)
|
||||
|
||||
if action == "delete":
|
||||
try:
|
||||
s3.delete_object(Bucket=rule.target_bucket, Key=object_key)
|
||||
logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
|
||||
self._update_last_sync(bucket_name, object_key)
|
||||
self.failure_store.remove_failure(bucket_name, object_key)
|
||||
except ClientError as e:
|
||||
error_code = e.response.get('Error', {}).get('Code')
|
||||
logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}")
|
||||
self.failure_store.add_failure(bucket_name, ReplicationFailure(
|
||||
object_key=object_key,
|
||||
error_message=str(e),
|
||||
timestamp=time.time(),
|
||||
failure_count=1,
|
||||
bucket_name=bucket_name,
|
||||
action="delete",
|
||||
last_error_code=error_code,
|
||||
))
|
||||
return
|
||||
|
||||
try:
|
||||
@@ -337,34 +515,40 @@ class ReplicationManager:
|
||||
logger.error(f"Source object not found: {bucket_name}/{object_key}")
|
||||
return
|
||||
|
||||
# Don't replicate metadata - destination server will generate its own
|
||||
# __etag__ and __size__. Replicating them causes signature mismatches when they have None/empty values.
|
||||
|
||||
content_type, _ = mimetypes.guess_type(path)
|
||||
file_size = path.stat().st_size
|
||||
|
||||
logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}")
|
||||
|
||||
def do_put_object() -> None:
|
||||
"""Helper to upload object.
|
||||
|
||||
Reads the file content into memory first to avoid signature calculation
|
||||
issues with certain binary file types (like GIFs) when streaming.
|
||||
Do NOT set ContentLength explicitly - boto3 calculates it from the bytes
|
||||
and setting it manually can cause SignatureDoesNotMatch errors.
|
||||
def do_upload() -> None:
|
||||
"""Upload object using appropriate method based on file size.
|
||||
|
||||
For small files (< 10 MiB): Read into memory for simpler handling
|
||||
For large files: Use streaming upload to avoid memory issues
|
||||
"""
|
||||
file_content = path.read_bytes()
|
||||
put_kwargs = {
|
||||
"Bucket": rule.target_bucket,
|
||||
"Key": object_key,
|
||||
"Body": file_content,
|
||||
}
|
||||
extra_args = {}
|
||||
if content_type:
|
||||
put_kwargs["ContentType"] = content_type
|
||||
s3.put_object(**put_kwargs)
|
||||
extra_args["ContentType"] = content_type
|
||||
|
||||
if file_size >= self.streaming_threshold_bytes:
|
||||
s3.upload_file(
|
||||
str(path),
|
||||
rule.target_bucket,
|
||||
object_key,
|
||||
ExtraArgs=extra_args if extra_args else None,
|
||||
)
|
||||
else:
|
||||
file_content = path.read_bytes()
|
||||
put_kwargs = {
|
||||
"Bucket": rule.target_bucket,
|
||||
"Key": object_key,
|
||||
"Body": file_content,
|
||||
**extra_args,
|
||||
}
|
||||
s3.put_object(**put_kwargs)
|
||||
|
||||
try:
|
||||
do_put_object()
|
||||
do_upload()
|
||||
except (ClientError, S3UploadFailedError) as e:
|
||||
error_code = None
|
||||
if isinstance(e, ClientError):
|
||||
@@ -386,18 +570,98 @@ class ReplicationManager:
|
||||
bucket_ready = True
|
||||
else:
|
||||
logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}")
|
||||
raise e
|
||||
|
||||
raise e
|
||||
|
||||
if bucket_ready:
|
||||
do_put_object()
|
||||
do_upload()
|
||||
else:
|
||||
raise e
|
||||
|
||||
|
||||
logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
|
||||
self._update_last_sync(bucket_name, object_key)
|
||||
self.failure_store.remove_failure(bucket_name, object_key)
|
||||
|
||||
except (ClientError, OSError, ValueError) as e:
|
||||
error_code = None
|
||||
if isinstance(e, ClientError):
|
||||
error_code = e.response.get('Error', {}).get('Code')
|
||||
logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
|
||||
except Exception:
|
||||
self.failure_store.add_failure(bucket_name, ReplicationFailure(
|
||||
object_key=object_key,
|
||||
error_message=str(e),
|
||||
timestamp=time.time(),
|
||||
failure_count=1,
|
||||
bucket_name=bucket_name,
|
||||
action=action,
|
||||
last_error_code=error_code,
|
||||
))
|
||||
except Exception as e:
|
||||
logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
|
||||
self.failure_store.add_failure(bucket_name, ReplicationFailure(
|
||||
object_key=object_key,
|
||||
error_message=str(e),
|
||||
timestamp=time.time(),
|
||||
failure_count=1,
|
||||
bucket_name=bucket_name,
|
||||
action=action,
|
||||
last_error_code=None,
|
||||
))
|
||||
|
||||
def get_failed_items(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[ReplicationFailure]:
|
||||
failures = self.failure_store.load_failures(bucket_name)
|
||||
return failures[offset:offset + limit]
|
||||
|
||||
def get_failure_count(self, bucket_name: str) -> int:
|
||||
return self.failure_store.get_failure_count(bucket_name)
|
||||
|
||||
def retry_failed_item(self, bucket_name: str, object_key: str) -> bool:
|
||||
failure = self.failure_store.get_failure(bucket_name, object_key)
|
||||
if not failure:
|
||||
return False
|
||||
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule or not rule.enabled:
|
||||
return False
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
|
||||
return False
|
||||
|
||||
if not self.check_endpoint_health(connection):
|
||||
logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
|
||||
return False
|
||||
|
||||
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, failure.action)
|
||||
return True
|
||||
|
||||
def retry_all_failed(self, bucket_name: str) -> Dict[str, int]:
|
||||
failures = self.failure_store.load_failures(bucket_name)
|
||||
if not failures:
|
||||
return {"submitted": 0, "skipped": 0}
|
||||
|
||||
rule = self.get_rule(bucket_name)
|
||||
if not rule or not rule.enabled:
|
||||
return {"submitted": 0, "skipped": len(failures)}
|
||||
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
|
||||
return {"submitted": 0, "skipped": len(failures)}
|
||||
|
||||
if not self.check_endpoint_health(connection):
|
||||
logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
|
||||
return {"submitted": 0, "skipped": len(failures)}
|
||||
|
||||
submitted = 0
|
||||
for failure in failures:
|
||||
self._executor.submit(self._replicate_task, bucket_name, failure.object_key, rule, connection, failure.action)
|
||||
submitted += 1
|
||||
|
||||
return {"submitted": submitted, "skipped": 0}
|
||||
|
||||
def dismiss_failure(self, bucket_name: str, object_key: str) -> bool:
|
||||
return self.failure_store.remove_failure(bucket_name, object_key)
|
||||
|
||||
def clear_failures(self, bucket_name: str) -> None:
|
||||
self.failure_store.clear_failures(bucket_name)
|
||||
|
||||
2721
app/s3_api.py
2721
app/s3_api.py
File diff suppressed because it is too large
Load Diff
296
app/s3_client.py
Normal file
296
app/s3_client.py
Normal file
@@ -0,0 +1,296 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from typing import Any, Generator, Optional
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
|
||||
from flask import current_app, session
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"
|
||||
|
||||
_BOTO_ERROR_MAP = {
|
||||
"NoSuchBucket": 404,
|
||||
"NoSuchKey": 404,
|
||||
"NoSuchUpload": 404,
|
||||
"BucketAlreadyExists": 409,
|
||||
"BucketAlreadyOwnedByYou": 409,
|
||||
"BucketNotEmpty": 409,
|
||||
"AccessDenied": 403,
|
||||
"InvalidAccessKeyId": 403,
|
||||
"SignatureDoesNotMatch": 403,
|
||||
"InvalidBucketName": 400,
|
||||
"InvalidArgument": 400,
|
||||
"MalformedXML": 400,
|
||||
"EntityTooLarge": 400,
|
||||
"QuotaExceeded": 403,
|
||||
}
|
||||
|
||||
_UPLOAD_REGISTRY_MAX_AGE = 86400
|
||||
_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600
|
||||
|
||||
|
||||
class UploadRegistry:
|
||||
def __init__(self) -> None:
|
||||
self._entries: dict[str, tuple[str, str, float]] = {}
|
||||
self._lock = threading.Lock()
|
||||
self._last_cleanup = time.monotonic()
|
||||
|
||||
def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
|
||||
with self._lock:
|
||||
self._entries[upload_id] = (bucket_name, object_key, time.monotonic())
|
||||
self._maybe_cleanup()
|
||||
|
||||
def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
|
||||
with self._lock:
|
||||
entry = self._entries.get(upload_id)
|
||||
if entry is None:
|
||||
return None
|
||||
stored_bucket, key, created_at = entry
|
||||
if stored_bucket != bucket_name:
|
||||
return None
|
||||
if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE:
|
||||
del self._entries[upload_id]
|
||||
return None
|
||||
return key
|
||||
|
||||
def remove(self, upload_id: str) -> None:
|
||||
with self._lock:
|
||||
self._entries.pop(upload_id, None)
|
||||
|
||||
def _maybe_cleanup(self) -> None:
|
||||
now = time.monotonic()
|
||||
if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
|
||||
return
|
||||
self._last_cleanup = now
|
||||
cutoff = now - _UPLOAD_REGISTRY_MAX_AGE
|
||||
stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff]
|
||||
for uid in stale:
|
||||
del self._entries[uid]
|
||||
|
||||
|
||||
class S3ProxyClient:
|
||||
def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
|
||||
if not api_base_url:
|
||||
raise ValueError("api_base_url is required for S3ProxyClient")
|
||||
self._api_base_url = api_base_url.rstrip("/")
|
||||
self._region = region
|
||||
self.upload_registry = UploadRegistry()
|
||||
|
||||
@property
|
||||
def api_base_url(self) -> str:
|
||||
return self._api_base_url
|
||||
|
||||
def get_client(self, access_key: str, secret_key: str) -> Any:
|
||||
if not access_key or not secret_key:
|
||||
raise ValueError("Both access_key and secret_key are required")
|
||||
config = Config(
|
||||
user_agent_extra=UI_PROXY_USER_AGENT,
|
||||
connect_timeout=5,
|
||||
read_timeout=30,
|
||||
retries={"max_attempts": 0},
|
||||
signature_version="s3v4",
|
||||
s3={"addressing_style": "path"},
|
||||
request_checksum_calculation="when_required",
|
||||
response_checksum_validation="when_required",
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=self._api_base_url,
|
||||
aws_access_key_id=access_key,
|
||||
aws_secret_access_key=secret_key,
|
||||
region_name=self._region,
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
def _get_proxy() -> S3ProxyClient:
|
||||
proxy = current_app.extensions.get("s3_proxy")
|
||||
if proxy is None:
|
||||
raise RuntimeError(
|
||||
"S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
|
||||
)
|
||||
return proxy
|
||||
|
||||
|
||||
def _get_session_creds() -> tuple[str, str]:
|
||||
secret_store = current_app.extensions["secret_store"]
|
||||
secret_store.purge_expired()
|
||||
token = session.get("cred_token")
|
||||
if not token:
|
||||
raise PermissionError("Not authenticated")
|
||||
creds = secret_store.peek(token)
|
||||
if not creds:
|
||||
raise PermissionError("Session expired")
|
||||
access_key = creds.get("access_key", "")
|
||||
secret_key = creds.get("secret_key", "")
|
||||
if not access_key or not secret_key:
|
||||
raise PermissionError("Invalid session credentials")
|
||||
return access_key, secret_key
|
||||
|
||||
|
||||
def get_session_s3_client() -> Any:
|
||||
proxy = _get_proxy()
|
||||
access_key, secret_key = _get_session_creds()
|
||||
return proxy.get_client(access_key, secret_key)
|
||||
|
||||
|
||||
def get_upload_registry() -> UploadRegistry:
|
||||
return _get_proxy().upload_registry
|
||||
|
||||
|
||||
def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
|
||||
error_info = exc.response.get("Error", {})
|
||||
code = error_info.get("Code", "InternalError")
|
||||
message = error_info.get("Message") or "S3 operation failed"
|
||||
http_status = _BOTO_ERROR_MAP.get(code)
|
||||
if http_status is None:
|
||||
http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500)
|
||||
return {"error": message}, http_status
|
||||
|
||||
|
||||
def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
|
||||
logger.error("S3 API connection failed: %s", exc)
|
||||
return {"error": "S3 API server is unreachable. Ensure the API server is running."}, 502
|
||||
|
||||
|
||||
def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
|
||||
from .ui import _format_datetime_display
|
||||
return _format_datetime_display(dt, display_tz)
|
||||
|
||||
|
||||
def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
|
||||
from .ui import _format_datetime_iso
|
||||
return _format_datetime_iso(dt, display_tz)
|
||||
|
||||
|
||||
def build_url_templates(bucket_name: str) -> dict[str, str]:
|
||||
from flask import url_for
|
||||
preview_t = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
delete_t = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
presign_t = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
versions_t = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
restore_t = url_for(
|
||||
"ui.restore_object_version",
|
||||
bucket_name=bucket_name,
|
||||
object_key="KEY_PLACEHOLDER",
|
||||
version_id="VERSION_ID_PLACEHOLDER",
|
||||
)
|
||||
tags_t = url_for("ui.object_tags", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
copy_t = url_for("ui.copy_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
move_t = url_for("ui.move_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
metadata_t = url_for("ui.object_metadata", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
|
||||
return {
|
||||
"preview": preview_t,
|
||||
"download": preview_t + "?download=1",
|
||||
"presign": presign_t,
|
||||
"delete": delete_t,
|
||||
"versions": versions_t,
|
||||
"restore": restore_t,
|
||||
"tags": tags_t,
|
||||
"copy": copy_t,
|
||||
"move": move_t,
|
||||
"metadata": metadata_t,
|
||||
}
|
||||
|
||||
|
||||
def translate_list_objects(
|
||||
boto3_response: dict[str, Any],
|
||||
url_templates: dict[str, str],
|
||||
display_tz: str = "UTC",
|
||||
versioning_enabled: bool = False,
|
||||
) -> dict[str, Any]:
|
||||
objects_data = []
|
||||
for obj in boto3_response.get("Contents", []):
|
||||
last_mod = obj["LastModified"]
|
||||
objects_data.append({
|
||||
"key": obj["Key"],
|
||||
"size": obj["Size"],
|
||||
"last_modified": last_mod.isoformat(),
|
||||
"last_modified_display": format_datetime_display(last_mod, display_tz),
|
||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.get("ETag", "").strip('"'),
|
||||
})
|
||||
return {
|
||||
"objects": objects_data,
|
||||
"is_truncated": boto3_response.get("IsTruncated", False),
|
||||
"next_continuation_token": boto3_response.get("NextContinuationToken"),
|
||||
"total_count": boto3_response.get("KeyCount", len(objects_data)),
|
||||
"versioning_enabled": versioning_enabled,
|
||||
"url_templates": url_templates,
|
||||
}
|
||||
|
||||
|
||||
def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
|
||||
try:
|
||||
resp = client.get_bucket_versioning(Bucket=bucket_name)
|
||||
return resp.get("Status") == "Enabled"
|
||||
except ClientError as exc:
|
||||
code = exc.response.get("Error", {}).get("Code", "")
|
||||
if code != "NoSuchBucket":
|
||||
logger.warning("Failed to check versioning for %s: %s", bucket_name, code)
|
||||
return False
|
||||
|
||||
|
||||
def stream_objects_ndjson(
|
||||
client: Any,
|
||||
bucket_name: str,
|
||||
prefix: Optional[str],
|
||||
url_templates: dict[str, str],
|
||||
display_tz: str = "UTC",
|
||||
versioning_enabled: bool = False,
|
||||
delimiter: Optional[str] = None,
|
||||
) -> Generator[str, None, None]:
|
||||
meta_line = json.dumps({
|
||||
"type": "meta",
|
||||
"versioning_enabled": versioning_enabled,
|
||||
"url_templates": url_templates,
|
||||
}) + "\n"
|
||||
yield meta_line
|
||||
|
||||
yield json.dumps({"type": "count", "total_count": 0}) + "\n"
|
||||
|
||||
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
|
||||
if prefix:
|
||||
kwargs["Prefix"] = prefix
|
||||
if delimiter:
|
||||
kwargs["Delimiter"] = delimiter
|
||||
|
||||
running_count = 0
|
||||
try:
|
||||
paginator = client.get_paginator("list_objects_v2")
|
||||
for page in paginator.paginate(**kwargs):
|
||||
for cp in page.get("CommonPrefixes", []):
|
||||
yield json.dumps({
|
||||
"type": "folder",
|
||||
"prefix": cp["Prefix"],
|
||||
}) + "\n"
|
||||
page_contents = page.get("Contents", [])
|
||||
for obj in page_contents:
|
||||
last_mod = obj["LastModified"]
|
||||
yield json.dumps({
|
||||
"type": "object",
|
||||
"key": obj["Key"],
|
||||
"size": obj["Size"],
|
||||
"last_modified": last_mod.isoformat(),
|
||||
"last_modified_display": format_datetime_display(last_mod, display_tz),
|
||||
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
|
||||
"etag": obj.get("ETag", "").strip('"'),
|
||||
}) + "\n"
|
||||
running_count += len(page_contents)
|
||||
yield json.dumps({"type": "count", "total_count": running_count}) + "\n"
|
||||
except ClientError as exc:
|
||||
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
|
||||
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
|
||||
return
|
||||
except (EndpointConnectionError, ConnectionClosedError):
|
||||
yield json.dumps({"type": "error", "error": "S3 API server is unreachable"}) + "\n"
|
||||
return
|
||||
|
||||
yield json.dumps({"type": "done"}) + "\n"
|
||||
@@ -1,4 +1,3 @@
|
||||
"""Ephemeral store for one-time secrets communicated to the UI."""
|
||||
from __future__ import annotations
|
||||
|
||||
import secrets
|
||||
@@ -19,6 +18,18 @@ class EphemeralSecretStore:
|
||||
self._store[token] = (payload, expires_at)
|
||||
return token
|
||||
|
||||
def peek(self, token: str | None) -> Any | None:
|
||||
if not token:
|
||||
return None
|
||||
entry = self._store.get(token)
|
||||
if not entry:
|
||||
return None
|
||||
payload, expires_at = entry
|
||||
if expires_at < time.time():
|
||||
self._store.pop(token, None)
|
||||
return None
|
||||
return payload
|
||||
|
||||
def pop(self, token: str | None) -> Any | None:
|
||||
if not token:
|
||||
return None
|
||||
|
||||
171
app/select_content.py
Normal file
171
app/select_content.py
Normal file
@@ -0,0 +1,171 @@
|
||||
"""S3 SelectObjectContent SQL query execution using DuckDB."""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Generator, Optional
|
||||
|
||||
try:
|
||||
import duckdb
|
||||
DUCKDB_AVAILABLE = True
|
||||
except ImportError:
|
||||
DUCKDB_AVAILABLE = False
|
||||
|
||||
|
||||
class SelectError(Exception):
|
||||
"""Error during SELECT query execution."""
|
||||
pass
|
||||
|
||||
|
||||
def execute_select_query(
|
||||
file_path: Path,
|
||||
expression: str,
|
||||
input_format: str,
|
||||
input_config: Dict[str, Any],
|
||||
output_format: str,
|
||||
output_config: Dict[str, Any],
|
||||
chunk_size: int = 65536,
|
||||
) -> Generator[bytes, None, None]:
|
||||
"""Execute SQL query on object content."""
|
||||
if not DUCKDB_AVAILABLE:
|
||||
raise SelectError("DuckDB is not installed. Install with: pip install duckdb")
|
||||
|
||||
conn = duckdb.connect(":memory:")
|
||||
|
||||
try:
|
||||
if input_format == "CSV":
|
||||
_load_csv(conn, file_path, input_config)
|
||||
elif input_format == "JSON":
|
||||
_load_json(conn, file_path, input_config)
|
||||
elif input_format == "Parquet":
|
||||
_load_parquet(conn, file_path)
|
||||
else:
|
||||
raise SelectError(f"Unsupported input format: {input_format}")
|
||||
|
||||
normalized_expression = expression.replace("s3object", "data").replace("S3Object", "data")
|
||||
|
||||
try:
|
||||
result = conn.execute(normalized_expression)
|
||||
except duckdb.Error as exc:
|
||||
raise SelectError(f"SQL execution error: {exc}")
|
||||
|
||||
if output_format == "CSV":
|
||||
yield from _output_csv(result, output_config, chunk_size)
|
||||
elif output_format == "JSON":
|
||||
yield from _output_json(result, output_config, chunk_size)
|
||||
else:
|
||||
raise SelectError(f"Unsupported output format: {output_format}")
|
||||
|
||||
finally:
|
||||
conn.close()
|
||||
|
||||
|
||||
def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
|
||||
"""Load CSV file into DuckDB."""
|
||||
file_header_info = config.get("file_header_info", "NONE")
|
||||
delimiter = config.get("field_delimiter", ",")
|
||||
quote = config.get("quote_character", '"')
|
||||
|
||||
header = file_header_info in ("USE", "IGNORE")
|
||||
path_str = str(file_path).replace("\\", "/")
|
||||
|
||||
conn.execute(f"""
|
||||
CREATE TABLE data AS
|
||||
SELECT * FROM read_csv('{path_str}',
|
||||
header={header},
|
||||
delim='{delimiter}',
|
||||
quote='{quote}'
|
||||
)
|
||||
""")
|
||||
|
||||
|
||||
def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
|
||||
"""Load JSON file into DuckDB."""
|
||||
json_type = config.get("type", "DOCUMENT")
|
||||
path_str = str(file_path).replace("\\", "/")
|
||||
|
||||
if json_type == "LINES":
|
||||
conn.execute(f"""
|
||||
CREATE TABLE data AS
|
||||
SELECT * FROM read_json_auto('{path_str}', format='newline_delimited')
|
||||
""")
|
||||
else:
|
||||
conn.execute(f"""
|
||||
CREATE TABLE data AS
|
||||
SELECT * FROM read_json_auto('{path_str}', format='array')
|
||||
""")
|
||||
|
||||
|
||||
def _load_parquet(conn, file_path: Path) -> None:
|
||||
"""Load Parquet file into DuckDB."""
|
||||
path_str = str(file_path).replace("\\", "/")
|
||||
conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')")
|
||||
|
||||
|
||||
def _output_csv(
|
||||
result,
|
||||
config: Dict[str, Any],
|
||||
chunk_size: int,
|
||||
) -> Generator[bytes, None, None]:
|
||||
"""Output query results as CSV."""
|
||||
delimiter = config.get("field_delimiter", ",")
|
||||
record_delimiter = config.get("record_delimiter", "\n")
|
||||
quote = config.get("quote_character", '"')
|
||||
|
||||
buffer = ""
|
||||
|
||||
while True:
|
||||
rows = result.fetchmany(1000)
|
||||
if not rows:
|
||||
break
|
||||
|
||||
for row in rows:
|
||||
fields = []
|
||||
for value in row:
|
||||
if value is None:
|
||||
fields.append("")
|
||||
elif isinstance(value, str):
|
||||
if delimiter in value or quote in value or record_delimiter in value:
|
||||
escaped = value.replace(quote, quote + quote)
|
||||
fields.append(f'{quote}{escaped}{quote}')
|
||||
else:
|
||||
fields.append(value)
|
||||
else:
|
||||
fields.append(str(value))
|
||||
|
||||
buffer += delimiter.join(fields) + record_delimiter
|
||||
|
||||
while len(buffer) >= chunk_size:
|
||||
yield buffer[:chunk_size].encode("utf-8")
|
||||
buffer = buffer[chunk_size:]
|
||||
|
||||
if buffer:
|
||||
yield buffer.encode("utf-8")
|
||||
|
||||
|
||||
def _output_json(
|
||||
result,
|
||||
config: Dict[str, Any],
|
||||
chunk_size: int,
|
||||
) -> Generator[bytes, None, None]:
|
||||
"""Output query results as JSON Lines."""
|
||||
record_delimiter = config.get("record_delimiter", "\n")
|
||||
columns = [desc[0] for desc in result.description]
|
||||
|
||||
buffer = ""
|
||||
|
||||
while True:
|
||||
rows = result.fetchmany(1000)
|
||||
if not rows:
|
||||
break
|
||||
|
||||
for row in rows:
|
||||
record = dict(zip(columns, row))
|
||||
buffer += json.dumps(record, default=str) + record_delimiter
|
||||
|
||||
while len(buffer) >= chunk_size:
|
||||
yield buffer[:chunk_size].encode("utf-8")
|
||||
buffer = buffer[chunk_size:]
|
||||
|
||||
if buffer:
|
||||
yield buffer.encode("utf-8")
|
||||
177
app/site_registry.py
Normal file
177
app/site_registry.py
Normal file
@@ -0,0 +1,177 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class SiteInfo:
|
||||
site_id: str
|
||||
endpoint: str
|
||||
region: str = "us-east-1"
|
||||
priority: int = 100
|
||||
display_name: str = ""
|
||||
created_at: Optional[float] = None
|
||||
updated_at: Optional[float] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.display_name:
|
||||
self.display_name = self.site_id
|
||||
if self.created_at is None:
|
||||
self.created_at = time.time()
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"site_id": self.site_id,
|
||||
"endpoint": self.endpoint,
|
||||
"region": self.region,
|
||||
"priority": self.priority,
|
||||
"display_name": self.display_name,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> SiteInfo:
|
||||
return cls(
|
||||
site_id=data["site_id"],
|
||||
endpoint=data.get("endpoint", ""),
|
||||
region=data.get("region", "us-east-1"),
|
||||
priority=data.get("priority", 100),
|
||||
display_name=data.get("display_name", ""),
|
||||
created_at=data.get("created_at"),
|
||||
updated_at=data.get("updated_at"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PeerSite:
|
||||
site_id: str
|
||||
endpoint: str
|
||||
region: str = "us-east-1"
|
||||
priority: int = 100
|
||||
display_name: str = ""
|
||||
created_at: Optional[float] = None
|
||||
updated_at: Optional[float] = None
|
||||
connection_id: Optional[str] = None
|
||||
is_healthy: Optional[bool] = None
|
||||
last_health_check: Optional[float] = None
|
||||
|
||||
def __post_init__(self) -> None:
|
||||
if not self.display_name:
|
||||
self.display_name = self.site_id
|
||||
if self.created_at is None:
|
||||
self.created_at = time.time()
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"site_id": self.site_id,
|
||||
"endpoint": self.endpoint,
|
||||
"region": self.region,
|
||||
"priority": self.priority,
|
||||
"display_name": self.display_name,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"connection_id": self.connection_id,
|
||||
"is_healthy": self.is_healthy,
|
||||
"last_health_check": self.last_health_check,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> PeerSite:
|
||||
return cls(
|
||||
site_id=data["site_id"],
|
||||
endpoint=data.get("endpoint", ""),
|
||||
region=data.get("region", "us-east-1"),
|
||||
priority=data.get("priority", 100),
|
||||
display_name=data.get("display_name", ""),
|
||||
created_at=data.get("created_at"),
|
||||
updated_at=data.get("updated_at"),
|
||||
connection_id=data.get("connection_id"),
|
||||
is_healthy=data.get("is_healthy"),
|
||||
last_health_check=data.get("last_health_check"),
|
||||
)
|
||||
|
||||
|
||||
class SiteRegistry:
|
||||
def __init__(self, config_path: Path) -> None:
|
||||
self.config_path = config_path
|
||||
self._local_site: Optional[SiteInfo] = None
|
||||
self._peers: Dict[str, PeerSite] = {}
|
||||
self.reload()
|
||||
|
||||
def reload(self) -> None:
|
||||
if not self.config_path.exists():
|
||||
self._local_site = None
|
||||
self._peers = {}
|
||||
return
|
||||
|
||||
try:
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
|
||||
if data.get("local"):
|
||||
self._local_site = SiteInfo.from_dict(data["local"])
|
||||
else:
|
||||
self._local_site = None
|
||||
|
||||
self._peers = {}
|
||||
for peer_data in data.get("peers", []):
|
||||
peer = PeerSite.from_dict(peer_data)
|
||||
self._peers[peer.site_id] = peer
|
||||
|
||||
except (OSError, json.JSONDecodeError, KeyError):
|
||||
self._local_site = None
|
||||
self._peers = {}
|
||||
|
||||
def save(self) -> None:
|
||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
data = {
|
||||
"local": self._local_site.to_dict() if self._local_site else None,
|
||||
"peers": [peer.to_dict() for peer in self._peers.values()],
|
||||
}
|
||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def get_local_site(self) -> Optional[SiteInfo]:
|
||||
return self._local_site
|
||||
|
||||
def set_local_site(self, site: SiteInfo) -> None:
|
||||
site.updated_at = time.time()
|
||||
self._local_site = site
|
||||
self.save()
|
||||
|
||||
def list_peers(self) -> List[PeerSite]:
|
||||
return list(self._peers.values())
|
||||
|
||||
def get_peer(self, site_id: str) -> Optional[PeerSite]:
|
||||
return self._peers.get(site_id)
|
||||
|
||||
def add_peer(self, peer: PeerSite) -> None:
|
||||
peer.created_at = peer.created_at or time.time()
|
||||
self._peers[peer.site_id] = peer
|
||||
self.save()
|
||||
|
||||
def update_peer(self, peer: PeerSite) -> None:
|
||||
if peer.site_id not in self._peers:
|
||||
raise ValueError(f"Peer {peer.site_id} not found")
|
||||
peer.updated_at = time.time()
|
||||
self._peers[peer.site_id] = peer
|
||||
self.save()
|
||||
|
||||
def delete_peer(self, site_id: str) -> bool:
|
||||
if site_id in self._peers:
|
||||
del self._peers[site_id]
|
||||
self.save()
|
||||
return True
|
||||
return False
|
||||
|
||||
def update_health(self, site_id: str, is_healthy: bool) -> None:
|
||||
peer = self._peers.get(site_id)
|
||||
if peer:
|
||||
peer.is_healthy = is_healthy
|
||||
peer.last_health_check = time.time()
|
||||
self.save()
|
||||
416
app/site_sync.py
Normal file
416
app/site_sync.py
Normal file
@@ -0,0 +1,416 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import tempfile
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
import boto3
|
||||
from botocore.config import Config
|
||||
from botocore.exceptions import ClientError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .connections import ConnectionStore, RemoteConnection
|
||||
from .replication import ReplicationManager, ReplicationRule
|
||||
from .storage import ObjectStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
|
||||
|
||||
|
||||
@dataclass
|
||||
class SyncedObjectInfo:
|
||||
last_synced_at: float
|
||||
remote_etag: str
|
||||
source: str
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"last_synced_at": self.last_synced_at,
|
||||
"remote_etag": self.remote_etag,
|
||||
"source": self.source,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo":
|
||||
return cls(
|
||||
last_synced_at=data["last_synced_at"],
|
||||
remote_etag=data["remote_etag"],
|
||||
source=data["source"],
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SyncState:
|
||||
synced_objects: Dict[str, SyncedObjectInfo] = field(default_factory=dict)
|
||||
last_full_sync: Optional[float] = None
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"synced_objects": {k: v.to_dict() for k, v in self.synced_objects.items()},
|
||||
"last_full_sync": self.last_full_sync,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SyncState":
|
||||
synced_objects = {}
|
||||
for k, v in data.get("synced_objects", {}).items():
|
||||
synced_objects[k] = SyncedObjectInfo.from_dict(v)
|
||||
return cls(
|
||||
synced_objects=synced_objects,
|
||||
last_full_sync=data.get("last_full_sync"),
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SiteSyncStats:
|
||||
last_sync_at: Optional[float] = None
|
||||
objects_pulled: int = 0
|
||||
objects_skipped: int = 0
|
||||
conflicts_resolved: int = 0
|
||||
deletions_applied: int = 0
|
||||
errors: int = 0
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"last_sync_at": self.last_sync_at,
|
||||
"objects_pulled": self.objects_pulled,
|
||||
"objects_skipped": self.objects_skipped,
|
||||
"conflicts_resolved": self.conflicts_resolved,
|
||||
"deletions_applied": self.deletions_applied,
|
||||
"errors": self.errors,
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemoteObjectMeta:
|
||||
key: str
|
||||
size: int
|
||||
last_modified: datetime
|
||||
etag: str
|
||||
|
||||
@classmethod
|
||||
def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta":
|
||||
return cls(
|
||||
key=obj["Key"],
|
||||
size=obj.get("Size", 0),
|
||||
last_modified=obj["LastModified"],
|
||||
etag=obj.get("ETag", "").strip('"'),
|
||||
)
|
||||
|
||||
|
||||
def _create_sync_client(
|
||||
connection: "RemoteConnection",
|
||||
*,
|
||||
connect_timeout: int = 10,
|
||||
read_timeout: int = 120,
|
||||
max_retries: int = 2,
|
||||
) -> Any:
|
||||
config = Config(
|
||||
user_agent_extra=SITE_SYNC_USER_AGENT,
|
||||
connect_timeout=connect_timeout,
|
||||
read_timeout=read_timeout,
|
||||
retries={"max_attempts": max_retries},
|
||||
signature_version="s3v4",
|
||||
s3={"addressing_style": "path"},
|
||||
request_checksum_calculation="when_required",
|
||||
response_checksum_validation="when_required",
|
||||
)
|
||||
return boto3.client(
|
||||
"s3",
|
||||
endpoint_url=connection.endpoint_url,
|
||||
aws_access_key_id=connection.access_key,
|
||||
aws_secret_access_key=connection.secret_key,
|
||||
region_name=connection.region or "us-east-1",
|
||||
config=config,
|
||||
)
|
||||
|
||||
|
||||
class SiteSyncWorker:
|
||||
def __init__(
|
||||
self,
|
||||
storage: "ObjectStorage",
|
||||
connections: "ConnectionStore",
|
||||
replication_manager: "ReplicationManager",
|
||||
storage_root: Path,
|
||||
interval_seconds: int = 60,
|
||||
batch_size: int = 100,
|
||||
connect_timeout: int = 10,
|
||||
read_timeout: int = 120,
|
||||
max_retries: int = 2,
|
||||
clock_skew_tolerance_seconds: float = 1.0,
|
||||
):
|
||||
self.storage = storage
|
||||
self.connections = connections
|
||||
self.replication_manager = replication_manager
|
||||
self.storage_root = storage_root
|
||||
self.interval_seconds = interval_seconds
|
||||
self.batch_size = batch_size
|
||||
self.connect_timeout = connect_timeout
|
||||
self.read_timeout = read_timeout
|
||||
self.max_retries = max_retries
|
||||
self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
|
||||
self._lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
self._sync_thread: Optional[threading.Thread] = None
|
||||
self._bucket_stats: Dict[str, SiteSyncStats] = {}
|
||||
|
||||
def _create_client(self, connection: "RemoteConnection") -> Any:
|
||||
"""Create an S3 client with the worker's configured timeouts."""
|
||||
return _create_sync_client(
|
||||
connection,
|
||||
connect_timeout=self.connect_timeout,
|
||||
read_timeout=self.read_timeout,
|
||||
max_retries=self.max_retries,
|
||||
)
|
||||
|
||||
def start(self) -> None:
|
||||
if self._sync_thread is not None and self._sync_thread.is_alive():
|
||||
return
|
||||
self._shutdown.clear()
|
||||
self._sync_thread = threading.Thread(
|
||||
target=self._sync_loop, name="site-sync-worker", daemon=True
|
||||
)
|
||||
self._sync_thread.start()
|
||||
logger.info("Site sync worker started (interval=%ds)", self.interval_seconds)
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
if self._sync_thread is not None:
|
||||
self._sync_thread.join(timeout=10.0)
|
||||
logger.info("Site sync worker shut down")
|
||||
|
||||
def trigger_sync(self, bucket_name: str) -> Optional[SiteSyncStats]:
|
||||
from .replication import REPLICATION_MODE_BIDIRECTIONAL
|
||||
rule = self.replication_manager.get_rule(bucket_name)
|
||||
if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
|
||||
return None
|
||||
return self._sync_bucket(rule)
|
||||
|
||||
def get_stats(self, bucket_name: str) -> Optional[SiteSyncStats]:
|
||||
with self._lock:
|
||||
return self._bucket_stats.get(bucket_name)
|
||||
|
||||
def _sync_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.interval_seconds)
|
||||
if self._shutdown.is_set():
|
||||
break
|
||||
self._run_sync_cycle()
|
||||
|
||||
def _run_sync_cycle(self) -> None:
|
||||
from .replication import REPLICATION_MODE_BIDIRECTIONAL
|
||||
for bucket_name, rule in list(self.replication_manager._rules.items()):
|
||||
if self._shutdown.is_set():
|
||||
break
|
||||
if rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
|
||||
continue
|
||||
try:
|
||||
stats = self._sync_bucket(rule)
|
||||
with self._lock:
|
||||
self._bucket_stats[bucket_name] = stats
|
||||
except Exception as e:
|
||||
logger.exception("Site sync failed for bucket %s: %s", bucket_name, e)
|
||||
|
||||
def _sync_bucket(self, rule: "ReplicationRule") -> SiteSyncStats:
|
||||
stats = SiteSyncStats()
|
||||
connection = self.connections.get(rule.target_connection_id)
|
||||
if not connection:
|
||||
logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name)
|
||||
stats.errors += 1
|
||||
return stats
|
||||
|
||||
try:
|
||||
local_objects = self._list_local_objects(rule.bucket_name)
|
||||
except Exception as e:
|
||||
logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e)
|
||||
stats.errors += 1
|
||||
return stats
|
||||
|
||||
try:
|
||||
remote_objects = self._list_remote_objects(rule, connection)
|
||||
except Exception as e:
|
||||
logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e)
|
||||
stats.errors += 1
|
||||
return stats
|
||||
|
||||
sync_state = self._load_sync_state(rule.bucket_name)
|
||||
local_keys = set(local_objects.keys())
|
||||
remote_keys = set(remote_objects.keys())
|
||||
|
||||
to_pull = []
|
||||
for key in remote_keys:
|
||||
remote_meta = remote_objects[key]
|
||||
local_meta = local_objects.get(key)
|
||||
if local_meta is None:
|
||||
to_pull.append(key)
|
||||
else:
|
||||
resolution = self._resolve_conflict(local_meta, remote_meta)
|
||||
if resolution == "pull":
|
||||
to_pull.append(key)
|
||||
stats.conflicts_resolved += 1
|
||||
else:
|
||||
stats.objects_skipped += 1
|
||||
|
||||
pulled_count = 0
|
||||
for key in to_pull:
|
||||
if self._shutdown.is_set():
|
||||
break
|
||||
if pulled_count >= self.batch_size:
|
||||
break
|
||||
remote_meta = remote_objects[key]
|
||||
success = self._pull_object(rule, key, connection, remote_meta)
|
||||
if success:
|
||||
stats.objects_pulled += 1
|
||||
pulled_count += 1
|
||||
sync_state.synced_objects[key] = SyncedObjectInfo(
|
||||
last_synced_at=time.time(),
|
||||
remote_etag=remote_meta.etag,
|
||||
source="remote",
|
||||
)
|
||||
else:
|
||||
stats.errors += 1
|
||||
|
||||
if rule.sync_deletions:
|
||||
for key in list(sync_state.synced_objects.keys()):
|
||||
if key not in remote_keys and key in local_keys:
|
||||
tracked = sync_state.synced_objects[key]
|
||||
if tracked.source == "remote":
|
||||
local_meta = local_objects.get(key)
|
||||
if local_meta and local_meta.last_modified.timestamp() <= tracked.last_synced_at:
|
||||
success = self._apply_remote_deletion(rule.bucket_name, key)
|
||||
if success:
|
||||
stats.deletions_applied += 1
|
||||
del sync_state.synced_objects[key]
|
||||
|
||||
sync_state.last_full_sync = time.time()
|
||||
self._save_sync_state(rule.bucket_name, sync_state)
|
||||
|
||||
with self.replication_manager._stats_lock:
|
||||
rule.last_pull_at = time.time()
|
||||
self.replication_manager.save_rules()
|
||||
|
||||
stats.last_sync_at = time.time()
|
||||
logger.info(
|
||||
"Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d",
|
||||
rule.bucket_name,
|
||||
stats.objects_pulled,
|
||||
stats.objects_skipped,
|
||||
stats.conflicts_resolved,
|
||||
stats.deletions_applied,
|
||||
stats.errors,
|
||||
)
|
||||
return stats
|
||||
|
||||
def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]:
|
||||
from .storage import ObjectMeta
|
||||
objects = self.storage.list_objects_all(bucket_name)
|
||||
return {obj.key: obj for obj in objects}
|
||||
|
||||
def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]:
|
||||
s3 = self._create_client(connection)
|
||||
result: Dict[str, RemoteObjectMeta] = {}
|
||||
paginator = s3.get_paginator("list_objects_v2")
|
||||
try:
|
||||
for page in paginator.paginate(Bucket=rule.target_bucket):
|
||||
for obj in page.get("Contents", []):
|
||||
meta = RemoteObjectMeta.from_s3_object(obj)
|
||||
result[meta.key] = meta
|
||||
except ClientError as e:
|
||||
if e.response["Error"]["Code"] == "NoSuchBucket":
|
||||
return {}
|
||||
raise
|
||||
return result
|
||||
|
||||
def _resolve_conflict(self, local_meta: Any, remote_meta: RemoteObjectMeta) -> str:
|
||||
local_ts = local_meta.last_modified.timestamp()
|
||||
remote_ts = remote_meta.last_modified.timestamp()
|
||||
|
||||
if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
|
||||
local_etag = local_meta.etag or ""
|
||||
if remote_meta.etag == local_etag:
|
||||
return "skip"
|
||||
return "pull" if remote_meta.etag > local_etag else "keep"
|
||||
|
||||
return "pull" if remote_ts > local_ts else "keep"
|
||||
|
||||
def _pull_object(
|
||||
self,
|
||||
rule: "ReplicationRule",
|
||||
object_key: str,
|
||||
connection: "RemoteConnection",
|
||||
remote_meta: RemoteObjectMeta,
|
||||
) -> bool:
|
||||
s3 = self._create_client(connection)
|
||||
tmp_path = None
|
||||
try:
|
||||
tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"
|
||||
tmp_dir.mkdir(parents=True, exist_ok=True)
|
||||
with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
|
||||
tmp_path = Path(tmp_file.name)
|
||||
|
||||
s3.download_file(rule.target_bucket, object_key, str(tmp_path))
|
||||
|
||||
head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
|
||||
user_metadata = head_response.get("Metadata", {})
|
||||
|
||||
with open(tmp_path, "rb") as f:
|
||||
self.storage.put_object(
|
||||
rule.bucket_name,
|
||||
object_key,
|
||||
f,
|
||||
metadata=user_metadata if user_metadata else None,
|
||||
)
|
||||
|
||||
logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key)
|
||||
return True
|
||||
|
||||
except ClientError as e:
|
||||
logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e)
|
||||
return False
|
||||
except Exception as e:
|
||||
logger.error("Failed to store pulled object %s/%s: %s", rule.bucket_name, object_key, e)
|
||||
return False
|
||||
finally:
|
||||
if tmp_path and tmp_path.exists():
|
||||
try:
|
||||
tmp_path.unlink()
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool:
|
||||
try:
|
||||
self.storage.delete_object(bucket_name, object_key)
|
||||
logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key)
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e)
|
||||
return False
|
||||
|
||||
def _sync_state_path(self, bucket_name: str) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json"
|
||||
|
||||
def _load_sync_state(self, bucket_name: str) -> SyncState:
|
||||
path = self._sync_state_path(bucket_name)
|
||||
if not path.exists():
|
||||
return SyncState()
|
||||
try:
|
||||
data = json.loads(path.read_text(encoding="utf-8"))
|
||||
return SyncState.from_dict(data)
|
||||
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||
logger.warning("Failed to load sync state for %s: %s", bucket_name, e)
|
||||
return SyncState()
|
||||
|
||||
def _save_sync_state(self, bucket_name: str, state: SyncState) -> None:
|
||||
path = self._sync_state_path(bucket_name)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.warning("Failed to save sync state for %s: %s", bucket_name, e)
|
||||
1920
app/storage.py
1920
app/storage.py
File diff suppressed because it is too large
Load Diff
215
app/system_metrics.py
Normal file
215
app/system_metrics.py
Normal file
@@ -0,0 +1,215 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, TYPE_CHECKING
|
||||
|
||||
import psutil
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .storage import ObjectStorage
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class SystemMetricsSnapshot:
|
||||
timestamp: datetime
|
||||
cpu_percent: float
|
||||
memory_percent: float
|
||||
disk_percent: float
|
||||
storage_bytes: int
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
return {
|
||||
"timestamp": self.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
|
||||
"cpu_percent": round(self.cpu_percent, 2),
|
||||
"memory_percent": round(self.memory_percent, 2),
|
||||
"disk_percent": round(self.disk_percent, 2),
|
||||
"storage_bytes": self.storage_bytes,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot":
|
||||
timestamp_str = data["timestamp"]
|
||||
if timestamp_str.endswith("Z"):
|
||||
timestamp_str = timestamp_str[:-1] + "+00:00"
|
||||
return cls(
|
||||
timestamp=datetime.fromisoformat(timestamp_str),
|
||||
cpu_percent=data.get("cpu_percent", 0.0),
|
||||
memory_percent=data.get("memory_percent", 0.0),
|
||||
disk_percent=data.get("disk_percent", 0.0),
|
||||
storage_bytes=data.get("storage_bytes", 0),
|
||||
)
|
||||
|
||||
|
||||
class SystemMetricsCollector:
|
||||
def __init__(
|
||||
self,
|
||||
storage_root: Path,
|
||||
interval_minutes: int = 5,
|
||||
retention_hours: int = 24,
|
||||
):
|
||||
self.storage_root = storage_root
|
||||
self.interval_seconds = interval_minutes * 60
|
||||
self.retention_hours = retention_hours
|
||||
self._lock = threading.Lock()
|
||||
self._shutdown = threading.Event()
|
||||
self._snapshots: List[SystemMetricsSnapshot] = []
|
||||
self._storage_ref: Optional["ObjectStorage"] = None
|
||||
|
||||
self._load_history()
|
||||
|
||||
self._snapshot_thread = threading.Thread(
|
||||
target=self._snapshot_loop,
|
||||
name="system-metrics-snapshot",
|
||||
daemon=True,
|
||||
)
|
||||
self._snapshot_thread.start()
|
||||
|
||||
def set_storage(self, storage: "ObjectStorage") -> None:
|
||||
with self._lock:
|
||||
self._storage_ref = storage
|
||||
|
||||
def _config_path(self) -> Path:
|
||||
return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json"
|
||||
|
||||
def _load_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
if not config_path.exists():
|
||||
return
|
||||
try:
|
||||
data = json.loads(config_path.read_text(encoding="utf-8"))
|
||||
history_data = data.get("history", [])
|
||||
self._snapshots = [SystemMetricsSnapshot.from_dict(s) for s in history_data]
|
||||
self._prune_old_snapshots()
|
||||
except (json.JSONDecodeError, OSError, KeyError) as e:
|
||||
logger.warning(f"Failed to load system metrics history: {e}")
|
||||
|
||||
def _save_history(self) -> None:
|
||||
config_path = self._config_path()
|
||||
config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
try:
|
||||
data = {"history": [s.to_dict() for s in self._snapshots]}
|
||||
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
|
||||
except OSError as e:
|
||||
logger.warning(f"Failed to save system metrics history: {e}")
|
||||
|
||||
def _prune_old_snapshots(self) -> None:
|
||||
if not self._snapshots:
|
||||
return
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
|
||||
self._snapshots = [
|
||||
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
|
||||
]
|
||||
|
||||
def _snapshot_loop(self) -> None:
|
||||
while not self._shutdown.is_set():
|
||||
self._shutdown.wait(timeout=self.interval_seconds)
|
||||
if not self._shutdown.is_set():
|
||||
self._take_snapshot()
|
||||
|
||||
def _take_snapshot(self) -> None:
|
||||
try:
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk = psutil.disk_usage(str(self.storage_root))
|
||||
|
||||
storage_bytes = 0
|
||||
with self._lock:
|
||||
storage = self._storage_ref
|
||||
if storage:
|
||||
try:
|
||||
buckets = storage.list_buckets()
|
||||
for bucket in buckets:
|
||||
stats = storage.bucket_stats(bucket.name, cache_ttl=60)
|
||||
storage_bytes += stats.get("total_bytes", stats.get("bytes", 0))
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to collect bucket stats: {e}")
|
||||
|
||||
snapshot = SystemMetricsSnapshot(
|
||||
timestamp=datetime.now(timezone.utc),
|
||||
cpu_percent=cpu_percent,
|
||||
memory_percent=memory.percent,
|
||||
disk_percent=disk.percent,
|
||||
storage_bytes=storage_bytes,
|
||||
)
|
||||
|
||||
with self._lock:
|
||||
self._snapshots.append(snapshot)
|
||||
self._prune_old_snapshots()
|
||||
self._save_history()
|
||||
|
||||
logger.debug(f"System metrics snapshot taken: CPU={cpu_percent:.1f}%, Memory={memory.percent:.1f}%")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to take system metrics snapshot: {e}")
|
||||
|
||||
def get_current(self) -> Dict[str, Any]:
|
||||
cpu_percent = psutil.cpu_percent(interval=0.1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk = psutil.disk_usage(str(self.storage_root))
|
||||
boot_time = psutil.boot_time()
|
||||
uptime_seconds = time.time() - boot_time
|
||||
uptime_days = int(uptime_seconds / 86400)
|
||||
|
||||
total_buckets = 0
|
||||
total_objects = 0
|
||||
total_bytes_used = 0
|
||||
total_versions = 0
|
||||
|
||||
with self._lock:
|
||||
storage = self._storage_ref
|
||||
if storage:
|
||||
try:
|
||||
buckets = storage.list_buckets()
|
||||
total_buckets = len(buckets)
|
||||
for bucket in buckets:
|
||||
stats = storage.bucket_stats(bucket.name, cache_ttl=60)
|
||||
total_objects += stats.get("total_objects", stats.get("objects", 0))
|
||||
total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
|
||||
total_versions += stats.get("version_count", 0)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to collect current bucket stats: {e}")
|
||||
|
||||
return {
|
||||
"cpu_percent": round(cpu_percent, 2),
|
||||
"memory": {
|
||||
"total": memory.total,
|
||||
"available": memory.available,
|
||||
"used": memory.used,
|
||||
"percent": round(memory.percent, 2),
|
||||
},
|
||||
"disk": {
|
||||
"total": disk.total,
|
||||
"free": disk.free,
|
||||
"used": disk.used,
|
||||
"percent": round(disk.percent, 2),
|
||||
},
|
||||
"app": {
|
||||
"buckets": total_buckets,
|
||||
"objects": total_objects,
|
||||
"versions": total_versions,
|
||||
"storage_bytes": total_bytes_used,
|
||||
"uptime_days": uptime_days,
|
||||
},
|
||||
}
|
||||
|
||||
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
|
||||
with self._lock:
|
||||
snapshots = list(self._snapshots)
|
||||
|
||||
if hours:
|
||||
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
|
||||
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
|
||||
|
||||
return [s.to_dict() for s in snapshots]
|
||||
|
||||
def shutdown(self) -> None:
|
||||
self._shutdown.set()
|
||||
self._take_snapshot()
|
||||
self._snapshot_thread.join(timeout=5.0)
|
||||
@@ -1,7 +1,6 @@
|
||||
"""Central location for the application version string."""
|
||||
from __future__ import annotations
|
||||
|
||||
APP_VERSION = "0.1.8"
|
||||
APP_VERSION = "0.4.3"
|
||||
|
||||
|
||||
def get_version() -> str:
|
||||
|
||||
108
app/website_domains.py
Normal file
108
app/website_domains.py
Normal file
@@ -0,0 +1,108 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
_DOMAIN_RE = re.compile(
|
||||
r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
|
||||
)
|
||||
|
||||
|
||||
def normalize_domain(raw: str) -> str:
|
||||
raw = raw.strip().lower()
|
||||
for prefix in ("https://", "http://"):
|
||||
if raw.startswith(prefix):
|
||||
raw = raw[len(prefix):]
|
||||
raw = raw.split("/", 1)[0]
|
||||
raw = raw.split("?", 1)[0]
|
||||
raw = raw.split("#", 1)[0]
|
||||
if ":" in raw:
|
||||
raw = raw.rsplit(":", 1)[0]
|
||||
return raw
|
||||
|
||||
|
||||
def is_valid_domain(domain: str) -> bool:
|
||||
if not domain or len(domain) > 253:
|
||||
return False
|
||||
return bool(_DOMAIN_RE.match(domain))
|
||||
|
||||
|
||||
class WebsiteDomainStore:
|
||||
def __init__(self, config_path: Path) -> None:
|
||||
self.config_path = config_path
|
||||
self._lock = threading.Lock()
|
||||
self._domains: Dict[str, str] = {}
|
||||
self._last_mtime: float = 0.0
|
||||
self.reload()
|
||||
|
||||
def reload(self) -> None:
|
||||
if not self.config_path.exists():
|
||||
self._domains = {}
|
||||
self._last_mtime = 0.0
|
||||
return
|
||||
try:
|
||||
self._last_mtime = self.config_path.stat().st_mtime
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, dict):
|
||||
self._domains = {k.lower(): v for k, v in data.items()}
|
||||
else:
|
||||
self._domains = {}
|
||||
except (OSError, json.JSONDecodeError):
|
||||
self._domains = {}
|
||||
|
||||
def _maybe_reload(self) -> None:
|
||||
try:
|
||||
if self.config_path.exists():
|
||||
mtime = self.config_path.stat().st_mtime
|
||||
if mtime != self._last_mtime:
|
||||
self._last_mtime = mtime
|
||||
with open(self.config_path, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
if isinstance(data, dict):
|
||||
self._domains = {k.lower(): v for k, v in data.items()}
|
||||
else:
|
||||
self._domains = {}
|
||||
elif self._domains:
|
||||
self._domains = {}
|
||||
self._last_mtime = 0.0
|
||||
except (OSError, json.JSONDecodeError):
|
||||
pass
|
||||
|
||||
def _save(self) -> None:
|
||||
self.config_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(self.config_path, "w", encoding="utf-8") as f:
|
||||
json.dump(self._domains, f, indent=2)
|
||||
self._last_mtime = self.config_path.stat().st_mtime
|
||||
|
||||
def list_all(self) -> List[Dict[str, str]]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
|
||||
|
||||
def get_bucket(self, domain: str) -> Optional[str]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return self._domains.get(domain.lower())
|
||||
|
||||
def get_domains_for_bucket(self, bucket: str) -> List[str]:
|
||||
with self._lock:
|
||||
self._maybe_reload()
|
||||
return [d for d, b in self._domains.items() if b == bucket]
|
||||
|
||||
def set_mapping(self, domain: str, bucket: str) -> None:
|
||||
with self._lock:
|
||||
self._domains[domain.lower()] = bucket
|
||||
self._save()
|
||||
|
||||
def delete_mapping(self, domain: str) -> bool:
|
||||
with self._lock:
|
||||
key = domain.lower()
|
||||
if key not in self._domains:
|
||||
return False
|
||||
del self._domains[key]
|
||||
self._save()
|
||||
return True
|
||||
@@ -1,5 +1,6 @@
|
||||
#!/bin/sh
|
||||
set -e
|
||||
|
||||
# Run both services using the python runner in production mode
|
||||
exec python run.py --prod
|
||||
ENGINE="${ENGINE:-rust}"
|
||||
|
||||
exec python run.py --prod --engine "$ENGINE"
|
||||
|
||||
3443
myfsio-engine/Cargo.lock
generated
Normal file
3443
myfsio-engine/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
45
myfsio-engine/Cargo.toml
Normal file
45
myfsio-engine/Cargo.toml
Normal file
@@ -0,0 +1,45 @@
|
||||
[workspace]
|
||||
resolver = "2"
|
||||
members = [
|
||||
"crates/myfsio-common",
|
||||
"crates/myfsio-auth",
|
||||
"crates/myfsio-crypto",
|
||||
"crates/myfsio-storage",
|
||||
"crates/myfsio-xml",
|
||||
"crates/myfsio-server",
|
||||
]
|
||||
|
||||
[workspace.dependencies]
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
axum = { version = "0.8" }
|
||||
tower = { version = "0.5" }
|
||||
tower-http = { version = "0.6", features = ["cors", "trace"] }
|
||||
hyper = { version = "1" }
|
||||
bytes = "1"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
quick-xml = { version = "0.37", features = ["serialize"] }
|
||||
hmac = "0.12"
|
||||
sha2 = "0.10"
|
||||
md-5 = "0.10"
|
||||
hex = "0.4"
|
||||
aes = "0.8"
|
||||
aes-gcm = "0.10"
|
||||
cbc = { version = "0.1", features = ["alloc"] }
|
||||
hkdf = "0.12"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
parking_lot = "0.12"
|
||||
lru = "0.14"
|
||||
percent-encoding = "2"
|
||||
regex = "1"
|
||||
unicode-normalization = "0.1"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = "0.3"
|
||||
thiserror = "2"
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
base64 = "0.22"
|
||||
tokio-util = { version = "0.7", features = ["io"] }
|
||||
futures = "0.3"
|
||||
dashmap = "6"
|
||||
crc32fast = "1"
|
||||
duckdb = { version = "1", features = ["bundled"] }
|
||||
26
myfsio-engine/crates/myfsio-auth/Cargo.toml
Normal file
26
myfsio-engine/crates/myfsio-auth/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "myfsio-auth"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
myfsio-common = { path = "../myfsio-common" }
|
||||
hmac = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
aes = { workspace = true }
|
||||
cbc = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
pbkdf2 = "0.12"
|
||||
lru = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
percent-encoding = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
80
myfsio-engine/crates/myfsio-auth/src/fernet.rs
Normal file
80
myfsio-engine/crates/myfsio-auth/src/fernet.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
use aes::cipher::{block_padding::Pkcs7, BlockDecryptMut, KeyIvInit};
|
||||
use base64::{engine::general_purpose::URL_SAFE, Engine};
|
||||
use hmac::{Hmac, Mac};
|
||||
use sha2::Sha256;
|
||||
|
||||
type Aes128CbcDec = cbc::Decryptor<aes::Aes128>;
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
|
||||
pub fn derive_fernet_key(secret: &str) -> String {
|
||||
let mut derived = [0u8; 32];
|
||||
pbkdf2::pbkdf2_hmac::<Sha256>(
|
||||
secret.as_bytes(),
|
||||
b"myfsio-iam-encryption",
|
||||
100_000,
|
||||
&mut derived,
|
||||
);
|
||||
URL_SAFE.encode(derived)
|
||||
}
|
||||
|
||||
pub fn decrypt(key_b64: &str, token: &str) -> Result<Vec<u8>, &'static str> {
|
||||
let key_bytes = URL_SAFE
|
||||
.decode(key_b64)
|
||||
.map_err(|_| "invalid fernet key base64")?;
|
||||
if key_bytes.len() != 32 {
|
||||
return Err("fernet key must be 32 bytes");
|
||||
}
|
||||
|
||||
let signing_key = &key_bytes[..16];
|
||||
let encryption_key = &key_bytes[16..];
|
||||
|
||||
let token_bytes = URL_SAFE
|
||||
.decode(token)
|
||||
.map_err(|_| "invalid fernet token base64")?;
|
||||
|
||||
if token_bytes.len() < 57 {
|
||||
return Err("fernet token too short");
|
||||
}
|
||||
|
||||
if token_bytes[0] != 0x80 {
|
||||
return Err("invalid fernet version");
|
||||
}
|
||||
|
||||
let hmac_offset = token_bytes.len() - 32;
|
||||
let payload = &token_bytes[..hmac_offset];
|
||||
let expected_hmac = &token_bytes[hmac_offset..];
|
||||
|
||||
let mut mac =
|
||||
HmacSha256::new_from_slice(signing_key).map_err(|_| "hmac key error")?;
|
||||
mac.update(payload);
|
||||
mac.verify_slice(expected_hmac)
|
||||
.map_err(|_| "HMAC verification failed")?;
|
||||
|
||||
let iv = &token_bytes[9..25];
|
||||
let ciphertext = &token_bytes[25..hmac_offset];
|
||||
|
||||
let plaintext = Aes128CbcDec::new(encryption_key.into(), iv.into())
|
||||
.decrypt_padded_vec_mut::<Pkcs7>(ciphertext)
|
||||
.map_err(|_| "AES-CBC decryption failed")?;
|
||||
|
||||
Ok(plaintext)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_derive_fernet_key_format() {
|
||||
let key = derive_fernet_key("test-secret");
|
||||
let decoded = URL_SAFE.decode(&key).unwrap();
|
||||
assert_eq!(decoded.len(), 32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_roundtrip_with_python_compat() {
|
||||
let key = derive_fernet_key("dev-secret-key");
|
||||
let decoded = URL_SAFE.decode(&key).unwrap();
|
||||
assert_eq!(decoded.len(), 32);
|
||||
}
|
||||
}
|
||||
812
myfsio-engine/crates/myfsio-auth/src/iam.rs
Normal file
812
myfsio-engine/crates/myfsio-auth/src/iam.rs
Normal file
@@ -0,0 +1,812 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use myfsio_common::types::Principal;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Instant, SystemTime};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct IamConfig {
|
||||
#[serde(default = "default_version")]
|
||||
pub version: u32,
|
||||
#[serde(default)]
|
||||
pub users: Vec<IamUser>,
|
||||
}
|
||||
|
||||
fn default_version() -> u32 {
|
||||
2
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct IamUser {
|
||||
pub user_id: String,
|
||||
pub display_name: String,
|
||||
#[serde(default = "default_enabled")]
|
||||
pub enabled: bool,
|
||||
#[serde(default)]
|
||||
pub expires_at: Option<String>,
|
||||
#[serde(default)]
|
||||
pub access_keys: Vec<AccessKey>,
|
||||
#[serde(default)]
|
||||
pub policies: Vec<IamPolicy>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
struct RawIamConfig {
|
||||
#[serde(default)]
|
||||
pub users: Vec<RawIamUser>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize)]
|
||||
struct RawIamUser {
|
||||
pub user_id: Option<String>,
|
||||
pub display_name: Option<String>,
|
||||
#[serde(default = "default_enabled")]
|
||||
pub enabled: bool,
|
||||
#[serde(default)]
|
||||
pub expires_at: Option<String>,
|
||||
pub access_key: Option<String>,
|
||||
pub secret_key: Option<String>,
|
||||
#[serde(default)]
|
||||
pub access_keys: Vec<AccessKey>,
|
||||
#[serde(default)]
|
||||
pub policies: Vec<IamPolicy>,
|
||||
}
|
||||
|
||||
impl RawIamUser {
|
||||
fn normalize(self) -> IamUser {
|
||||
let mut access_keys = self.access_keys;
|
||||
if access_keys.is_empty() {
|
||||
if let (Some(ak), Some(sk)) = (self.access_key, self.secret_key) {
|
||||
access_keys.push(AccessKey {
|
||||
access_key: ak,
|
||||
secret_key: sk,
|
||||
status: "active".to_string(),
|
||||
created_at: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
let display_name = self.display_name.unwrap_or_else(|| {
|
||||
access_keys.first().map(|k| k.access_key.clone()).unwrap_or_else(|| "unknown".to_string())
|
||||
});
|
||||
let user_id = self.user_id.unwrap_or_else(|| {
|
||||
format!("u-{}", display_name.to_ascii_lowercase().replace(' ', "-"))
|
||||
});
|
||||
IamUser {
|
||||
user_id,
|
||||
display_name,
|
||||
enabled: self.enabled,
|
||||
expires_at: self.expires_at,
|
||||
access_keys,
|
||||
policies: self.policies,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_enabled() -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct AccessKey {
|
||||
pub access_key: String,
|
||||
pub secret_key: String,
|
||||
#[serde(default = "default_status")]
|
||||
pub status: String,
|
||||
#[serde(default)]
|
||||
pub created_at: Option<String>,
|
||||
}
|
||||
|
||||
fn default_status() -> String {
|
||||
"active".to_string()
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct IamPolicy {
|
||||
pub bucket: String,
|
||||
pub actions: Vec<String>,
|
||||
#[serde(default = "default_prefix")]
|
||||
pub prefix: String,
|
||||
}
|
||||
|
||||
fn default_prefix() -> String {
|
||||
"*".to_string()
|
||||
}
|
||||
|
||||
struct IamState {
|
||||
key_secrets: HashMap<String, String>,
|
||||
key_index: HashMap<String, String>,
|
||||
key_status: HashMap<String, String>,
|
||||
user_records: HashMap<String, IamUser>,
|
||||
file_mtime: Option<SystemTime>,
|
||||
last_check: Instant,
|
||||
}
|
||||
|
||||
pub struct IamService {
|
||||
config_path: PathBuf,
|
||||
state: Arc<RwLock<IamState>>,
|
||||
check_interval: std::time::Duration,
|
||||
fernet_key: Option<String>,
|
||||
}
|
||||
|
||||
impl IamService {
|
||||
pub fn new(config_path: PathBuf) -> Self {
|
||||
Self::new_with_secret(config_path, None)
|
||||
}
|
||||
|
||||
pub fn new_with_secret(config_path: PathBuf, secret_key: Option<String>) -> Self {
|
||||
let fernet_key = secret_key.map(|s| crate::fernet::derive_fernet_key(&s));
|
||||
let service = Self {
|
||||
config_path,
|
||||
state: Arc::new(RwLock::new(IamState {
|
||||
key_secrets: HashMap::new(),
|
||||
key_index: HashMap::new(),
|
||||
key_status: HashMap::new(),
|
||||
user_records: HashMap::new(),
|
||||
file_mtime: None,
|
||||
last_check: Instant::now(),
|
||||
})),
|
||||
check_interval: std::time::Duration::from_secs(2),
|
||||
fernet_key,
|
||||
};
|
||||
service.reload();
|
||||
service
|
||||
}
|
||||
|
||||
fn reload_if_needed(&self) {
|
||||
{
|
||||
let state = self.state.read();
|
||||
if state.last_check.elapsed() < self.check_interval {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
let current_mtime = std::fs::metadata(&self.config_path)
|
||||
.and_then(|m| m.modified())
|
||||
.ok();
|
||||
|
||||
let needs_reload = {
|
||||
let state = self.state.read();
|
||||
match (&state.file_mtime, ¤t_mtime) {
|
||||
(None, Some(_)) => true,
|
||||
(Some(old), Some(new)) => old != new,
|
||||
(Some(_), None) => true,
|
||||
(None, None) => state.key_secrets.is_empty(),
|
||||
}
|
||||
};
|
||||
|
||||
if needs_reload {
|
||||
self.reload();
|
||||
}
|
||||
|
||||
self.state.write().last_check = Instant::now();
|
||||
}
|
||||
|
||||
fn reload(&self) {
|
||||
let content = match std::fs::read_to_string(&self.config_path) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!("Failed to read IAM config {}: {}", self.config_path.display(), e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let raw = if content.starts_with("MYFSIO_IAM_ENC:") {
|
||||
let encrypted_token = &content["MYFSIO_IAM_ENC:".len()..];
|
||||
match &self.fernet_key {
|
||||
Some(key) => match crate::fernet::decrypt(key, encrypted_token.trim()) {
|
||||
Ok(plaintext) => match String::from_utf8(plaintext) {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::error!("Decrypted IAM config is not valid UTF-8: {}", e);
|
||||
return;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to decrypt IAM config: {}. SECRET_KEY may have changed.", e);
|
||||
return;
|
||||
}
|
||||
},
|
||||
None => {
|
||||
tracing::error!("IAM config is encrypted but no SECRET_KEY configured");
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
content
|
||||
};
|
||||
|
||||
let raw_config: RawIamConfig = match serde_json::from_str(&raw) {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to parse IAM config: {}", e);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let users: Vec<IamUser> = raw_config.users.into_iter().map(|u| u.normalize()).collect();
|
||||
|
||||
let mut key_secrets = HashMap::new();
|
||||
let mut key_index = HashMap::new();
|
||||
let mut key_status = HashMap::new();
|
||||
let mut user_records = HashMap::new();
|
||||
|
||||
for user in &users {
|
||||
user_records.insert(user.user_id.clone(), user.clone());
|
||||
for ak in &user.access_keys {
|
||||
key_secrets.insert(ak.access_key.clone(), ak.secret_key.clone());
|
||||
key_index.insert(ak.access_key.clone(), user.user_id.clone());
|
||||
key_status.insert(ak.access_key.clone(), ak.status.clone());
|
||||
}
|
||||
}
|
||||
|
||||
let file_mtime = std::fs::metadata(&self.config_path)
|
||||
.and_then(|m| m.modified())
|
||||
.ok();
|
||||
|
||||
let mut state = self.state.write();
|
||||
state.key_secrets = key_secrets;
|
||||
state.key_index = key_index;
|
||||
state.key_status = key_status;
|
||||
state.user_records = user_records;
|
||||
state.file_mtime = file_mtime;
|
||||
state.last_check = Instant::now();
|
||||
|
||||
tracing::info!("IAM config reloaded: {} users, {} keys",
|
||||
users.len(),
|
||||
state.key_secrets.len());
|
||||
}
|
||||
|
||||
pub fn get_secret_key(&self, access_key: &str) -> Option<String> {
|
||||
self.reload_if_needed();
|
||||
let state = self.state.read();
|
||||
|
||||
let status = state.key_status.get(access_key)?;
|
||||
if status != "active" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let user_id = state.key_index.get(access_key)?;
|
||||
let user = state.user_records.get(user_id)?;
|
||||
if !user.enabled {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(ref expires_at) = user.expires_at {
|
||||
if let Ok(exp) = expires_at.parse::<DateTime<Utc>>() {
|
||||
if Utc::now() > exp {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
state.key_secrets.get(access_key).cloned()
|
||||
}
|
||||
|
||||
pub fn get_principal(&self, access_key: &str) -> Option<Principal> {
|
||||
self.reload_if_needed();
|
||||
let state = self.state.read();
|
||||
|
||||
let status = state.key_status.get(access_key)?;
|
||||
if status != "active" {
|
||||
return None;
|
||||
}
|
||||
|
||||
let user_id = state.key_index.get(access_key)?;
|
||||
let user = state.user_records.get(user_id)?;
|
||||
if !user.enabled {
|
||||
return None;
|
||||
}
|
||||
|
||||
if let Some(ref expires_at) = user.expires_at {
|
||||
if let Ok(exp) = expires_at.parse::<DateTime<Utc>>() {
|
||||
if Utc::now() > exp {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let is_admin = user.policies.iter().any(|p| {
|
||||
p.bucket == "*" && p.actions.iter().any(|a| a == "*")
|
||||
});
|
||||
|
||||
Some(Principal::new(
|
||||
access_key.to_string(),
|
||||
user.user_id.clone(),
|
||||
user.display_name.clone(),
|
||||
is_admin,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn authenticate(&self, access_key: &str, secret_key: &str) -> Option<Principal> {
|
||||
let stored_secret = self.get_secret_key(access_key)?;
|
||||
if !crate::sigv4::constant_time_compare(&stored_secret, secret_key) {
|
||||
return None;
|
||||
}
|
||||
self.get_principal(access_key)
|
||||
}
|
||||
|
||||
pub fn authorize(
|
||||
&self,
|
||||
principal: &Principal,
|
||||
bucket_name: Option<&str>,
|
||||
action: &str,
|
||||
object_key: Option<&str>,
|
||||
) -> bool {
|
||||
self.reload_if_needed();
|
||||
|
||||
if principal.is_admin {
|
||||
return true;
|
||||
}
|
||||
|
||||
let normalized_bucket = bucket_name
|
||||
.unwrap_or("*")
|
||||
.trim()
|
||||
.to_ascii_lowercase();
|
||||
let normalized_action = action.trim().to_ascii_lowercase();
|
||||
|
||||
let state = self.state.read();
|
||||
let user = match state.user_records.get(&principal.user_id) {
|
||||
Some(u) => u,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
if !user.enabled {
|
||||
return false;
|
||||
}
|
||||
|
||||
if let Some(ref expires_at) = user.expires_at {
|
||||
if let Ok(exp) = expires_at.parse::<DateTime<Utc>>() {
|
||||
if Utc::now() > exp {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for policy in &user.policies {
|
||||
if !bucket_matches(&policy.bucket, &normalized_bucket) {
|
||||
continue;
|
||||
}
|
||||
if !action_matches(&policy.actions, &normalized_action) {
|
||||
continue;
|
||||
}
|
||||
if let Some(key) = object_key {
|
||||
if !prefix_matches(&policy.prefix, key) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
pub async fn list_users(&self) -> Vec<serde_json::Value> {
|
||||
self.reload_if_needed();
|
||||
let state = self.state.read();
|
||||
state
|
||||
.user_records
|
||||
.values()
|
||||
.map(|u| {
|
||||
serde_json::json!({
|
||||
"user_id": u.user_id,
|
||||
"display_name": u.display_name,
|
||||
"enabled": u.enabled,
|
||||
"access_keys": u.access_keys.iter().map(|k| {
|
||||
serde_json::json!({
|
||||
"access_key": k.access_key,
|
||||
"status": k.status,
|
||||
"created_at": k.created_at,
|
||||
})
|
||||
}).collect::<Vec<_>>(),
|
||||
"policy_count": u.policies.len(),
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub async fn get_user(&self, identifier: &str) -> Option<serde_json::Value> {
|
||||
self.reload_if_needed();
|
||||
let state = self.state.read();
|
||||
|
||||
let user = state
|
||||
.user_records
|
||||
.get(identifier)
|
||||
.or_else(|| {
|
||||
state.key_index.get(identifier).and_then(|uid| state.user_records.get(uid))
|
||||
})?;
|
||||
|
||||
Some(serde_json::json!({
|
||||
"user_id": user.user_id,
|
||||
"display_name": user.display_name,
|
||||
"enabled": user.enabled,
|
||||
"expires_at": user.expires_at,
|
||||
"access_keys": user.access_keys.iter().map(|k| {
|
||||
serde_json::json!({
|
||||
"access_key": k.access_key,
|
||||
"status": k.status,
|
||||
"created_at": k.created_at,
|
||||
})
|
||||
}).collect::<Vec<_>>(),
|
||||
"policies": user.policies,
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn set_user_enabled(&self, identifier: &str, enabled: bool) -> Result<(), String> {
|
||||
let content = std::fs::read_to_string(&self.config_path)
|
||||
.map_err(|e| format!("Failed to read IAM config: {}", e))?;
|
||||
|
||||
let raw: RawIamConfig = serde_json::from_str(&content)
|
||||
.map_err(|e| format!("Failed to parse IAM config: {}", e))?;
|
||||
let mut config = IamConfig {
|
||||
version: 2,
|
||||
users: raw.users.into_iter().map(|u| u.normalize()).collect(),
|
||||
};
|
||||
|
||||
let user = config
|
||||
.users
|
||||
.iter_mut()
|
||||
.find(|u| {
|
||||
u.user_id == identifier
|
||||
|| u.access_keys.iter().any(|k| k.access_key == identifier)
|
||||
})
|
||||
.ok_or_else(|| "User not found".to_string())?;
|
||||
|
||||
user.enabled = enabled;
|
||||
|
||||
let json = serde_json::to_string_pretty(&config)
|
||||
.map_err(|e| format!("Failed to serialize IAM config: {}", e))?;
|
||||
std::fs::write(&self.config_path, json)
|
||||
.map_err(|e| format!("Failed to write IAM config: {}", e))?;
|
||||
|
||||
self.reload();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get_user_policies(&self, identifier: &str) -> Option<Vec<serde_json::Value>> {
|
||||
self.reload_if_needed();
|
||||
let state = self.state.read();
|
||||
let user = state
|
||||
.user_records
|
||||
.get(identifier)
|
||||
.or_else(|| {
|
||||
state.key_index.get(identifier).and_then(|uid| state.user_records.get(uid))
|
||||
})?;
|
||||
Some(
|
||||
user.policies
|
||||
.iter()
|
||||
.map(|p| serde_json::to_value(p).unwrap_or_default())
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn create_access_key(&self, identifier: &str) -> Result<serde_json::Value, String> {
|
||||
let content = std::fs::read_to_string(&self.config_path)
|
||||
.map_err(|e| format!("Failed to read IAM config: {}", e))?;
|
||||
let raw: RawIamConfig = serde_json::from_str(&content)
|
||||
.map_err(|e| format!("Failed to parse IAM config: {}", e))?;
|
||||
let mut config = IamConfig {
|
||||
version: 2,
|
||||
users: raw.users.into_iter().map(|u| u.normalize()).collect(),
|
||||
};
|
||||
|
||||
let user = config
|
||||
.users
|
||||
.iter_mut()
|
||||
.find(|u| {
|
||||
u.user_id == identifier
|
||||
|| u.access_keys.iter().any(|k| k.access_key == identifier)
|
||||
})
|
||||
.ok_or_else(|| format!("User '{}' not found", identifier))?;
|
||||
|
||||
let new_ak = format!("AK{}", uuid::Uuid::new_v4().simple());
|
||||
let new_sk = format!("SK{}", uuid::Uuid::new_v4().simple());
|
||||
|
||||
let key = AccessKey {
|
||||
access_key: new_ak.clone(),
|
||||
secret_key: new_sk.clone(),
|
||||
status: "active".to_string(),
|
||||
created_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
};
|
||||
user.access_keys.push(key);
|
||||
|
||||
let json = serde_json::to_string_pretty(&config)
|
||||
.map_err(|e| format!("Failed to serialize IAM config: {}", e))?;
|
||||
std::fs::write(&self.config_path, json)
|
||||
.map_err(|e| format!("Failed to write IAM config: {}", e))?;
|
||||
|
||||
self.reload();
|
||||
Ok(serde_json::json!({
|
||||
"access_key": new_ak,
|
||||
"secret_key": new_sk,
|
||||
}))
|
||||
}
|
||||
|
||||
pub fn delete_access_key(&self, access_key: &str) -> Result<(), String> {
|
||||
let content = std::fs::read_to_string(&self.config_path)
|
||||
.map_err(|e| format!("Failed to read IAM config: {}", e))?;
|
||||
let raw: RawIamConfig = serde_json::from_str(&content)
|
||||
.map_err(|e| format!("Failed to parse IAM config: {}", e))?;
|
||||
let mut config = IamConfig {
|
||||
version: 2,
|
||||
users: raw.users.into_iter().map(|u| u.normalize()).collect(),
|
||||
};
|
||||
|
||||
let mut found = false;
|
||||
for user in &mut config.users {
|
||||
if user.access_keys.iter().any(|k| k.access_key == access_key) {
|
||||
if user.access_keys.len() <= 1 {
|
||||
return Err("Cannot delete the last access key".to_string());
|
||||
}
|
||||
user.access_keys.retain(|k| k.access_key != access_key);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return Err(format!("Access key '{}' not found", access_key));
|
||||
}
|
||||
|
||||
let json = serde_json::to_string_pretty(&config)
|
||||
.map_err(|e| format!("Failed to serialize IAM config: {}", e))?;
|
||||
std::fs::write(&self.config_path, json)
|
||||
.map_err(|e| format!("Failed to write IAM config: {}", e))?;
|
||||
|
||||
self.reload();
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn bucket_matches(policy_bucket: &str, bucket: &str) -> bool {
|
||||
let pb = policy_bucket.trim().to_ascii_lowercase();
|
||||
pb == "*" || pb == bucket
|
||||
}
|
||||
|
||||
fn action_matches(policy_actions: &[String], action: &str) -> bool {
|
||||
for policy_action in policy_actions {
|
||||
let pa = policy_action.trim().to_ascii_lowercase();
|
||||
if pa == "*" || pa == action {
|
||||
return true;
|
||||
}
|
||||
if pa == "iam:*" && action.starts_with("iam:") {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn prefix_matches(policy_prefix: &str, object_key: &str) -> bool {
|
||||
let p = policy_prefix.trim();
|
||||
if p.is_empty() || p == "*" {
|
||||
return true;
|
||||
}
|
||||
let base = p.trim_end_matches('*');
|
||||
object_key.starts_with(base)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Write;
|
||||
|
||||
fn test_iam_json() -> String {
|
||||
serde_json::json!({
|
||||
"version": 2,
|
||||
"users": [{
|
||||
"user_id": "u-test1234",
|
||||
"display_name": "admin",
|
||||
"enabled": true,
|
||||
"access_keys": [{
|
||||
"access_key": "AKIAIOSFODNN7EXAMPLE",
|
||||
"secret_key": "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
|
||||
"status": "active",
|
||||
"created_at": "2024-01-01T00:00:00Z"
|
||||
}],
|
||||
"policies": [{
|
||||
"bucket": "*",
|
||||
"actions": ["*"],
|
||||
"prefix": "*"
|
||||
}]
|
||||
}]
|
||||
})
|
||||
.to_string()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_and_lookup() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(test_iam_json().as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
let secret = svc.get_secret_key("AKIAIOSFODNN7EXAMPLE");
|
||||
assert_eq!(
|
||||
secret.unwrap(),
|
||||
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_principal() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(test_iam_json().as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
let principal = svc.get_principal("AKIAIOSFODNN7EXAMPLE").unwrap();
|
||||
assert_eq!(principal.display_name, "admin");
|
||||
assert_eq!(principal.user_id, "u-test1234");
|
||||
assert!(principal.is_admin);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_authenticate_success() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(test_iam_json().as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
let principal = svc
|
||||
.authenticate(
|
||||
"AKIAIOSFODNN7EXAMPLE",
|
||||
"wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(principal.display_name, "admin");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_authenticate_wrong_secret() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(test_iam_json().as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
assert!(svc.authenticate("AKIAIOSFODNN7EXAMPLE", "wrongsecret").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unknown_key_returns_none() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(test_iam_json().as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
assert!(svc.get_secret_key("NONEXISTENTKEY").is_none());
|
||||
assert!(svc.get_principal("NONEXISTENTKEY").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_disabled_user() {
|
||||
let json = serde_json::json!({
|
||||
"version": 2,
|
||||
"users": [{
|
||||
"user_id": "u-disabled",
|
||||
"display_name": "disabled-user",
|
||||
"enabled": false,
|
||||
"access_keys": [{
|
||||
"access_key": "DISABLED_KEY",
|
||||
"secret_key": "secret123",
|
||||
"status": "active"
|
||||
}],
|
||||
"policies": []
|
||||
}]
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(json.as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
assert!(svc.get_secret_key("DISABLED_KEY").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_inactive_key() {
|
||||
let json = serde_json::json!({
|
||||
"version": 2,
|
||||
"users": [{
|
||||
"user_id": "u-test",
|
||||
"display_name": "test",
|
||||
"enabled": true,
|
||||
"access_keys": [{
|
||||
"access_key": "INACTIVE_KEY",
|
||||
"secret_key": "secret123",
|
||||
"status": "inactive"
|
||||
}],
|
||||
"policies": []
|
||||
}]
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(json.as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
assert!(svc.get_secret_key("INACTIVE_KEY").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_v1_flat_format() {
|
||||
let json = serde_json::json!({
|
||||
"users": [{
|
||||
"access_key": "test",
|
||||
"secret_key": "secret",
|
||||
"display_name": "Test User",
|
||||
"policies": [{"bucket": "*", "actions": ["*"], "prefix": "*"}]
|
||||
}]
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(json.as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
let secret = svc.get_secret_key("test");
|
||||
assert_eq!(secret.unwrap(), "secret");
|
||||
|
||||
let principal = svc.get_principal("test").unwrap();
|
||||
assert_eq!(principal.display_name, "Test User");
|
||||
assert!(principal.is_admin);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_authorize_allows_matching_policy() {
|
||||
let json = serde_json::json!({
|
||||
"version": 2,
|
||||
"users": [{
|
||||
"user_id": "u-reader",
|
||||
"display_name": "reader",
|
||||
"enabled": true,
|
||||
"access_keys": [{
|
||||
"access_key": "READER_KEY",
|
||||
"secret_key": "reader-secret",
|
||||
"status": "active"
|
||||
}],
|
||||
"policies": [{
|
||||
"bucket": "docs",
|
||||
"actions": ["read"],
|
||||
"prefix": "reports/"
|
||||
}]
|
||||
}]
|
||||
})
|
||||
.to_string();
|
||||
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(json.as_bytes()).unwrap();
|
||||
tmp.flush().unwrap();
|
||||
|
||||
let svc = IamService::new(tmp.path().to_path_buf());
|
||||
let principal = svc.get_principal("READER_KEY").unwrap();
|
||||
|
||||
assert!(svc.authorize(
|
||||
&principal,
|
||||
Some("docs"),
|
||||
"read",
|
||||
Some("reports/2026.csv"),
|
||||
));
|
||||
assert!(!svc.authorize(
|
||||
&principal,
|
||||
Some("docs"),
|
||||
"write",
|
||||
Some("reports/2026.csv"),
|
||||
));
|
||||
assert!(!svc.authorize(
|
||||
&principal,
|
||||
Some("docs"),
|
||||
"read",
|
||||
Some("private/2026.csv"),
|
||||
));
|
||||
assert!(!svc.authorize(
|
||||
&principal,
|
||||
Some("other"),
|
||||
"read",
|
||||
Some("reports/2026.csv"),
|
||||
));
|
||||
}
|
||||
}
|
||||
4
myfsio-engine/crates/myfsio-auth/src/lib.rs
Normal file
4
myfsio-engine/crates/myfsio-auth/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod sigv4;
|
||||
pub mod principal;
|
||||
pub mod iam;
|
||||
mod fernet;
|
||||
1
myfsio-engine/crates/myfsio-auth/src/principal.rs
Normal file
1
myfsio-engine/crates/myfsio-auth/src/principal.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub use myfsio_common::types::Principal;
|
||||
258
myfsio-engine/crates/myfsio-auth/src/sigv4.rs
Normal file
258
myfsio-engine/crates/myfsio-auth/src/sigv4.rs
Normal file
@@ -0,0 +1,258 @@
|
||||
use hmac::{Hmac, Mac};
|
||||
use lru::LruCache;
|
||||
use parking_lot::Mutex;
|
||||
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Instant;
|
||||
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
|
||||
struct CacheEntry {
|
||||
key: Vec<u8>,
|
||||
created: Instant,
|
||||
}
|
||||
|
||||
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
|
||||
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
|
||||
|
||||
const CACHE_TTL_SECS: u64 = 60;
|
||||
|
||||
const AWS_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
|
||||
.remove(b'-')
|
||||
.remove(b'_')
|
||||
.remove(b'.')
|
||||
.remove(b'~');
|
||||
|
||||
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
||||
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
||||
mac.update(msg);
|
||||
mac.finalize().into_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
fn aws_uri_encode(input: &str) -> String {
|
||||
percent_encode(input.as_bytes(), AWS_ENCODE_SET).to_string()
|
||||
}
|
||||
|
||||
pub fn derive_signing_key_cached(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
) -> Vec<u8> {
|
||||
let cache_key = (
|
||||
secret_key.to_owned(),
|
||||
date_stamp.to_owned(),
|
||||
region.to_owned(),
|
||||
service.to_owned(),
|
||||
);
|
||||
|
||||
{
|
||||
let mut cache = SIGNING_KEY_CACHE.lock();
|
||||
if let Some(entry) = cache.get(&cache_key) {
|
||||
if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
|
||||
return entry.key.clone();
|
||||
}
|
||||
cache.pop(&cache_key);
|
||||
}
|
||||
}
|
||||
|
||||
let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
|
||||
let k_region = hmac_sha256(&k_date, region.as_bytes());
|
||||
let k_service = hmac_sha256(&k_region, service.as_bytes());
|
||||
let k_signing = hmac_sha256(&k_service, b"aws4_request");
|
||||
|
||||
{
|
||||
let mut cache = SIGNING_KEY_CACHE.lock();
|
||||
cache.put(
|
||||
cache_key,
|
||||
CacheEntry {
|
||||
key: k_signing.clone(),
|
||||
created: Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
k_signing
|
||||
}
|
||||
|
||||
fn constant_time_compare_inner(a: &[u8], b: &[u8]) -> bool {
|
||||
if a.len() != b.len() {
|
||||
return false;
|
||||
}
|
||||
let mut result: u8 = 0;
|
||||
for (x, y) in a.iter().zip(b.iter()) {
|
||||
result |= x ^ y;
|
||||
}
|
||||
result == 0
|
||||
}
|
||||
|
||||
pub fn verify_sigv4_signature(
|
||||
method: &str,
|
||||
canonical_uri: &str,
|
||||
query_params: &[(String, String)],
|
||||
signed_headers_str: &str,
|
||||
header_values: &[(String, String)],
|
||||
payload_hash: &str,
|
||||
amz_date: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
secret_key: &str,
|
||||
provided_signature: &str,
|
||||
) -> bool {
|
||||
let mut sorted_params = query_params.to_vec();
|
||||
sorted_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
|
||||
|
||||
let canonical_query_string = sorted_params
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}={}", aws_uri_encode(k), aws_uri_encode(v)))
|
||||
.collect::<Vec<_>>()
|
||||
.join("&");
|
||||
|
||||
let mut canonical_headers = String::new();
|
||||
for (name, value) in header_values {
|
||||
let lower_name = name.to_lowercase();
|
||||
let normalized = value.split_whitespace().collect::<Vec<_>>().join(" ");
|
||||
let final_value = if lower_name == "expect" && normalized.is_empty() {
|
||||
"100-continue"
|
||||
} else {
|
||||
&normalized
|
||||
};
|
||||
canonical_headers.push_str(&lower_name);
|
||||
canonical_headers.push(':');
|
||||
canonical_headers.push_str(final_value);
|
||||
canonical_headers.push('\n');
|
||||
}
|
||||
|
||||
let canonical_request = format!(
|
||||
"{}\n{}\n{}\n{}\n{}\n{}",
|
||||
method, canonical_uri, canonical_query_string, canonical_headers, signed_headers_str,
|
||||
payload_hash
|
||||
);
|
||||
|
||||
let credential_scope = format!("{}/{}/{}/aws4_request", date_stamp, region, service);
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
let string_to_sign = format!(
|
||||
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||
amz_date, credential_scope, cr_hash
|
||||
);
|
||||
|
||||
let signing_key = derive_signing_key_cached(secret_key, date_stamp, region, service);
|
||||
let calculated = hmac_sha256(&signing_key, string_to_sign.as_bytes());
|
||||
let calculated_hex = hex::encode(&calculated);
|
||||
|
||||
constant_time_compare_inner(calculated_hex.as_bytes(), provided_signature.as_bytes())
|
||||
}
|
||||
|
||||
pub fn derive_signing_key(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
) -> Vec<u8> {
|
||||
derive_signing_key_cached(secret_key, date_stamp, region, service)
|
||||
}
|
||||
|
||||
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
||||
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
||||
hex::encode(sig)
|
||||
}
|
||||
|
||||
pub fn build_string_to_sign(
|
||||
amz_date: &str,
|
||||
credential_scope: &str,
|
||||
canonical_request: &str,
|
||||
) -> String {
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
format!(
|
||||
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||
amz_date, credential_scope, cr_hash
|
||||
)
|
||||
}
|
||||
|
||||
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
||||
constant_time_compare_inner(a.as_bytes(), b.as_bytes())
|
||||
}
|
||||
|
||||
pub fn clear_signing_key_cache() {
|
||||
SIGNING_KEY_CACHE.lock().clear();
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_derive_signing_key() {
|
||||
let key = derive_signing_key("wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", "20130524", "us-east-1", "s3");
|
||||
assert_eq!(key.len(), 32);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_derive_signing_key_cached() {
|
||||
let key1 = derive_signing_key("secret", "20240101", "us-east-1", "s3");
|
||||
let key2 = derive_signing_key("secret", "20240101", "us-east-1", "s3");
|
||||
assert_eq!(key1, key2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_constant_time_compare() {
|
||||
assert!(constant_time_compare("abc", "abc"));
|
||||
assert!(!constant_time_compare("abc", "abd"));
|
||||
assert!(!constant_time_compare("abc", "abcd"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_string_to_sign() {
|
||||
let result = build_string_to_sign("20130524T000000Z", "20130524/us-east-1/s3/aws4_request", "GET\n/\n\nhost:example.com\n\nhost\nUNSIGNED-PAYLOAD");
|
||||
assert!(result.starts_with("AWS4-HMAC-SHA256\n"));
|
||||
assert!(result.contains("20130524T000000Z"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_aws_uri_encode() {
|
||||
assert_eq!(aws_uri_encode("hello world"), "hello%20world");
|
||||
assert_eq!(aws_uri_encode("test-file_name.txt"), "test-file_name.txt");
|
||||
assert_eq!(aws_uri_encode("a/b"), "a%2Fb");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_verify_sigv4_roundtrip() {
|
||||
let secret = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
|
||||
let date_stamp = "20130524";
|
||||
let region = "us-east-1";
|
||||
let service = "s3";
|
||||
let amz_date = "20130524T000000Z";
|
||||
|
||||
let signing_key = derive_signing_key(secret, date_stamp, region, service);
|
||||
|
||||
let canonical_request = "GET\n/\n\nhost:examplebucket.s3.amazonaws.com\n\nhost\nUNSIGNED-PAYLOAD";
|
||||
let string_to_sign = build_string_to_sign(amz_date, &format!("{}/{}/{}/aws4_request", date_stamp, region, service), canonical_request);
|
||||
|
||||
let signature = compute_signature(&signing_key, &string_to_sign);
|
||||
|
||||
let result = verify_sigv4_signature(
|
||||
"GET",
|
||||
"/",
|
||||
&[],
|
||||
"host",
|
||||
&[("host".to_string(), "examplebucket.s3.amazonaws.com".to_string())],
|
||||
"UNSIGNED-PAYLOAD",
|
||||
amz_date,
|
||||
date_stamp,
|
||||
region,
|
||||
service,
|
||||
secret,
|
||||
&signature,
|
||||
);
|
||||
assert!(result);
|
||||
}
|
||||
}
|
||||
11
myfsio-engine/crates/myfsio-common/Cargo.toml
Normal file
11
myfsio-engine/crates/myfsio-common/Cargo.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
[package]
|
||||
name = "myfsio-common"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
thiserror = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
20
myfsio-engine/crates/myfsio-common/src/constants.rs
Normal file
20
myfsio-engine/crates/myfsio-common/src/constants.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
pub const SYSTEM_ROOT: &str = ".myfsio.sys";
|
||||
pub const SYSTEM_BUCKETS_DIR: &str = "buckets";
|
||||
pub const SYSTEM_MULTIPART_DIR: &str = "multipart";
|
||||
pub const BUCKET_META_DIR: &str = "meta";
|
||||
pub const BUCKET_VERSIONS_DIR: &str = "versions";
|
||||
pub const BUCKET_CONFIG_FILE: &str = ".bucket.json";
|
||||
pub const STATS_FILE: &str = "stats.json";
|
||||
pub const ETAG_INDEX_FILE: &str = "etag_index.json";
|
||||
pub const INDEX_FILE: &str = "_index.json";
|
||||
pub const MANIFEST_FILE: &str = "manifest.json";
|
||||
|
||||
pub const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||
|
||||
pub const DEFAULT_REGION: &str = "us-east-1";
|
||||
pub const AWS_SERVICE: &str = "s3";
|
||||
|
||||
pub const DEFAULT_MAX_KEYS: usize = 1000;
|
||||
pub const DEFAULT_OBJECT_KEY_MAX_BYTES: usize = 1024;
|
||||
pub const DEFAULT_CHUNK_SIZE: usize = 65536;
|
||||
pub const STREAM_CHUNK_SIZE: usize = 1_048_576;
|
||||
221
myfsio-engine/crates/myfsio-common/src/error.rs
Normal file
221
myfsio-engine/crates/myfsio-common/src/error.rs
Normal file
@@ -0,0 +1,221 @@
|
||||
use std::fmt;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum S3ErrorCode {
|
||||
AccessDenied,
|
||||
BucketAlreadyExists,
|
||||
BucketNotEmpty,
|
||||
EntityTooLarge,
|
||||
InternalError,
|
||||
InvalidAccessKeyId,
|
||||
InvalidArgument,
|
||||
InvalidBucketName,
|
||||
InvalidKey,
|
||||
InvalidRange,
|
||||
InvalidRequest,
|
||||
MalformedXML,
|
||||
MethodNotAllowed,
|
||||
NoSuchBucket,
|
||||
NoSuchKey,
|
||||
NoSuchUpload,
|
||||
NoSuchVersion,
|
||||
NoSuchTagSet,
|
||||
PreconditionFailed,
|
||||
NotModified,
|
||||
QuotaExceeded,
|
||||
SignatureDoesNotMatch,
|
||||
SlowDown,
|
||||
}
|
||||
|
||||
impl S3ErrorCode {
|
||||
pub fn http_status(&self) -> u16 {
|
||||
match self {
|
||||
Self::AccessDenied => 403,
|
||||
Self::BucketAlreadyExists => 409,
|
||||
Self::BucketNotEmpty => 409,
|
||||
Self::EntityTooLarge => 413,
|
||||
Self::InternalError => 500,
|
||||
Self::InvalidAccessKeyId => 403,
|
||||
Self::InvalidArgument => 400,
|
||||
Self::InvalidBucketName => 400,
|
||||
Self::InvalidKey => 400,
|
||||
Self::InvalidRange => 416,
|
||||
Self::InvalidRequest => 400,
|
||||
Self::MalformedXML => 400,
|
||||
Self::MethodNotAllowed => 405,
|
||||
Self::NoSuchBucket => 404,
|
||||
Self::NoSuchKey => 404,
|
||||
Self::NoSuchUpload => 404,
|
||||
Self::NoSuchVersion => 404,
|
||||
Self::NoSuchTagSet => 404,
|
||||
Self::PreconditionFailed => 412,
|
||||
Self::NotModified => 304,
|
||||
Self::QuotaExceeded => 403,
|
||||
Self::SignatureDoesNotMatch => 403,
|
||||
Self::SlowDown => 429,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::AccessDenied => "AccessDenied",
|
||||
Self::BucketAlreadyExists => "BucketAlreadyExists",
|
||||
Self::BucketNotEmpty => "BucketNotEmpty",
|
||||
Self::EntityTooLarge => "EntityTooLarge",
|
||||
Self::InternalError => "InternalError",
|
||||
Self::InvalidAccessKeyId => "InvalidAccessKeyId",
|
||||
Self::InvalidArgument => "InvalidArgument",
|
||||
Self::InvalidBucketName => "InvalidBucketName",
|
||||
Self::InvalidKey => "InvalidKey",
|
||||
Self::InvalidRange => "InvalidRange",
|
||||
Self::InvalidRequest => "InvalidRequest",
|
||||
Self::MalformedXML => "MalformedXML",
|
||||
Self::MethodNotAllowed => "MethodNotAllowed",
|
||||
Self::NoSuchBucket => "NoSuchBucket",
|
||||
Self::NoSuchKey => "NoSuchKey",
|
||||
Self::NoSuchUpload => "NoSuchUpload",
|
||||
Self::NoSuchVersion => "NoSuchVersion",
|
||||
Self::NoSuchTagSet => "NoSuchTagSet",
|
||||
Self::PreconditionFailed => "PreconditionFailed",
|
||||
Self::NotModified => "NotModified",
|
||||
Self::QuotaExceeded => "QuotaExceeded",
|
||||
Self::SignatureDoesNotMatch => "SignatureDoesNotMatch",
|
||||
Self::SlowDown => "SlowDown",
|
||||
}
|
||||
}
|
||||
|
||||
pub fn default_message(&self) -> &'static str {
|
||||
match self {
|
||||
Self::AccessDenied => "Access Denied",
|
||||
Self::BucketAlreadyExists => "The requested bucket name is not available",
|
||||
Self::BucketNotEmpty => "The bucket you tried to delete is not empty",
|
||||
Self::EntityTooLarge => "Your proposed upload exceeds the maximum allowed size",
|
||||
Self::InternalError => "We encountered an internal error. Please try again.",
|
||||
Self::InvalidAccessKeyId => "The access key ID you provided does not exist",
|
||||
Self::InvalidArgument => "Invalid argument",
|
||||
Self::InvalidBucketName => "The specified bucket is not valid",
|
||||
Self::InvalidKey => "The specified key is not valid",
|
||||
Self::InvalidRange => "The requested range is not satisfiable",
|
||||
Self::InvalidRequest => "Invalid request",
|
||||
Self::MalformedXML => "The XML you provided was not well-formed",
|
||||
Self::MethodNotAllowed => "The specified method is not allowed against this resource",
|
||||
Self::NoSuchBucket => "The specified bucket does not exist",
|
||||
Self::NoSuchKey => "The specified key does not exist",
|
||||
Self::NoSuchUpload => "The specified multipart upload does not exist",
|
||||
Self::NoSuchVersion => "The specified version does not exist",
|
||||
Self::NoSuchTagSet => "The TagSet does not exist",
|
||||
Self::PreconditionFailed => "At least one of the preconditions you specified did not hold",
|
||||
Self::NotModified => "Not Modified",
|
||||
Self::QuotaExceeded => "The bucket quota has been exceeded",
|
||||
Self::SignatureDoesNotMatch => "The request signature we calculated does not match the signature you provided",
|
||||
Self::SlowDown => "Please reduce your request rate",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for S3ErrorCode {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
f.write_str(self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct S3Error {
|
||||
pub code: S3ErrorCode,
|
||||
pub message: String,
|
||||
pub resource: String,
|
||||
pub request_id: String,
|
||||
}
|
||||
|
||||
impl S3Error {
|
||||
pub fn new(code: S3ErrorCode, message: impl Into<String>) -> Self {
|
||||
Self {
|
||||
code,
|
||||
message: message.into(),
|
||||
resource: String::new(),
|
||||
request_id: String::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_code(code: S3ErrorCode) -> Self {
|
||||
Self::new(code, code.default_message())
|
||||
}
|
||||
|
||||
pub fn with_resource(mut self, resource: impl Into<String>) -> Self {
|
||||
self.resource = resource.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn with_request_id(mut self, request_id: impl Into<String>) -> Self {
|
||||
self.request_id = request_id.into();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn http_status(&self) -> u16 {
|
||||
self.code.http_status()
|
||||
}
|
||||
|
||||
pub fn to_xml(&self) -> String {
|
||||
format!(
|
||||
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
|
||||
<Error>\
|
||||
<Code>{}</Code>\
|
||||
<Message>{}</Message>\
|
||||
<Resource>{}</Resource>\
|
||||
<RequestId>{}</RequestId>\
|
||||
</Error>",
|
||||
self.code.as_str(),
|
||||
xml_escape(&self.message),
|
||||
xml_escape(&self.resource),
|
||||
xml_escape(&self.request_id),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for S3Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}: {}", self.code, self.message)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::error::Error for S3Error {}
|
||||
|
||||
fn xml_escape(s: &str) -> String {
|
||||
s.replace('&', "&")
|
||||
.replace('<', "<")
|
||||
.replace('>', ">")
|
||||
.replace('"', """)
|
||||
.replace('\'', "'")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_error_codes() {
|
||||
assert_eq!(S3ErrorCode::NoSuchKey.http_status(), 404);
|
||||
assert_eq!(S3ErrorCode::AccessDenied.http_status(), 403);
|
||||
assert_eq!(S3ErrorCode::NoSuchBucket.as_str(), "NoSuchBucket");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_error_to_xml() {
|
||||
let err = S3Error::from_code(S3ErrorCode::NoSuchKey)
|
||||
.with_resource("/test-bucket/test-key")
|
||||
.with_request_id("abc123");
|
||||
let xml = err.to_xml();
|
||||
assert!(xml.contains("<Code>NoSuchKey</Code>"));
|
||||
assert!(xml.contains("<Resource>/test-bucket/test-key</Resource>"));
|
||||
assert!(xml.contains("<RequestId>abc123</RequestId>"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_xml_escape() {
|
||||
let err = S3Error::new(S3ErrorCode::InvalidArgument, "key <test> & \"value\"")
|
||||
.with_resource("/bucket/key&");
|
||||
let xml = err.to_xml();
|
||||
assert!(xml.contains("<test>"));
|
||||
assert!(xml.contains("&"));
|
||||
}
|
||||
}
|
||||
3
myfsio-engine/crates/myfsio-common/src/lib.rs
Normal file
3
myfsio-engine/crates/myfsio-common/src/lib.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod constants;
|
||||
pub mod error;
|
||||
pub mod types;
|
||||
176
myfsio-engine/crates/myfsio-common/src/types.rs
Normal file
176
myfsio-engine/crates/myfsio-common/src/types.rs
Normal file
@@ -0,0 +1,176 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ObjectMeta {
|
||||
pub key: String,
|
||||
pub size: u64,
|
||||
pub last_modified: DateTime<Utc>,
|
||||
pub etag: Option<String>,
|
||||
pub content_type: Option<String>,
|
||||
pub storage_class: Option<String>,
|
||||
pub metadata: HashMap<String, String>,
|
||||
}
|
||||
|
||||
impl ObjectMeta {
|
||||
pub fn new(key: String, size: u64, last_modified: DateTime<Utc>) -> Self {
|
||||
Self {
|
||||
key,
|
||||
size,
|
||||
last_modified,
|
||||
etag: None,
|
||||
content_type: None,
|
||||
storage_class: Some("STANDARD".to_string()),
|
||||
metadata: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct BucketMeta {
|
||||
pub name: String,
|
||||
pub creation_date: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct BucketStats {
|
||||
pub objects: u64,
|
||||
pub bytes: u64,
|
||||
pub version_count: u64,
|
||||
pub version_bytes: u64,
|
||||
}
|
||||
|
||||
impl BucketStats {
|
||||
pub fn total_objects(&self) -> u64 {
|
||||
self.objects + self.version_count
|
||||
}
|
||||
|
||||
pub fn total_bytes(&self) -> u64 {
|
||||
self.bytes + self.version_bytes
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ListObjectsResult {
|
||||
pub objects: Vec<ObjectMeta>,
|
||||
pub is_truncated: bool,
|
||||
pub next_continuation_token: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ShallowListResult {
|
||||
pub objects: Vec<ObjectMeta>,
|
||||
pub common_prefixes: Vec<String>,
|
||||
pub is_truncated: bool,
|
||||
pub next_continuation_token: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ListParams {
|
||||
pub max_keys: usize,
|
||||
pub continuation_token: Option<String>,
|
||||
pub prefix: Option<String>,
|
||||
pub start_after: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct ShallowListParams {
|
||||
pub prefix: String,
|
||||
pub delimiter: String,
|
||||
pub max_keys: usize,
|
||||
pub continuation_token: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PartMeta {
|
||||
pub part_number: u32,
|
||||
pub etag: String,
|
||||
pub size: u64,
|
||||
pub last_modified: Option<DateTime<Utc>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PartInfo {
|
||||
pub part_number: u32,
|
||||
pub etag: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct MultipartUploadInfo {
|
||||
pub upload_id: String,
|
||||
pub key: String,
|
||||
pub initiated: DateTime<Utc>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct VersionInfo {
|
||||
pub version_id: String,
|
||||
pub key: String,
|
||||
pub size: u64,
|
||||
pub last_modified: DateTime<Utc>,
|
||||
pub etag: Option<String>,
|
||||
pub is_latest: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct Tag {
|
||||
pub key: String,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
|
||||
pub struct BucketConfig {
|
||||
#[serde(default)]
|
||||
pub versioning_enabled: bool,
|
||||
#[serde(default)]
|
||||
pub tags: Vec<Tag>,
|
||||
#[serde(default)]
|
||||
pub cors: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub encryption: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub lifecycle: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub website: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub quota: Option<QuotaConfig>,
|
||||
#[serde(default)]
|
||||
pub acl: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub notification: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub logging: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub object_lock: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub policy: Option<serde_json::Value>,
|
||||
#[serde(default)]
|
||||
pub replication: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct QuotaConfig {
|
||||
pub max_bytes: Option<u64>,
|
||||
pub max_objects: Option<u64>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Principal {
|
||||
pub access_key: String,
|
||||
pub user_id: String,
|
||||
pub display_name: String,
|
||||
pub is_admin: bool,
|
||||
}
|
||||
|
||||
impl Principal {
|
||||
pub fn new(access_key: String, user_id: String, display_name: String, is_admin: bool) -> Self {
|
||||
Self {
|
||||
access_key,
|
||||
user_id,
|
||||
display_name,
|
||||
is_admin,
|
||||
}
|
||||
}
|
||||
}
|
||||
24
myfsio-engine/crates/myfsio-crypto/Cargo.toml
Normal file
24
myfsio-engine/crates/myfsio-crypto/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "myfsio-crypto"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
myfsio-common = { path = "../myfsio-common" }
|
||||
md-5 = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
aes-gcm = { workspace = true }
|
||||
hkdf = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
rand = "0.8"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||
tempfile = "3"
|
||||
238
myfsio-engine/crates/myfsio-crypto/src/aes_gcm.rs
Normal file
238
myfsio-engine/crates/myfsio-crypto/src/aes_gcm.rs
Normal file
@@ -0,0 +1,238 @@
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
use hkdf::Hkdf;
|
||||
use sha2::Sha256;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Seek, SeekFrom, Write};
|
||||
use std::path::Path;
|
||||
use thiserror::Error;
|
||||
|
||||
const DEFAULT_CHUNK_SIZE: usize = 65536;
|
||||
const HEADER_SIZE: usize = 4;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum CryptoError {
|
||||
#[error("IO error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("Invalid key size: expected 32 bytes, got {0}")]
|
||||
InvalidKeySize(usize),
|
||||
#[error("Invalid nonce size: expected 12 bytes, got {0}")]
|
||||
InvalidNonceSize(usize),
|
||||
#[error("Encryption failed: {0}")]
|
||||
EncryptionFailed(String),
|
||||
#[error("Decryption failed at chunk {0}")]
|
||||
DecryptionFailed(u32),
|
||||
#[error("HKDF expand failed: {0}")]
|
||||
HkdfFailed(String),
|
||||
}
|
||||
|
||||
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
let mut filled = 0;
|
||||
while filled < buf.len() {
|
||||
match reader.read(&mut buf[filled..]) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => filled += n,
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
Ok(filled)
|
||||
}
|
||||
|
||||
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], CryptoError> {
|
||||
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
|
||||
let mut okm = [0u8; 12];
|
||||
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
|
||||
.map_err(|e| CryptoError::HkdfFailed(e.to_string()))?;
|
||||
Ok(okm)
|
||||
}
|
||||
|
||||
pub fn encrypt_stream_chunked(
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
chunk_size: Option<usize>,
|
||||
) -> Result<u32, CryptoError> {
|
||||
if key.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(key.len()));
|
||||
}
|
||||
if base_nonce.len() != 12 {
|
||||
return Err(CryptoError::InvalidNonceSize(base_nonce.len()));
|
||||
}
|
||||
|
||||
let chunk_size = chunk_size.unwrap_or(DEFAULT_CHUNK_SIZE);
|
||||
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
|
||||
let mut infile = File::open(input_path)?;
|
||||
let mut outfile = File::create(output_path)?;
|
||||
|
||||
outfile.write_all(&[0u8; 4])?;
|
||||
|
||||
let mut buf = vec![0u8; chunk_size];
|
||||
let mut chunk_index: u32 = 0;
|
||||
|
||||
loop {
|
||||
let n = read_exact_chunk(&mut infile, &mut buf)?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?;
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let encrypted = cipher
|
||||
.encrypt(nonce, &buf[..n])
|
||||
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
|
||||
|
||||
let size = encrypted.len() as u32;
|
||||
outfile.write_all(&size.to_be_bytes())?;
|
||||
outfile.write_all(&encrypted)?;
|
||||
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
outfile.seek(SeekFrom::Start(0))?;
|
||||
outfile.write_all(&chunk_index.to_be_bytes())?;
|
||||
|
||||
Ok(chunk_index)
|
||||
}
|
||||
|
||||
pub fn decrypt_stream_chunked(
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
) -> Result<u32, CryptoError> {
|
||||
if key.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(key.len()));
|
||||
}
|
||||
if base_nonce.len() != 12 {
|
||||
return Err(CryptoError::InvalidNonceSize(base_nonce.len()));
|
||||
}
|
||||
|
||||
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
|
||||
let mut infile = File::open(input_path)?;
|
||||
let mut outfile = File::create(output_path)?;
|
||||
|
||||
let mut header = [0u8; HEADER_SIZE];
|
||||
infile.read_exact(&mut header)?;
|
||||
let chunk_count = u32::from_be_bytes(header);
|
||||
|
||||
let mut size_buf = [0u8; HEADER_SIZE];
|
||||
for chunk_index in 0..chunk_count {
|
||||
infile.read_exact(&mut size_buf)?;
|
||||
let chunk_size = u32::from_be_bytes(size_buf) as usize;
|
||||
|
||||
let mut encrypted = vec![0u8; chunk_size];
|
||||
infile.read_exact(&mut encrypted)?;
|
||||
|
||||
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)?;
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let decrypted = cipher
|
||||
.decrypt(nonce, encrypted.as_ref())
|
||||
.map_err(|_| CryptoError::DecryptionFailed(chunk_index))?;
|
||||
|
||||
outfile.write_all(&decrypted)?;
|
||||
}
|
||||
|
||||
Ok(chunk_count)
|
||||
}
|
||||
|
||||
pub async fn encrypt_stream_chunked_async(
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
chunk_size: Option<usize>,
|
||||
) -> Result<u32, CryptoError> {
|
||||
let input_path = input_path.to_owned();
|
||||
let output_path = output_path.to_owned();
|
||||
let key = key.to_vec();
|
||||
let base_nonce = base_nonce.to_vec();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
encrypt_stream_chunked(&input_path, &output_path, &key, &base_nonce, chunk_size)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?
|
||||
}
|
||||
|
||||
pub async fn decrypt_stream_chunked_async(
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
) -> Result<u32, CryptoError> {
|
||||
let input_path = input_path.to_owned();
|
||||
let output_path = output_path.to_owned();
|
||||
let key = key.to_vec();
|
||||
let base_nonce = base_nonce.to_vec();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
decrypt_stream_chunked(&input_path, &output_path, &key, &base_nonce)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))?
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Write as IoWrite;
|
||||
|
||||
#[test]
|
||||
fn test_encrypt_decrypt_roundtrip() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let input = dir.path().join("input.bin");
|
||||
let encrypted = dir.path().join("encrypted.bin");
|
||||
let decrypted = dir.path().join("decrypted.bin");
|
||||
|
||||
let data = b"Hello, this is a test of AES-256-GCM chunked encryption!";
|
||||
std::fs::File::create(&input).unwrap().write_all(data).unwrap();
|
||||
|
||||
let key = [0x42u8; 32];
|
||||
let nonce = [0x01u8; 12];
|
||||
|
||||
let chunks = encrypt_stream_chunked(&input, &encrypted, &key, &nonce, Some(16)).unwrap();
|
||||
assert!(chunks > 0);
|
||||
|
||||
let chunks2 = decrypt_stream_chunked(&encrypted, &decrypted, &key, &nonce).unwrap();
|
||||
assert_eq!(chunks, chunks2);
|
||||
|
||||
let result = std::fs::read(&decrypted).unwrap();
|
||||
assert_eq!(result, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_key_size() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let input = dir.path().join("input.bin");
|
||||
std::fs::File::create(&input).unwrap().write_all(b"test").unwrap();
|
||||
|
||||
let result = encrypt_stream_chunked(&input, &dir.path().join("out"), &[0u8; 16], &[0u8; 12], None);
|
||||
assert!(matches!(result, Err(CryptoError::InvalidKeySize(16))));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrong_key_fails_decrypt() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let input = dir.path().join("input.bin");
|
||||
let encrypted = dir.path().join("encrypted.bin");
|
||||
let decrypted = dir.path().join("decrypted.bin");
|
||||
|
||||
std::fs::File::create(&input).unwrap().write_all(b"secret data").unwrap();
|
||||
|
||||
let key = [0x42u8; 32];
|
||||
let nonce = [0x01u8; 12];
|
||||
encrypt_stream_chunked(&input, &encrypted, &key, &nonce, None).unwrap();
|
||||
|
||||
let wrong_key = [0x43u8; 32];
|
||||
let result = decrypt_stream_chunked(&encrypted, &decrypted, &wrong_key, &nonce);
|
||||
assert!(matches!(result, Err(CryptoError::DecryptionFailed(_))));
|
||||
}
|
||||
}
|
||||
375
myfsio-engine/crates/myfsio-crypto/src/encryption.rs
Normal file
375
myfsio-engine/crates/myfsio-crypto/src/encryption.rs
Normal file
@@ -0,0 +1,375 @@
|
||||
use base64::engine::general_purpose::STANDARD as B64;
|
||||
use base64::Engine;
|
||||
use rand::RngCore;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::aes_gcm::{
|
||||
encrypt_stream_chunked, decrypt_stream_chunked, CryptoError,
|
||||
};
|
||||
use crate::kms::KmsService;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum SseAlgorithm {
|
||||
Aes256,
|
||||
AwsKms,
|
||||
CustomerProvided,
|
||||
}
|
||||
|
||||
impl SseAlgorithm {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
SseAlgorithm::Aes256 => "AES256",
|
||||
SseAlgorithm::AwsKms => "aws:kms",
|
||||
SseAlgorithm::CustomerProvided => "AES256",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EncryptionContext {
|
||||
pub algorithm: SseAlgorithm,
|
||||
pub kms_key_id: Option<String>,
|
||||
pub customer_key: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EncryptionMetadata {
|
||||
pub algorithm: String,
|
||||
pub nonce: String,
|
||||
pub encrypted_data_key: Option<String>,
|
||||
pub kms_key_id: Option<String>,
|
||||
}
|
||||
|
||||
impl EncryptionMetadata {
|
||||
pub fn to_metadata_map(&self) -> HashMap<String, String> {
|
||||
let mut map = HashMap::new();
|
||||
map.insert(
|
||||
"x-amz-server-side-encryption".to_string(),
|
||||
self.algorithm.clone(),
|
||||
);
|
||||
map.insert("x-amz-encryption-nonce".to_string(), self.nonce.clone());
|
||||
if let Some(ref dk) = self.encrypted_data_key {
|
||||
map.insert("x-amz-encrypted-data-key".to_string(), dk.clone());
|
||||
}
|
||||
if let Some(ref kid) = self.kms_key_id {
|
||||
map.insert("x-amz-encryption-key-id".to_string(), kid.clone());
|
||||
}
|
||||
map
|
||||
}
|
||||
|
||||
pub fn from_metadata(meta: &HashMap<String, String>) -> Option<Self> {
|
||||
let algorithm = meta.get("x-amz-server-side-encryption")?;
|
||||
let nonce = meta.get("x-amz-encryption-nonce")?;
|
||||
Some(Self {
|
||||
algorithm: algorithm.clone(),
|
||||
nonce: nonce.clone(),
|
||||
encrypted_data_key: meta.get("x-amz-encrypted-data-key").cloned(),
|
||||
kms_key_id: meta.get("x-amz-encryption-key-id").cloned(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_encrypted(meta: &HashMap<String, String>) -> bool {
|
||||
meta.contains_key("x-amz-server-side-encryption")
|
||||
}
|
||||
|
||||
pub fn clean_metadata(meta: &mut HashMap<String, String>) {
|
||||
meta.remove("x-amz-server-side-encryption");
|
||||
meta.remove("x-amz-encryption-nonce");
|
||||
meta.remove("x-amz-encrypted-data-key");
|
||||
meta.remove("x-amz-encryption-key-id");
|
||||
}
|
||||
}
|
||||
|
||||
pub struct EncryptionService {
|
||||
master_key: [u8; 32],
|
||||
kms: Option<std::sync::Arc<KmsService>>,
|
||||
}
|
||||
|
||||
impl EncryptionService {
|
||||
pub fn new(master_key: [u8; 32], kms: Option<std::sync::Arc<KmsService>>) -> Self {
|
||||
Self { master_key, kms }
|
||||
}
|
||||
|
||||
pub fn generate_data_key(&self) -> ([u8; 32], [u8; 12]) {
|
||||
let mut data_key = [0u8; 32];
|
||||
let mut nonce = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut data_key);
|
||||
rand::thread_rng().fill_bytes(&mut nonce);
|
||||
(data_key, nonce)
|
||||
}
|
||||
|
||||
pub fn wrap_data_key(&self, data_key: &[u8; 32]) -> Result<String, CryptoError> {
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
|
||||
let cipher = Aes256Gcm::new((&self.master_key).into());
|
||||
let mut nonce_bytes = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut nonce_bytes);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let encrypted = cipher
|
||||
.encrypt(nonce, data_key.as_slice())
|
||||
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
|
||||
|
||||
let mut combined = Vec::with_capacity(12 + encrypted.len());
|
||||
combined.extend_from_slice(&nonce_bytes);
|
||||
combined.extend_from_slice(&encrypted);
|
||||
Ok(B64.encode(&combined))
|
||||
}
|
||||
|
||||
pub fn unwrap_data_key(&self, wrapped_b64: &str) -> Result<[u8; 32], CryptoError> {
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
|
||||
let combined = B64.decode(wrapped_b64).map_err(|e| {
|
||||
CryptoError::EncryptionFailed(format!("Bad wrapped key encoding: {}", e))
|
||||
})?;
|
||||
if combined.len() < 12 {
|
||||
return Err(CryptoError::EncryptionFailed(
|
||||
"Wrapped key too short".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let (nonce_bytes, ciphertext) = combined.split_at(12);
|
||||
let cipher = Aes256Gcm::new((&self.master_key).into());
|
||||
let nonce = Nonce::from_slice(nonce_bytes);
|
||||
|
||||
let plaintext = cipher
|
||||
.decrypt(nonce, ciphertext)
|
||||
.map_err(|_| CryptoError::DecryptionFailed(0))?;
|
||||
|
||||
if plaintext.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(plaintext.len()));
|
||||
}
|
||||
let mut key = [0u8; 32];
|
||||
key.copy_from_slice(&plaintext);
|
||||
Ok(key)
|
||||
}
|
||||
|
||||
pub async fn encrypt_object(
|
||||
&self,
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
ctx: &EncryptionContext,
|
||||
) -> Result<EncryptionMetadata, CryptoError> {
|
||||
let (data_key, nonce) = self.generate_data_key();
|
||||
|
||||
let (encrypted_data_key, kms_key_id) = match ctx.algorithm {
|
||||
SseAlgorithm::Aes256 => {
|
||||
let wrapped = self.wrap_data_key(&data_key)?;
|
||||
(Some(wrapped), None)
|
||||
}
|
||||
SseAlgorithm::AwsKms => {
|
||||
let kms = self
|
||||
.kms
|
||||
.as_ref()
|
||||
.ok_or_else(|| CryptoError::EncryptionFailed("KMS not available".into()))?;
|
||||
let kid = ctx
|
||||
.kms_key_id
|
||||
.as_ref()
|
||||
.ok_or_else(|| CryptoError::EncryptionFailed("No KMS key ID".into()))?;
|
||||
let ciphertext = kms.encrypt_data(kid, &data_key).await?;
|
||||
(Some(B64.encode(&ciphertext)), Some(kid.clone()))
|
||||
}
|
||||
SseAlgorithm::CustomerProvided => {
|
||||
(None, None)
|
||||
}
|
||||
};
|
||||
|
||||
let actual_key = if ctx.algorithm == SseAlgorithm::CustomerProvided {
|
||||
let ck = ctx.customer_key.as_ref().ok_or_else(|| {
|
||||
CryptoError::EncryptionFailed("No customer key provided".into())
|
||||
})?;
|
||||
if ck.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(ck.len()));
|
||||
}
|
||||
let mut k = [0u8; 32];
|
||||
k.copy_from_slice(ck);
|
||||
k
|
||||
} else {
|
||||
data_key
|
||||
};
|
||||
|
||||
let ip = input_path.to_owned();
|
||||
let op = output_path.to_owned();
|
||||
let ak = actual_key;
|
||||
let n = nonce;
|
||||
tokio::task::spawn_blocking(move || {
|
||||
encrypt_stream_chunked(&ip, &op, &ak, &n, None)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))??;
|
||||
|
||||
Ok(EncryptionMetadata {
|
||||
algorithm: ctx.algorithm.as_str().to_string(),
|
||||
nonce: B64.encode(nonce),
|
||||
encrypted_data_key,
|
||||
kms_key_id,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn decrypt_object(
|
||||
&self,
|
||||
input_path: &Path,
|
||||
output_path: &Path,
|
||||
enc_meta: &EncryptionMetadata,
|
||||
customer_key: Option<&[u8]>,
|
||||
) -> Result<(), CryptoError> {
|
||||
let nonce_bytes = B64.decode(&enc_meta.nonce).map_err(|e| {
|
||||
CryptoError::EncryptionFailed(format!("Bad nonce encoding: {}", e))
|
||||
})?;
|
||||
if nonce_bytes.len() != 12 {
|
||||
return Err(CryptoError::InvalidNonceSize(nonce_bytes.len()));
|
||||
}
|
||||
|
||||
let data_key: [u8; 32] = if let Some(ck) = customer_key {
|
||||
if ck.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(ck.len()));
|
||||
}
|
||||
let mut k = [0u8; 32];
|
||||
k.copy_from_slice(ck);
|
||||
k
|
||||
} else if enc_meta.algorithm == "aws:kms" {
|
||||
let kms = self
|
||||
.kms
|
||||
.as_ref()
|
||||
.ok_or_else(|| CryptoError::EncryptionFailed("KMS not available".into()))?;
|
||||
let kid = enc_meta
|
||||
.kms_key_id
|
||||
.as_ref()
|
||||
.ok_or_else(|| CryptoError::EncryptionFailed("No KMS key ID in metadata".into()))?;
|
||||
let encrypted_dk = enc_meta.encrypted_data_key.as_ref().ok_or_else(|| {
|
||||
CryptoError::EncryptionFailed("No encrypted data key in metadata".into())
|
||||
})?;
|
||||
let ct = B64.decode(encrypted_dk).map_err(|e| {
|
||||
CryptoError::EncryptionFailed(format!("Bad data key encoding: {}", e))
|
||||
})?;
|
||||
let dk = kms.decrypt_data(kid, &ct).await?;
|
||||
if dk.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(dk.len()));
|
||||
}
|
||||
let mut k = [0u8; 32];
|
||||
k.copy_from_slice(&dk);
|
||||
k
|
||||
} else {
|
||||
let wrapped = enc_meta.encrypted_data_key.as_ref().ok_or_else(|| {
|
||||
CryptoError::EncryptionFailed("No encrypted data key in metadata".into())
|
||||
})?;
|
||||
self.unwrap_data_key(wrapped)?
|
||||
};
|
||||
|
||||
let ip = input_path.to_owned();
|
||||
let op = output_path.to_owned();
|
||||
let nb: [u8; 12] = nonce_bytes.try_into().unwrap();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
decrypt_stream_chunked(&ip, &op, &data_key, &nb)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| CryptoError::Io(std::io::Error::new(std::io::ErrorKind::Other, e)))??;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Write;
|
||||
|
||||
fn test_master_key() -> [u8; 32] {
|
||||
[0x42u8; 32]
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_wrap_unwrap_data_key() {
|
||||
let svc = EncryptionService::new(test_master_key(), None);
|
||||
let dk = [0xAAu8; 32];
|
||||
let wrapped = svc.wrap_data_key(&dk).unwrap();
|
||||
let unwrapped = svc.unwrap_data_key(&wrapped).unwrap();
|
||||
assert_eq!(dk, unwrapped);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_encrypt_decrypt_object_sse_s3() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let input = dir.path().join("plain.bin");
|
||||
let encrypted = dir.path().join("enc.bin");
|
||||
let decrypted = dir.path().join("dec.bin");
|
||||
|
||||
let data = b"SSE-S3 encrypted content for testing!";
|
||||
std::fs::File::create(&input).unwrap().write_all(data).unwrap();
|
||||
|
||||
let svc = EncryptionService::new(test_master_key(), None);
|
||||
|
||||
let ctx = EncryptionContext {
|
||||
algorithm: SseAlgorithm::Aes256,
|
||||
kms_key_id: None,
|
||||
customer_key: None,
|
||||
};
|
||||
|
||||
let meta = svc.encrypt_object(&input, &encrypted, &ctx).await.unwrap();
|
||||
assert_eq!(meta.algorithm, "AES256");
|
||||
assert!(meta.encrypted_data_key.is_some());
|
||||
|
||||
svc.decrypt_object(&encrypted, &decrypted, &meta, None)
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let result = std::fs::read(&decrypted).unwrap();
|
||||
assert_eq!(result, data);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_encrypt_decrypt_object_sse_c() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let input = dir.path().join("plain.bin");
|
||||
let encrypted = dir.path().join("enc.bin");
|
||||
let decrypted = dir.path().join("dec.bin");
|
||||
|
||||
let data = b"SSE-C encrypted content!";
|
||||
std::fs::File::create(&input).unwrap().write_all(data).unwrap();
|
||||
|
||||
let customer_key = [0xBBu8; 32];
|
||||
let svc = EncryptionService::new(test_master_key(), None);
|
||||
|
||||
let ctx = EncryptionContext {
|
||||
algorithm: SseAlgorithm::CustomerProvided,
|
||||
kms_key_id: None,
|
||||
customer_key: Some(customer_key.to_vec()),
|
||||
};
|
||||
|
||||
let meta = svc.encrypt_object(&input, &encrypted, &ctx).await.unwrap();
|
||||
assert!(meta.encrypted_data_key.is_none());
|
||||
|
||||
svc.decrypt_object(&encrypted, &decrypted, &meta, Some(&customer_key))
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let result = std::fs::read(&decrypted).unwrap();
|
||||
assert_eq!(result, data);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_encryption_metadata_roundtrip() {
|
||||
let meta = EncryptionMetadata {
|
||||
algorithm: "AES256".to_string(),
|
||||
nonce: "dGVzdG5vbmNlMTI=".to_string(),
|
||||
encrypted_data_key: Some("c29tZWtleQ==".to_string()),
|
||||
kms_key_id: None,
|
||||
};
|
||||
let map = meta.to_metadata_map();
|
||||
let restored = EncryptionMetadata::from_metadata(&map).unwrap();
|
||||
assert_eq!(restored.algorithm, "AES256");
|
||||
assert_eq!(restored.nonce, meta.nonce);
|
||||
assert_eq!(restored.encrypted_data_key, meta.encrypted_data_key);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_encrypted() {
|
||||
let mut meta = HashMap::new();
|
||||
assert!(!EncryptionMetadata::is_encrypted(&meta));
|
||||
meta.insert("x-amz-server-side-encryption".to_string(), "AES256".to_string());
|
||||
assert!(EncryptionMetadata::is_encrypted(&meta));
|
||||
}
|
||||
}
|
||||
132
myfsio-engine/crates/myfsio-crypto/src/hashing.rs
Normal file
132
myfsio-engine/crates/myfsio-crypto/src/hashing.rs
Normal file
@@ -0,0 +1,132 @@
|
||||
use md5::{Digest, Md5};
|
||||
use sha2::Sha256;
|
||||
use std::io::Read;
|
||||
use std::path::Path;
|
||||
|
||||
const CHUNK_SIZE: usize = 65536;
|
||||
|
||||
pub fn md5_file(path: &Path) -> std::io::Result<String> {
|
||||
let mut file = std::fs::File::open(path)?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file.read(&mut buf)?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
}
|
||||
|
||||
pub fn md5_bytes(data: &[u8]) -> String {
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
pub fn sha256_file(path: &Path) -> std::io::Result<String> {
|
||||
let mut file = std::fs::File::open(path)?;
|
||||
let mut hasher = Sha256::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file.read(&mut buf)?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
}
|
||||
|
||||
pub fn sha256_bytes(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
pub fn md5_sha256_file(path: &Path) -> std::io::Result<(String, String)> {
|
||||
let mut file = std::fs::File::open(path)?;
|
||||
let mut md5_hasher = Md5::new();
|
||||
let mut sha_hasher = Sha256::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file.read(&mut buf)?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
md5_hasher.update(&buf[..n]);
|
||||
sha_hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok((
|
||||
format!("{:x}", md5_hasher.finalize()),
|
||||
format!("{:x}", sha_hasher.finalize()),
|
||||
))
|
||||
}
|
||||
|
||||
pub async fn md5_file_async(path: &Path) -> std::io::Result<String> {
|
||||
let path = path.to_owned();
|
||||
tokio::task::spawn_blocking(move || md5_file(&path))
|
||||
.await
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
|
||||
}
|
||||
|
||||
pub async fn sha256_file_async(path: &Path) -> std::io::Result<String> {
|
||||
let path = path.to_owned();
|
||||
tokio::task::spawn_blocking(move || sha256_file(&path))
|
||||
.await
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
|
||||
}
|
||||
|
||||
pub async fn md5_sha256_file_async(path: &Path) -> std::io::Result<(String, String)> {
|
||||
let path = path.to_owned();
|
||||
tokio::task::spawn_blocking(move || md5_sha256_file(&path))
|
||||
.await
|
||||
.map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::io::Write;
|
||||
|
||||
#[test]
|
||||
fn test_md5_bytes() {
|
||||
assert_eq!(md5_bytes(b""), "d41d8cd98f00b204e9800998ecf8427e");
|
||||
assert_eq!(md5_bytes(b"hello"), "5d41402abc4b2a76b9719d911017c592");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sha256_bytes() {
|
||||
let hash = sha256_bytes(b"hello");
|
||||
assert_eq!(hash, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_md5_file() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(b"hello").unwrap();
|
||||
tmp.flush().unwrap();
|
||||
let hash = md5_file(tmp.path()).unwrap();
|
||||
assert_eq!(hash, "5d41402abc4b2a76b9719d911017c592");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_md5_sha256_file() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(b"hello").unwrap();
|
||||
tmp.flush().unwrap();
|
||||
let (md5, sha) = md5_sha256_file(tmp.path()).unwrap();
|
||||
assert_eq!(md5, "5d41402abc4b2a76b9719d911017c592");
|
||||
assert_eq!(sha, "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_md5_file_async() {
|
||||
let mut tmp = tempfile::NamedTempFile::new().unwrap();
|
||||
tmp.write_all(b"hello").unwrap();
|
||||
tmp.flush().unwrap();
|
||||
let hash = md5_file_async(tmp.path()).await.unwrap();
|
||||
assert_eq!(hash, "5d41402abc4b2a76b9719d911017c592");
|
||||
}
|
||||
}
|
||||
453
myfsio-engine/crates/myfsio-crypto/src/kms.rs
Normal file
453
myfsio-engine/crates/myfsio-crypto/src/kms.rs
Normal file
@@ -0,0 +1,453 @@
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
use base64::engine::general_purpose::STANDARD as B64;
|
||||
use base64::Engine;
|
||||
use chrono::{DateTime, Utc};
|
||||
use rand::RngCore;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::aes_gcm::CryptoError;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct KmsKey {
|
||||
#[serde(rename = "KeyId")]
|
||||
pub key_id: String,
|
||||
#[serde(rename = "Arn")]
|
||||
pub arn: String,
|
||||
#[serde(rename = "Description")]
|
||||
pub description: String,
|
||||
#[serde(rename = "CreationDate")]
|
||||
pub creation_date: DateTime<Utc>,
|
||||
#[serde(rename = "Enabled")]
|
||||
pub enabled: bool,
|
||||
#[serde(rename = "KeyState")]
|
||||
pub key_state: String,
|
||||
#[serde(rename = "KeyUsage")]
|
||||
pub key_usage: String,
|
||||
#[serde(rename = "KeySpec")]
|
||||
pub key_spec: String,
|
||||
#[serde(rename = "EncryptedKeyMaterial")]
|
||||
pub encrypted_key_material: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct KmsStore {
|
||||
keys: Vec<KmsKey>,
|
||||
}
|
||||
|
||||
pub struct KmsService {
|
||||
keys_path: PathBuf,
|
||||
master_key: Arc<RwLock<[u8; 32]>>,
|
||||
keys: Arc<RwLock<Vec<KmsKey>>>,
|
||||
}
|
||||
|
||||
impl KmsService {
|
||||
pub async fn new(keys_dir: &Path) -> Result<Self, CryptoError> {
|
||||
std::fs::create_dir_all(keys_dir).map_err(CryptoError::Io)?;
|
||||
|
||||
let keys_path = keys_dir.join("kms_keys.json");
|
||||
|
||||
let master_key = Self::load_or_create_master_key(&keys_dir.join("kms_master.key"))?;
|
||||
|
||||
let keys = if keys_path.exists() {
|
||||
let data = std::fs::read_to_string(&keys_path).map_err(CryptoError::Io)?;
|
||||
let store: KmsStore = serde_json::from_str(&data)
|
||||
.map_err(|e| CryptoError::EncryptionFailed(format!("Bad KMS store: {}", e)))?;
|
||||
store.keys
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
keys_path,
|
||||
master_key: Arc::new(RwLock::new(master_key)),
|
||||
keys: Arc::new(RwLock::new(keys)),
|
||||
})
|
||||
}
|
||||
|
||||
fn load_or_create_master_key(path: &Path) -> Result<[u8; 32], CryptoError> {
|
||||
if path.exists() {
|
||||
let encoded = std::fs::read_to_string(path).map_err(CryptoError::Io)?;
|
||||
let decoded = B64.decode(encoded.trim()).map_err(|e| {
|
||||
CryptoError::EncryptionFailed(format!("Bad master key encoding: {}", e))
|
||||
})?;
|
||||
if decoded.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(decoded.len()));
|
||||
}
|
||||
let mut key = [0u8; 32];
|
||||
key.copy_from_slice(&decoded);
|
||||
Ok(key)
|
||||
} else {
|
||||
let mut key = [0u8; 32];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
let encoded = B64.encode(key);
|
||||
std::fs::write(path, &encoded).map_err(CryptoError::Io)?;
|
||||
Ok(key)
|
||||
}
|
||||
}
|
||||
|
||||
fn encrypt_key_material(
|
||||
master_key: &[u8; 32],
|
||||
plaintext_key: &[u8],
|
||||
) -> Result<String, CryptoError> {
|
||||
let cipher = Aes256Gcm::new(master_key.into());
|
||||
let mut nonce_bytes = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut nonce_bytes);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let ciphertext = cipher
|
||||
.encrypt(nonce, plaintext_key)
|
||||
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
|
||||
|
||||
let mut combined = Vec::with_capacity(12 + ciphertext.len());
|
||||
combined.extend_from_slice(&nonce_bytes);
|
||||
combined.extend_from_slice(&ciphertext);
|
||||
Ok(B64.encode(&combined))
|
||||
}
|
||||
|
||||
fn decrypt_key_material(
|
||||
master_key: &[u8; 32],
|
||||
encrypted_b64: &str,
|
||||
) -> Result<Vec<u8>, CryptoError> {
|
||||
let combined = B64.decode(encrypted_b64).map_err(|e| {
|
||||
CryptoError::EncryptionFailed(format!("Bad key material encoding: {}", e))
|
||||
})?;
|
||||
if combined.len() < 12 {
|
||||
return Err(CryptoError::EncryptionFailed(
|
||||
"Encrypted key material too short".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let (nonce_bytes, ciphertext) = combined.split_at(12);
|
||||
let cipher = Aes256Gcm::new(master_key.into());
|
||||
let nonce = Nonce::from_slice(nonce_bytes);
|
||||
|
||||
cipher
|
||||
.decrypt(nonce, ciphertext)
|
||||
.map_err(|_| CryptoError::DecryptionFailed(0))
|
||||
}
|
||||
|
||||
async fn save(&self) -> Result<(), CryptoError> {
|
||||
let keys = self.keys.read().await;
|
||||
let store = KmsStore {
|
||||
keys: keys.clone(),
|
||||
};
|
||||
let json = serde_json::to_string_pretty(&store)
|
||||
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
|
||||
std::fs::write(&self.keys_path, json).map_err(CryptoError::Io)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn create_key(&self, description: &str) -> Result<KmsKey, CryptoError> {
|
||||
let key_id = uuid::Uuid::new_v4().to_string();
|
||||
let arn = format!("arn:aws:kms:local:000000000000:key/{}", key_id);
|
||||
|
||||
let mut plaintext_key = [0u8; 32];
|
||||
rand::thread_rng().fill_bytes(&mut plaintext_key);
|
||||
|
||||
let master = self.master_key.read().await;
|
||||
let encrypted = Self::encrypt_key_material(&master, &plaintext_key)?;
|
||||
|
||||
let kms_key = KmsKey {
|
||||
key_id: key_id.clone(),
|
||||
arn,
|
||||
description: description.to_string(),
|
||||
creation_date: Utc::now(),
|
||||
enabled: true,
|
||||
key_state: "Enabled".to_string(),
|
||||
key_usage: "ENCRYPT_DECRYPT".to_string(),
|
||||
key_spec: "SYMMETRIC_DEFAULT".to_string(),
|
||||
encrypted_key_material: encrypted,
|
||||
};
|
||||
|
||||
self.keys.write().await.push(kms_key.clone());
|
||||
self.save().await?;
|
||||
Ok(kms_key)
|
||||
}
|
||||
|
||||
pub async fn list_keys(&self) -> Vec<KmsKey> {
|
||||
self.keys.read().await.clone()
|
||||
}
|
||||
|
||||
pub async fn get_key(&self, key_id: &str) -> Option<KmsKey> {
|
||||
let keys = self.keys.read().await;
|
||||
keys.iter()
|
||||
.find(|k| k.key_id == key_id || k.arn == key_id)
|
||||
.cloned()
|
||||
}
|
||||
|
||||
pub async fn delete_key(&self, key_id: &str) -> Result<bool, CryptoError> {
|
||||
let mut keys = self.keys.write().await;
|
||||
let len_before = keys.len();
|
||||
keys.retain(|k| k.key_id != key_id && k.arn != key_id);
|
||||
let removed = keys.len() < len_before;
|
||||
drop(keys);
|
||||
if removed {
|
||||
self.save().await?;
|
||||
}
|
||||
Ok(removed)
|
||||
}
|
||||
|
||||
pub async fn enable_key(&self, key_id: &str) -> Result<bool, CryptoError> {
|
||||
let mut keys = self.keys.write().await;
|
||||
if let Some(key) = keys.iter_mut().find(|k| k.key_id == key_id) {
|
||||
key.enabled = true;
|
||||
key.key_state = "Enabled".to_string();
|
||||
drop(keys);
|
||||
self.save().await?;
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn disable_key(&self, key_id: &str) -> Result<bool, CryptoError> {
|
||||
let mut keys = self.keys.write().await;
|
||||
if let Some(key) = keys.iter_mut().find(|k| k.key_id == key_id) {
|
||||
key.enabled = false;
|
||||
key.key_state = "Disabled".to_string();
|
||||
drop(keys);
|
||||
self.save().await?;
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn decrypt_data_key(&self, key_id: &str) -> Result<Vec<u8>, CryptoError> {
|
||||
let keys = self.keys.read().await;
|
||||
let key = keys
|
||||
.iter()
|
||||
.find(|k| k.key_id == key_id || k.arn == key_id)
|
||||
.ok_or_else(|| CryptoError::EncryptionFailed("KMS key not found".to_string()))?;
|
||||
|
||||
if !key.enabled {
|
||||
return Err(CryptoError::EncryptionFailed(
|
||||
"KMS key is disabled".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let master = self.master_key.read().await;
|
||||
Self::decrypt_key_material(&master, &key.encrypted_key_material)
|
||||
}
|
||||
|
||||
pub async fn encrypt_data(
|
||||
&self,
|
||||
key_id: &str,
|
||||
plaintext: &[u8],
|
||||
) -> Result<Vec<u8>, CryptoError> {
|
||||
let data_key = self.decrypt_data_key(key_id).await?;
|
||||
if data_key.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(data_key.len()));
|
||||
}
|
||||
|
||||
let key_arr: [u8; 32] = data_key.try_into().unwrap();
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
let mut nonce_bytes = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut nonce_bytes);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let ciphertext = cipher
|
||||
.encrypt(nonce, plaintext)
|
||||
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
|
||||
|
||||
let mut result = Vec::with_capacity(12 + ciphertext.len());
|
||||
result.extend_from_slice(&nonce_bytes);
|
||||
result.extend_from_slice(&ciphertext);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub async fn decrypt_data(
|
||||
&self,
|
||||
key_id: &str,
|
||||
ciphertext: &[u8],
|
||||
) -> Result<Vec<u8>, CryptoError> {
|
||||
if ciphertext.len() < 12 {
|
||||
return Err(CryptoError::EncryptionFailed(
|
||||
"Ciphertext too short".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let data_key = self.decrypt_data_key(key_id).await?;
|
||||
if data_key.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(data_key.len()));
|
||||
}
|
||||
|
||||
let key_arr: [u8; 32] = data_key.try_into().unwrap();
|
||||
let (nonce_bytes, ct) = ciphertext.split_at(12);
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
let nonce = Nonce::from_slice(nonce_bytes);
|
||||
|
||||
cipher
|
||||
.decrypt(nonce, ct)
|
||||
.map_err(|_| CryptoError::DecryptionFailed(0))
|
||||
}
|
||||
|
||||
pub async fn generate_data_key(
|
||||
&self,
|
||||
key_id: &str,
|
||||
num_bytes: usize,
|
||||
) -> Result<(Vec<u8>, Vec<u8>), CryptoError> {
|
||||
let kms_key = self.decrypt_data_key(key_id).await?;
|
||||
if kms_key.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(kms_key.len()));
|
||||
}
|
||||
|
||||
let mut plaintext_key = vec![0u8; num_bytes];
|
||||
rand::thread_rng().fill_bytes(&mut plaintext_key);
|
||||
|
||||
let key_arr: [u8; 32] = kms_key.try_into().unwrap();
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
let mut nonce_bytes = [0u8; 12];
|
||||
rand::thread_rng().fill_bytes(&mut nonce_bytes);
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let encrypted = cipher
|
||||
.encrypt(nonce, plaintext_key.as_slice())
|
||||
.map_err(|e| CryptoError::EncryptionFailed(e.to_string()))?;
|
||||
|
||||
let mut wrapped = Vec::with_capacity(12 + encrypted.len());
|
||||
wrapped.extend_from_slice(&nonce_bytes);
|
||||
wrapped.extend_from_slice(&encrypted);
|
||||
|
||||
Ok((plaintext_key, wrapped))
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn load_or_create_master_key(keys_dir: &Path) -> Result<[u8; 32], CryptoError> {
|
||||
std::fs::create_dir_all(keys_dir).map_err(CryptoError::Io)?;
|
||||
let path = keys_dir.join("master.key");
|
||||
|
||||
if path.exists() {
|
||||
let encoded = std::fs::read_to_string(&path).map_err(CryptoError::Io)?;
|
||||
let decoded = B64.decode(encoded.trim()).map_err(|e| {
|
||||
CryptoError::EncryptionFailed(format!("Bad master key encoding: {}", e))
|
||||
})?;
|
||||
if decoded.len() != 32 {
|
||||
return Err(CryptoError::InvalidKeySize(decoded.len()));
|
||||
}
|
||||
let mut key = [0u8; 32];
|
||||
key.copy_from_slice(&decoded);
|
||||
Ok(key)
|
||||
} else {
|
||||
let mut key = [0u8; 32];
|
||||
rand::thread_rng().fill_bytes(&mut key);
|
||||
let encoded = B64.encode(key);
|
||||
std::fs::write(&path, &encoded).map_err(CryptoError::Io)?;
|
||||
Ok(key)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_create_and_list_keys() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
|
||||
let key = kms.create_key("test key").await.unwrap();
|
||||
assert!(key.enabled);
|
||||
assert_eq!(key.description, "test key");
|
||||
assert!(key.key_id.len() > 0);
|
||||
|
||||
let keys = kms.list_keys().await;
|
||||
assert_eq!(keys.len(), 1);
|
||||
assert_eq!(keys[0].key_id, key.key_id);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_enable_disable_key() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
|
||||
let key = kms.create_key("toggle").await.unwrap();
|
||||
assert!(key.enabled);
|
||||
|
||||
kms.disable_key(&key.key_id).await.unwrap();
|
||||
let k = kms.get_key(&key.key_id).await.unwrap();
|
||||
assert!(!k.enabled);
|
||||
|
||||
kms.enable_key(&key.key_id).await.unwrap();
|
||||
let k = kms.get_key(&key.key_id).await.unwrap();
|
||||
assert!(k.enabled);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_delete_key() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
|
||||
let key = kms.create_key("doomed").await.unwrap();
|
||||
assert!(kms.delete_key(&key.key_id).await.unwrap());
|
||||
assert!(kms.get_key(&key.key_id).await.is_none());
|
||||
assert_eq!(kms.list_keys().await.len(), 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_encrypt_decrypt_data() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
|
||||
let key = kms.create_key("enc-key").await.unwrap();
|
||||
let plaintext = b"Hello, KMS!";
|
||||
|
||||
let ciphertext = kms.encrypt_data(&key.key_id, plaintext).await.unwrap();
|
||||
assert_ne!(&ciphertext, plaintext);
|
||||
|
||||
let decrypted = kms.decrypt_data(&key.key_id, &ciphertext).await.unwrap();
|
||||
assert_eq!(decrypted, plaintext);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_generate_data_key() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
|
||||
let key = kms.create_key("data-key-gen").await.unwrap();
|
||||
let (plaintext, wrapped) = kms.generate_data_key(&key.key_id, 32).await.unwrap();
|
||||
|
||||
assert_eq!(plaintext.len(), 32);
|
||||
assert!(wrapped.len() > 32);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_disabled_key_cannot_encrypt() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
|
||||
let key = kms.create_key("disabled").await.unwrap();
|
||||
kms.disable_key(&key.key_id).await.unwrap();
|
||||
|
||||
let result = kms.encrypt_data(&key.key_id, b"test").await;
|
||||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_persistence_across_reload() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let key_id = {
|
||||
let kms = KmsService::new(dir.path()).await.unwrap();
|
||||
let key = kms.create_key("persistent").await.unwrap();
|
||||
key.key_id
|
||||
};
|
||||
|
||||
let kms2 = KmsService::new(dir.path()).await.unwrap();
|
||||
let key = kms2.get_key(&key_id).await;
|
||||
assert!(key.is_some());
|
||||
assert_eq!(key.unwrap().description, "persistent");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_master_key_roundtrip() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let key1 = load_or_create_master_key(dir.path()).await.unwrap();
|
||||
let key2 = load_or_create_master_key(dir.path()).await.unwrap();
|
||||
assert_eq!(key1, key2);
|
||||
}
|
||||
}
|
||||
4
myfsio-engine/crates/myfsio-crypto/src/lib.rs
Normal file
4
myfsio-engine/crates/myfsio-crypto/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod hashing;
|
||||
pub mod aes_gcm;
|
||||
pub mod kms;
|
||||
pub mod encryption;
|
||||
39
myfsio-engine/crates/myfsio-server/Cargo.toml
Normal file
39
myfsio-engine/crates/myfsio-server/Cargo.toml
Normal file
@@ -0,0 +1,39 @@
|
||||
[package]
|
||||
name = "myfsio-server"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
myfsio-common = { path = "../myfsio-common" }
|
||||
myfsio-auth = { path = "../myfsio-auth" }
|
||||
myfsio-crypto = { path = "../myfsio-crypto" }
|
||||
myfsio-storage = { path = "../myfsio-storage" }
|
||||
myfsio-xml = { path = "../myfsio-xml" }
|
||||
base64 = { workspace = true }
|
||||
axum = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
tower = { workspace = true }
|
||||
tower-http = { workspace = true }
|
||||
hyper = { workspace = true }
|
||||
bytes = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
tokio-util = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
futures = { workspace = true }
|
||||
http-body-util = "0.1"
|
||||
percent-encoding = { workspace = true }
|
||||
quick-xml = { workspace = true }
|
||||
mime_guess = "2"
|
||||
crc32fast = { workspace = true }
|
||||
duckdb = { workspace = true }
|
||||
roxmltree = "0.20"
|
||||
parking_lot = { workspace = true }
|
||||
regex = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3"
|
||||
tower = { workspace = true, features = ["util"] }
|
||||
117
myfsio-engine/crates/myfsio-server/src/config.rs
Normal file
117
myfsio-engine/crates/myfsio-server/src/config.rs
Normal file
@@ -0,0 +1,117 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ServerConfig {
|
||||
pub bind_addr: SocketAddr,
|
||||
pub storage_root: PathBuf,
|
||||
pub region: String,
|
||||
pub iam_config_path: PathBuf,
|
||||
pub sigv4_timestamp_tolerance_secs: u64,
|
||||
pub presigned_url_min_expiry: u64,
|
||||
pub presigned_url_max_expiry: u64,
|
||||
pub secret_key: Option<String>,
|
||||
pub encryption_enabled: bool,
|
||||
pub kms_enabled: bool,
|
||||
pub gc_enabled: bool,
|
||||
pub integrity_enabled: bool,
|
||||
pub metrics_enabled: bool,
|
||||
pub lifecycle_enabled: bool,
|
||||
pub website_hosting_enabled: bool,
|
||||
}
|
||||
|
||||
impl ServerConfig {
|
||||
pub fn from_env() -> Self {
|
||||
let host = std::env::var("HOST").unwrap_or_else(|_| "127.0.0.1".to_string());
|
||||
let port: u16 = std::env::var("PORT")
|
||||
.unwrap_or_else(|_| "5000".to_string())
|
||||
.parse()
|
||||
.unwrap_or(5000);
|
||||
let storage_root = std::env::var("STORAGE_ROOT")
|
||||
.unwrap_or_else(|_| "./data".to_string());
|
||||
let region = std::env::var("AWS_REGION")
|
||||
.unwrap_or_else(|_| "us-east-1".to_string());
|
||||
|
||||
let storage_path = PathBuf::from(&storage_root);
|
||||
let iam_config_path = std::env::var("IAM_CONFIG")
|
||||
.map(PathBuf::from)
|
||||
.unwrap_or_else(|_| {
|
||||
storage_path.join(".myfsio.sys").join("config").join("iam.json")
|
||||
});
|
||||
|
||||
let sigv4_timestamp_tolerance_secs: u64 = std::env::var("SIGV4_TIMESTAMP_TOLERANCE_SECONDS")
|
||||
.unwrap_or_else(|_| "900".to_string())
|
||||
.parse()
|
||||
.unwrap_or(900);
|
||||
|
||||
let presigned_url_min_expiry: u64 = std::env::var("PRESIGNED_URL_MIN_EXPIRY_SECONDS")
|
||||
.unwrap_or_else(|_| "1".to_string())
|
||||
.parse()
|
||||
.unwrap_or(1);
|
||||
|
||||
let presigned_url_max_expiry: u64 = std::env::var("PRESIGNED_URL_MAX_EXPIRY_SECONDS")
|
||||
.unwrap_or_else(|_| "604800".to_string())
|
||||
.parse()
|
||||
.unwrap_or(604800);
|
||||
|
||||
let secret_key = {
|
||||
let env_key = std::env::var("SECRET_KEY").ok();
|
||||
match env_key {
|
||||
Some(k) if !k.is_empty() && k != "dev-secret-key" => Some(k),
|
||||
_ => {
|
||||
let secret_file = storage_path
|
||||
.join(".myfsio.sys")
|
||||
.join("config")
|
||||
.join(".secret");
|
||||
std::fs::read_to_string(&secret_file).ok().map(|s| s.trim().to_string())
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let encryption_enabled = std::env::var("ENCRYPTION_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
let kms_enabled = std::env::var("KMS_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
let gc_enabled = std::env::var("GC_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
let integrity_enabled = std::env::var("INTEGRITY_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
let metrics_enabled = std::env::var("OPERATION_METRICS_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
let lifecycle_enabled = std::env::var("LIFECYCLE_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
let website_hosting_enabled = std::env::var("WEBSITE_HOSTING_ENABLED")
|
||||
.unwrap_or_else(|_| "false".to_string())
|
||||
.to_lowercase() == "true";
|
||||
|
||||
Self {
|
||||
bind_addr: SocketAddr::new(host.parse().unwrap(), port),
|
||||
storage_root: storage_path,
|
||||
region,
|
||||
iam_config_path,
|
||||
sigv4_timestamp_tolerance_secs,
|
||||
presigned_url_min_expiry,
|
||||
presigned_url_max_expiry,
|
||||
secret_key,
|
||||
encryption_enabled,
|
||||
kms_enabled,
|
||||
gc_enabled,
|
||||
integrity_enabled,
|
||||
metrics_enabled,
|
||||
lifecycle_enabled,
|
||||
website_hosting_enabled,
|
||||
}
|
||||
}
|
||||
}
|
||||
704
myfsio-engine/crates/myfsio-server/src/handlers/admin.rs
Normal file
704
myfsio-engine/crates/myfsio-server/src/handlers/admin.rs
Normal file
@@ -0,0 +1,704 @@
|
||||
use axum::body::Body;
|
||||
use axum::extract::{Path, State};
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use axum::Extension;
|
||||
use myfsio_common::types::Principal;
|
||||
use myfsio_storage::traits::StorageEngine;
|
||||
|
||||
use crate::services::site_registry::{PeerSite, SiteInfo};
|
||||
use crate::services::website_domains::{is_valid_domain, normalize_domain};
|
||||
use crate::state::AppState;
|
||||
|
||||
fn json_response(status: StatusCode, value: serde_json::Value) -> Response {
|
||||
(
|
||||
status,
|
||||
[("content-type", "application/json")],
|
||||
value.to_string(),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
fn json_error(code: &str, message: &str, status: StatusCode) -> Response {
|
||||
json_response(
|
||||
status,
|
||||
serde_json::json!({"error": {"code": code, "message": message}}),
|
||||
)
|
||||
}
|
||||
|
||||
fn require_admin(principal: &Principal) -> Option<Response> {
|
||||
if !principal.is_admin {
|
||||
return Some(json_error("AccessDenied", "Admin access required", StatusCode::FORBIDDEN));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
async fn read_json_body(body: Body) -> Option<serde_json::Value> {
|
||||
let bytes = http_body_util::BodyExt::collect(body).await.ok()?.to_bytes();
|
||||
serde_json::from_slice(&bytes).ok()
|
||||
}
|
||||
|
||||
fn validate_site_id(site_id: &str) -> Option<String> {
|
||||
if site_id.is_empty() || site_id.len() > 63 {
|
||||
return Some("site_id must be 1-63 characters".to_string());
|
||||
}
|
||||
let first = site_id.chars().next().unwrap();
|
||||
if !first.is_ascii_alphanumeric() {
|
||||
return Some("site_id must start with alphanumeric".to_string());
|
||||
}
|
||||
if !site_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_') {
|
||||
return Some("site_id must contain only alphanumeric, hyphens, underscores".to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn validate_endpoint(endpoint: &str) -> Option<String> {
|
||||
if !endpoint.starts_with("http://") && !endpoint.starts_with("https://") {
|
||||
return Some("Endpoint must be http or https URL".to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn validate_region(region: &str) -> Option<String> {
|
||||
let re = regex::Regex::new(r"^[a-z]{2,}-[a-z]+-\d+$").unwrap();
|
||||
if !re.is_match(region) {
|
||||
return Some("Region must match format like us-east-1".to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn validate_priority(priority: i64) -> Option<String> {
|
||||
if priority < 0 || priority > 1000 {
|
||||
return Some("Priority must be between 0 and 1000".to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
pub async fn get_local_site(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
|
||||
if let Some(ref registry) = state.site_registry {
|
||||
if let Some(local) = registry.get_local_site() {
|
||||
return json_response(StatusCode::OK, serde_json::to_value(&local).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
json_error("NotFound", "Local site not configured", StatusCode::NOT_FOUND)
|
||||
}
|
||||
|
||||
pub async fn update_local_site(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("InvalidRequest", "Site registry not available", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let payload = match read_json_body(body).await {
|
||||
Some(v) => v,
|
||||
None => return json_error("MalformedJSON", "Invalid JSON body", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let site_id = match payload.get("site_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => return json_error("ValidationError", "site_id is required", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
if let Some(err) = validate_site_id(&site_id) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
let endpoint = payload.get("endpoint").and_then(|v| v.as_str()).unwrap_or("").to_string();
|
||||
if !endpoint.is_empty() {
|
||||
if let Some(err) = validate_endpoint(&endpoint) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(p) = payload.get("priority").and_then(|v| v.as_i64()) {
|
||||
if let Some(err) = validate_priority(p) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(r) = payload.get("region").and_then(|v| v.as_str()) {
|
||||
if let Some(err) = validate_region(r) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
|
||||
let existing = registry.get_local_site();
|
||||
let site = SiteInfo {
|
||||
site_id: site_id.clone(),
|
||||
endpoint,
|
||||
region: payload.get("region").and_then(|v| v.as_str()).unwrap_or("us-east-1").to_string(),
|
||||
priority: payload.get("priority").and_then(|v| v.as_i64()).unwrap_or(100) as i32,
|
||||
display_name: payload.get("display_name").and_then(|v| v.as_str()).unwrap_or(&site_id).to_string(),
|
||||
created_at: existing.and_then(|e| e.created_at),
|
||||
};
|
||||
|
||||
registry.set_local_site(site.clone());
|
||||
json_response(StatusCode::OK, serde_json::to_value(&site).unwrap())
|
||||
}
|
||||
|
||||
pub async fn list_all_sites(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_response(StatusCode::OK, serde_json::json!({"local": null, "peers": [], "total_peers": 0})),
|
||||
};
|
||||
|
||||
let local = registry.get_local_site();
|
||||
let peers = registry.list_peers();
|
||||
|
||||
json_response(StatusCode::OK, serde_json::json!({
|
||||
"local": local,
|
||||
"peers": peers,
|
||||
"total_peers": peers.len(),
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn register_peer_site(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("InvalidRequest", "Site registry not available", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let payload = match read_json_body(body).await {
|
||||
Some(v) => v,
|
||||
None => return json_error("MalformedJSON", "Invalid JSON body", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let site_id = match payload.get("site_id").and_then(|v| v.as_str()) {
|
||||
Some(s) => s.to_string(),
|
||||
None => return json_error("ValidationError", "site_id is required", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
if let Some(err) = validate_site_id(&site_id) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
let endpoint = match payload.get("endpoint").and_then(|v| v.as_str()) {
|
||||
Some(e) => e.to_string(),
|
||||
None => return json_error("ValidationError", "endpoint is required", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
if let Some(err) = validate_endpoint(&endpoint) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
let region = payload.get("region").and_then(|v| v.as_str()).unwrap_or("us-east-1").to_string();
|
||||
if let Some(err) = validate_region(®ion) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
let priority = payload.get("priority").and_then(|v| v.as_i64()).unwrap_or(100);
|
||||
if let Some(err) = validate_priority(priority) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
if registry.get_peer(&site_id).is_some() {
|
||||
return json_error("AlreadyExists", &format!("Peer site '{}' already exists", site_id), StatusCode::CONFLICT);
|
||||
}
|
||||
|
||||
let peer = PeerSite {
|
||||
site_id: site_id.clone(),
|
||||
endpoint,
|
||||
region,
|
||||
priority: priority as i32,
|
||||
display_name: payload.get("display_name").and_then(|v| v.as_str()).unwrap_or(&site_id).to_string(),
|
||||
connection_id: payload.get("connection_id").and_then(|v| v.as_str()).map(|s| s.to_string()),
|
||||
created_at: Some(chrono::Utc::now().to_rfc3339()),
|
||||
is_healthy: false,
|
||||
last_health_check: None,
|
||||
};
|
||||
|
||||
registry.add_peer(peer.clone());
|
||||
json_response(StatusCode::CREATED, serde_json::to_value(&peer).unwrap())
|
||||
}
|
||||
|
||||
pub async fn get_peer_site(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(site_id): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("NotFound", "Site registry not available", StatusCode::NOT_FOUND),
|
||||
};
|
||||
|
||||
match registry.get_peer(&site_id) {
|
||||
Some(peer) => json_response(StatusCode::OK, serde_json::to_value(&peer).unwrap()),
|
||||
None => json_error("NotFound", &format!("Peer site '{}' not found", site_id), StatusCode::NOT_FOUND),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_peer_site(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(site_id): Path<String>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("NotFound", "Site registry not available", StatusCode::NOT_FOUND),
|
||||
};
|
||||
|
||||
let existing = match registry.get_peer(&site_id) {
|
||||
Some(p) => p,
|
||||
None => return json_error("NotFound", &format!("Peer site '{}' not found", site_id), StatusCode::NOT_FOUND),
|
||||
};
|
||||
|
||||
let payload = match read_json_body(body).await {
|
||||
Some(v) => v,
|
||||
None => return json_error("MalformedJSON", "Invalid JSON body", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
if let Some(ep) = payload.get("endpoint").and_then(|v| v.as_str()) {
|
||||
if let Some(err) = validate_endpoint(ep) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
if let Some(p) = payload.get("priority").and_then(|v| v.as_i64()) {
|
||||
if let Some(err) = validate_priority(p) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
if let Some(r) = payload.get("region").and_then(|v| v.as_str()) {
|
||||
if let Some(err) = validate_region(r) {
|
||||
return json_error("ValidationError", &err, StatusCode::BAD_REQUEST);
|
||||
}
|
||||
}
|
||||
|
||||
let peer = PeerSite {
|
||||
site_id: site_id.clone(),
|
||||
endpoint: payload.get("endpoint").and_then(|v| v.as_str()).unwrap_or(&existing.endpoint).to_string(),
|
||||
region: payload.get("region").and_then(|v| v.as_str()).unwrap_or(&existing.region).to_string(),
|
||||
priority: payload.get("priority").and_then(|v| v.as_i64()).unwrap_or(existing.priority as i64) as i32,
|
||||
display_name: payload.get("display_name").and_then(|v| v.as_str()).unwrap_or(&existing.display_name).to_string(),
|
||||
connection_id: payload.get("connection_id").and_then(|v| v.as_str()).map(|s| s.to_string()).or(existing.connection_id),
|
||||
created_at: existing.created_at,
|
||||
is_healthy: existing.is_healthy,
|
||||
last_health_check: existing.last_health_check,
|
||||
};
|
||||
|
||||
registry.update_peer(peer.clone());
|
||||
json_response(StatusCode::OK, serde_json::to_value(&peer).unwrap())
|
||||
}
|
||||
|
||||
pub async fn delete_peer_site(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(site_id): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("NotFound", "Site registry not available", StatusCode::NOT_FOUND),
|
||||
};
|
||||
|
||||
if !registry.delete_peer(&site_id) {
|
||||
return json_error("NotFound", &format!("Peer site '{}' not found", site_id), StatusCode::NOT_FOUND);
|
||||
}
|
||||
StatusCode::NO_CONTENT.into_response()
|
||||
}
|
||||
|
||||
pub async fn check_peer_health(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(site_id): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("NotFound", "Site registry not available", StatusCode::NOT_FOUND),
|
||||
};
|
||||
|
||||
if registry.get_peer(&site_id).is_none() {
|
||||
return json_error("NotFound", &format!("Peer site '{}' not found", site_id), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
json_response(StatusCode::OK, serde_json::json!({
|
||||
"site_id": site_id,
|
||||
"is_healthy": false,
|
||||
"error": "Health check not implemented in standalone mode",
|
||||
"checked_at": chrono::Utc::now().timestamp_millis() as f64 / 1000.0,
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn get_topology(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_response(StatusCode::OK, serde_json::json!({"sites": [], "total": 0, "healthy_count": 0})),
|
||||
};
|
||||
|
||||
let local = registry.get_local_site();
|
||||
let peers = registry.list_peers();
|
||||
|
||||
let mut sites: Vec<serde_json::Value> = Vec::new();
|
||||
if let Some(l) = local {
|
||||
let mut v = serde_json::to_value(&l).unwrap();
|
||||
v.as_object_mut().unwrap().insert("is_local".to_string(), serde_json::json!(true));
|
||||
v.as_object_mut().unwrap().insert("is_healthy".to_string(), serde_json::json!(true));
|
||||
sites.push(v);
|
||||
}
|
||||
for p in &peers {
|
||||
let mut v = serde_json::to_value(p).unwrap();
|
||||
v.as_object_mut().unwrap().insert("is_local".to_string(), serde_json::json!(false));
|
||||
sites.push(v);
|
||||
}
|
||||
|
||||
sites.sort_by_key(|s| s.get("priority").and_then(|v| v.as_i64()).unwrap_or(100));
|
||||
|
||||
let healthy_count = sites.iter().filter(|s| s.get("is_healthy").and_then(|v| v.as_bool()).unwrap_or(false)).count();
|
||||
|
||||
json_response(StatusCode::OK, serde_json::json!({
|
||||
"sites": sites,
|
||||
"total": sites.len(),
|
||||
"healthy_count": healthy_count,
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn check_bidirectional_status(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(site_id): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let registry = match &state.site_registry {
|
||||
Some(r) => r,
|
||||
None => return json_error("NotFound", "Site registry not available", StatusCode::NOT_FOUND),
|
||||
};
|
||||
|
||||
if registry.get_peer(&site_id).is_none() {
|
||||
return json_error("NotFound", &format!("Peer site '{}' not found", site_id), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
let local = registry.get_local_site();
|
||||
json_response(StatusCode::OK, serde_json::json!({
|
||||
"site_id": site_id,
|
||||
"local_site_id": local.as_ref().map(|l| &l.site_id),
|
||||
"local_endpoint": local.as_ref().map(|l| &l.endpoint),
|
||||
"local_bidirectional_rules": [],
|
||||
"local_site_sync_enabled": false,
|
||||
"remote_status": null,
|
||||
"issues": [{"code": "NOT_IMPLEMENTED", "message": "Bidirectional status check not implemented in standalone mode", "severity": "warning"}],
|
||||
"is_fully_configured": false,
|
||||
}))
|
||||
}
|
||||
|
||||
pub async fn iam_list_users(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let users = state.iam.list_users().await;
|
||||
json_response(StatusCode::OK, serde_json::json!({"users": users}))
|
||||
}
|
||||
|
||||
pub async fn iam_get_user(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(identifier): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match state.iam.get_user(&identifier).await {
|
||||
Some(user) => json_response(StatusCode::OK, user),
|
||||
None => json_error("NotFound", &format!("User '{}' not found", identifier), StatusCode::NOT_FOUND),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn iam_get_user_policies(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(identifier): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match state.iam.get_user_policies(&identifier) {
|
||||
Some(policies) => json_response(StatusCode::OK, serde_json::json!({"policies": policies})),
|
||||
None => json_error("NotFound", &format!("User '{}' not found", identifier), StatusCode::NOT_FOUND),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn iam_create_access_key(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(identifier): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match state.iam.create_access_key(&identifier) {
|
||||
Ok(result) => json_response(StatusCode::CREATED, result),
|
||||
Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn iam_delete_access_key(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path((_identifier, access_key)): Path<(String, String)>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match state.iam.delete_access_key(&access_key) {
|
||||
Ok(()) => StatusCode::NO_CONTENT.into_response(),
|
||||
Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn iam_disable_user(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(identifier): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match state.iam.set_user_enabled(&identifier, false).await {
|
||||
Ok(()) => json_response(StatusCode::OK, serde_json::json!({"status": "disabled"})),
|
||||
Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn iam_enable_user(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(identifier): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match state.iam.set_user_enabled(&identifier, true).await {
|
||||
Ok(()) => json_response(StatusCode::OK, serde_json::json!({"status": "enabled"})),
|
||||
Err(e) => json_error("InvalidRequest", &e, StatusCode::BAD_REQUEST),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn list_website_domains(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let store = match &state.website_domains {
|
||||
Some(s) => s,
|
||||
None => return json_error("InvalidRequest", "Website hosting is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
json_response(StatusCode::OK, serde_json::json!(store.list_all()))
|
||||
}
|
||||
|
||||
pub async fn create_website_domain(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let store = match &state.website_domains {
|
||||
Some(s) => s,
|
||||
None => return json_error("InvalidRequest", "Website hosting is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let payload = match read_json_body(body).await {
|
||||
Some(v) => v,
|
||||
None => return json_error("MalformedJSON", "Invalid JSON body", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let domain = normalize_domain(payload.get("domain").and_then(|v| v.as_str()).unwrap_or(""));
|
||||
if domain.is_empty() {
|
||||
return json_error("ValidationError", "domain is required", StatusCode::BAD_REQUEST);
|
||||
}
|
||||
if !is_valid_domain(&domain) {
|
||||
return json_error("ValidationError", &format!("Invalid domain: '{}'", domain), StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
let bucket = payload.get("bucket").and_then(|v| v.as_str()).unwrap_or("").trim().to_string();
|
||||
if bucket.is_empty() {
|
||||
return json_error("ValidationError", "bucket is required", StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
match state.storage.bucket_exists(&bucket).await {
|
||||
Ok(true) => {}
|
||||
_ => return json_error("NoSuchBucket", &format!("Bucket '{}' does not exist", bucket), StatusCode::NOT_FOUND),
|
||||
}
|
||||
|
||||
if store.get_bucket(&domain).is_some() {
|
||||
return json_error("Conflict", &format!("Domain '{}' is already mapped", domain), StatusCode::CONFLICT);
|
||||
}
|
||||
|
||||
store.set_mapping(&domain, &bucket);
|
||||
json_response(StatusCode::CREATED, serde_json::json!({"domain": domain, "bucket": bucket}))
|
||||
}
|
||||
|
||||
pub async fn get_website_domain(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(domain): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let store = match &state.website_domains {
|
||||
Some(s) => s,
|
||||
None => return json_error("InvalidRequest", "Website hosting is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let domain = normalize_domain(&domain);
|
||||
match store.get_bucket(&domain) {
|
||||
Some(bucket) => json_response(StatusCode::OK, serde_json::json!({"domain": domain, "bucket": bucket})),
|
||||
None => json_error("NotFound", &format!("No mapping found for domain '{}'", domain), StatusCode::NOT_FOUND),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn update_website_domain(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(domain): Path<String>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let store = match &state.website_domains {
|
||||
Some(s) => s,
|
||||
None => return json_error("InvalidRequest", "Website hosting is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let domain = normalize_domain(&domain);
|
||||
let payload = match read_json_body(body).await {
|
||||
Some(v) => v,
|
||||
None => return json_error("MalformedJSON", "Invalid JSON body", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let bucket = payload.get("bucket").and_then(|v| v.as_str()).unwrap_or("").trim().to_string();
|
||||
if bucket.is_empty() {
|
||||
return json_error("ValidationError", "bucket is required", StatusCode::BAD_REQUEST);
|
||||
}
|
||||
|
||||
match state.storage.bucket_exists(&bucket).await {
|
||||
Ok(true) => {}
|
||||
_ => return json_error("NoSuchBucket", &format!("Bucket '{}' does not exist", bucket), StatusCode::NOT_FOUND),
|
||||
}
|
||||
|
||||
if store.get_bucket(&domain).is_none() {
|
||||
return json_error("NotFound", &format!("No mapping found for domain '{}'", domain), StatusCode::NOT_FOUND);
|
||||
}
|
||||
|
||||
store.set_mapping(&domain, &bucket);
|
||||
json_response(StatusCode::OK, serde_json::json!({"domain": domain, "bucket": bucket}))
|
||||
}
|
||||
|
||||
pub async fn delete_website_domain(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
Path(domain): Path<String>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let store = match &state.website_domains {
|
||||
Some(s) => s,
|
||||
None => return json_error("InvalidRequest", "Website hosting is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let domain = normalize_domain(&domain);
|
||||
if !store.delete_mapping(&domain) {
|
||||
return json_error("NotFound", &format!("No mapping found for domain '{}'", domain), StatusCode::NOT_FOUND);
|
||||
}
|
||||
StatusCode::NO_CONTENT.into_response()
|
||||
}
|
||||
|
||||
#[derive(serde::Deserialize, Default)]
|
||||
pub struct PaginationQuery {
|
||||
pub limit: Option<usize>,
|
||||
pub offset: Option<usize>,
|
||||
}
|
||||
|
||||
pub async fn gc_status(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match &state.gc {
|
||||
Some(gc) => json_response(StatusCode::OK, gc.status().await),
|
||||
None => json_response(StatusCode::OK, serde_json::json!({"enabled": false, "message": "GC is not enabled. Set GC_ENABLED=true to enable."})),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn gc_run(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let gc = match &state.gc {
|
||||
Some(gc) => gc,
|
||||
None => return json_error("InvalidRequest", "GC is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let payload = read_json_body(body).await.unwrap_or(serde_json::json!({}));
|
||||
let dry_run = payload.get("dry_run").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
|
||||
match gc.run_now(dry_run).await {
|
||||
Ok(result) => json_response(StatusCode::OK, result),
|
||||
Err(e) => json_error("Conflict", &e, StatusCode::CONFLICT),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn gc_history(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match &state.gc {
|
||||
Some(gc) => json_response(StatusCode::OK, serde_json::json!({"executions": gc.history().await})),
|
||||
None => json_response(StatusCode::OK, serde_json::json!({"executions": []})),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn integrity_status(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match &state.integrity {
|
||||
Some(checker) => json_response(StatusCode::OK, checker.status().await),
|
||||
None => json_response(StatusCode::OK, serde_json::json!({"enabled": false, "message": "Integrity checker is not enabled. Set INTEGRITY_ENABLED=true to enable."})),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn integrity_run(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
let checker = match &state.integrity {
|
||||
Some(c) => c,
|
||||
None => return json_error("InvalidRequest", "Integrity checker is not enabled", StatusCode::BAD_REQUEST),
|
||||
};
|
||||
|
||||
let payload = read_json_body(body).await.unwrap_or(serde_json::json!({}));
|
||||
let dry_run = payload.get("dry_run").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
let auto_heal = payload.get("auto_heal").and_then(|v| v.as_bool()).unwrap_or(false);
|
||||
|
||||
match checker.run_now(dry_run, auto_heal).await {
|
||||
Ok(result) => json_response(StatusCode::OK, result),
|
||||
Err(e) => json_error("Conflict", &e, StatusCode::CONFLICT),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn integrity_history(
|
||||
State(state): State<AppState>,
|
||||
Extension(principal): Extension<Principal>,
|
||||
) -> Response {
|
||||
if let Some(err) = require_admin(&principal) { return err; }
|
||||
match &state.integrity {
|
||||
Some(checker) => json_response(StatusCode::OK, serde_json::json!({"executions": checker.history().await})),
|
||||
None => json_response(StatusCode::OK, serde_json::json!({"executions": []})),
|
||||
}
|
||||
}
|
||||
1003
myfsio-engine/crates/myfsio-server/src/handlers/config.rs
Normal file
1003
myfsio-engine/crates/myfsio-server/src/handlers/config.rs
Normal file
File diff suppressed because it is too large
Load Diff
278
myfsio-engine/crates/myfsio-server/src/handlers/kms.rs
Normal file
278
myfsio-engine/crates/myfsio-server/src/handlers/kms.rs
Normal file
@@ -0,0 +1,278 @@
|
||||
use axum::body::Body;
|
||||
use axum::extract::State;
|
||||
use axum::http::StatusCode;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use base64::engine::general_purpose::STANDARD as B64;
|
||||
use base64::Engine;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
fn json_ok(value: serde_json::Value) -> Response {
|
||||
(
|
||||
StatusCode::OK,
|
||||
[("content-type", "application/json")],
|
||||
value.to_string(),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
fn json_err(status: StatusCode, msg: &str) -> Response {
|
||||
(
|
||||
status,
|
||||
[("content-type", "application/json")],
|
||||
json!({"error": msg}).to_string(),
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
pub async fn list_keys(State(state): State<AppState>) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
let keys = kms.list_keys().await;
|
||||
let keys_json: Vec<serde_json::Value> = keys
|
||||
.iter()
|
||||
.map(|k| {
|
||||
json!({
|
||||
"KeyId": k.key_id,
|
||||
"Arn": k.arn,
|
||||
"Description": k.description,
|
||||
"CreationDate": k.creation_date.to_rfc3339(),
|
||||
"Enabled": k.enabled,
|
||||
"KeyState": k.key_state,
|
||||
"KeyUsage": k.key_usage,
|
||||
"KeySpec": k.key_spec,
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
||||
json_ok(json!({"keys": keys_json}))
|
||||
}
|
||||
|
||||
pub async fn create_key(State(state): State<AppState>, body: Body) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
let body_bytes = match http_body_util::BodyExt::collect(body).await {
|
||||
Ok(c) => c.to_bytes(),
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid request body"),
|
||||
};
|
||||
|
||||
let description = if body_bytes.is_empty() {
|
||||
String::new()
|
||||
} else {
|
||||
match serde_json::from_slice::<serde_json::Value>(&body_bytes) {
|
||||
Ok(v) => v
|
||||
.get("Description")
|
||||
.or_else(|| v.get("description"))
|
||||
.and_then(|d| d.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string(),
|
||||
Err(_) => String::new(),
|
||||
}
|
||||
};
|
||||
|
||||
match kms.create_key(&description).await {
|
||||
Ok(key) => json_ok(json!({
|
||||
"KeyId": key.key_id,
|
||||
"Arn": key.arn,
|
||||
"Description": key.description,
|
||||
"CreationDate": key.creation_date.to_rfc3339(),
|
||||
"Enabled": key.enabled,
|
||||
"KeyState": key.key_state,
|
||||
})),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_key(
|
||||
State(state): State<AppState>,
|
||||
axum::extract::Path(key_id): axum::extract::Path<String>,
|
||||
) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
match kms.get_key(&key_id).await {
|
||||
Some(key) => json_ok(json!({
|
||||
"KeyId": key.key_id,
|
||||
"Arn": key.arn,
|
||||
"Description": key.description,
|
||||
"CreationDate": key.creation_date.to_rfc3339(),
|
||||
"Enabled": key.enabled,
|
||||
"KeyState": key.key_state,
|
||||
"KeyUsage": key.key_usage,
|
||||
"KeySpec": key.key_spec,
|
||||
})),
|
||||
None => json_err(StatusCode::NOT_FOUND, "Key not found"),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn delete_key(
|
||||
State(state): State<AppState>,
|
||||
axum::extract::Path(key_id): axum::extract::Path<String>,
|
||||
) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
match kms.delete_key(&key_id).await {
|
||||
Ok(true) => StatusCode::NO_CONTENT.into_response(),
|
||||
Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn enable_key(
|
||||
State(state): State<AppState>,
|
||||
axum::extract::Path(key_id): axum::extract::Path<String>,
|
||||
) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
match kms.enable_key(&key_id).await {
|
||||
Ok(true) => json_ok(json!({"status": "enabled"})),
|
||||
Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn disable_key(
|
||||
State(state): State<AppState>,
|
||||
axum::extract::Path(key_id): axum::extract::Path<String>,
|
||||
) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
match kms.disable_key(&key_id).await {
|
||||
Ok(true) => json_ok(json!({"status": "disabled"})),
|
||||
Ok(false) => json_err(StatusCode::NOT_FOUND, "Key not found"),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn encrypt(State(state): State<AppState>, body: Body) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
let body_bytes = match http_body_util::BodyExt::collect(body).await {
|
||||
Ok(c) => c.to_bytes(),
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid request body"),
|
||||
};
|
||||
|
||||
let req: serde_json::Value = match serde_json::from_slice(&body_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid JSON"),
|
||||
};
|
||||
|
||||
let key_id = match req.get("KeyId").and_then(|v| v.as_str()) {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::BAD_REQUEST, "Missing KeyId"),
|
||||
};
|
||||
let plaintext_b64 = match req.get("Plaintext").and_then(|v| v.as_str()) {
|
||||
Some(p) => p,
|
||||
None => return json_err(StatusCode::BAD_REQUEST, "Missing Plaintext"),
|
||||
};
|
||||
let plaintext = match B64.decode(plaintext_b64) {
|
||||
Ok(p) => p,
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid base64 Plaintext"),
|
||||
};
|
||||
|
||||
match kms.encrypt_data(key_id, &plaintext).await {
|
||||
Ok(ct) => json_ok(json!({
|
||||
"KeyId": key_id,
|
||||
"CiphertextBlob": B64.encode(&ct),
|
||||
})),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn decrypt(State(state): State<AppState>, body: Body) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
let body_bytes = match http_body_util::BodyExt::collect(body).await {
|
||||
Ok(c) => c.to_bytes(),
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid request body"),
|
||||
};
|
||||
|
||||
let req: serde_json::Value = match serde_json::from_slice(&body_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid JSON"),
|
||||
};
|
||||
|
||||
let key_id = match req.get("KeyId").and_then(|v| v.as_str()) {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::BAD_REQUEST, "Missing KeyId"),
|
||||
};
|
||||
let ct_b64 = match req.get("CiphertextBlob").and_then(|v| v.as_str()) {
|
||||
Some(c) => c,
|
||||
None => return json_err(StatusCode::BAD_REQUEST, "Missing CiphertextBlob"),
|
||||
};
|
||||
let ciphertext = match B64.decode(ct_b64) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid base64"),
|
||||
};
|
||||
|
||||
match kms.decrypt_data(key_id, &ciphertext).await {
|
||||
Ok(pt) => json_ok(json!({
|
||||
"KeyId": key_id,
|
||||
"Plaintext": B64.encode(&pt),
|
||||
})),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn generate_data_key(State(state): State<AppState>, body: Body) -> Response {
|
||||
let kms = match &state.kms {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::SERVICE_UNAVAILABLE, "KMS not enabled"),
|
||||
};
|
||||
|
||||
let body_bytes = match http_body_util::BodyExt::collect(body).await {
|
||||
Ok(c) => c.to_bytes(),
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid request body"),
|
||||
};
|
||||
|
||||
let req: serde_json::Value = match serde_json::from_slice(&body_bytes) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return json_err(StatusCode::BAD_REQUEST, "Invalid JSON"),
|
||||
};
|
||||
|
||||
let key_id = match req.get("KeyId").and_then(|v| v.as_str()) {
|
||||
Some(k) => k,
|
||||
None => return json_err(StatusCode::BAD_REQUEST, "Missing KeyId"),
|
||||
};
|
||||
let num_bytes = req
|
||||
.get("NumberOfBytes")
|
||||
.and_then(|v| v.as_u64())
|
||||
.unwrap_or(32) as usize;
|
||||
|
||||
if num_bytes < 1 || num_bytes > 1024 {
|
||||
return json_err(StatusCode::BAD_REQUEST, "NumberOfBytes must be 1-1024");
|
||||
}
|
||||
|
||||
match kms.generate_data_key(key_id, num_bytes).await {
|
||||
Ok((plaintext, wrapped)) => json_ok(json!({
|
||||
"KeyId": key_id,
|
||||
"Plaintext": B64.encode(&plaintext),
|
||||
"CiphertextBlob": B64.encode(&wrapped),
|
||||
})),
|
||||
Err(e) => json_err(StatusCode::INTERNAL_SERVER_ERROR, &e.to_string()),
|
||||
}
|
||||
}
|
||||
1398
myfsio-engine/crates/myfsio-server/src/handlers/mod.rs
Normal file
1398
myfsio-engine/crates/myfsio-server/src/handlers/mod.rs
Normal file
File diff suppressed because it is too large
Load Diff
552
myfsio-engine/crates/myfsio-server/src/handlers/select.rs
Normal file
552
myfsio-engine/crates/myfsio-server/src/handlers/select.rs
Normal file
@@ -0,0 +1,552 @@
|
||||
use std::collections::HashMap;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::http::{HeaderMap, HeaderName, StatusCode};
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use base64::Engine;
|
||||
use bytes::Bytes;
|
||||
use crc32fast::Hasher;
|
||||
use duckdb::types::ValueRef;
|
||||
use duckdb::Connection;
|
||||
use futures::stream;
|
||||
use http_body_util::BodyExt;
|
||||
use myfsio_common::error::{S3Error, S3ErrorCode};
|
||||
use myfsio_storage::traits::StorageEngine;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
#[cfg(target_os = "windows")]
|
||||
#[link(name = "Rstrtmgr")]
|
||||
extern "system" {}
|
||||
|
||||
const CHUNK_SIZE: usize = 65_536;
|
||||
|
||||
pub async fn post_select_object_content(
|
||||
state: &AppState,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
headers: &HeaderMap,
|
||||
body: Body,
|
||||
) -> Response {
|
||||
if let Some(resp) = require_xml_content_type(headers) {
|
||||
return resp;
|
||||
}
|
||||
|
||||
let body_bytes = match body.collect().await {
|
||||
Ok(collected) => collected.to_bytes(),
|
||||
Err(_) => {
|
||||
return s3_error_response(S3Error::new(
|
||||
S3ErrorCode::MalformedXML,
|
||||
"Unable to parse XML document",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let request = match parse_select_request(&body_bytes) {
|
||||
Ok(r) => r,
|
||||
Err(err) => return s3_error_response(err),
|
||||
};
|
||||
|
||||
let object_path = match state.storage.get_object_path(bucket, key).await {
|
||||
Ok(path) => path,
|
||||
Err(_) => {
|
||||
return s3_error_response(S3Error::new(
|
||||
S3ErrorCode::NoSuchKey,
|
||||
"Object not found",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let join_res = tokio::task::spawn_blocking(move || execute_select_query(object_path, request)).await;
|
||||
let chunks = match join_res {
|
||||
Ok(Ok(chunks)) => chunks,
|
||||
Ok(Err(message)) => {
|
||||
return s3_error_response(S3Error::new(S3ErrorCode::InvalidRequest, message));
|
||||
}
|
||||
Err(_) => {
|
||||
return s3_error_response(S3Error::new(
|
||||
S3ErrorCode::InternalError,
|
||||
"SelectObjectContent execution failed",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
let bytes_returned: usize = chunks.iter().map(|c| c.len()).sum();
|
||||
let mut events: Vec<Bytes> = Vec::with_capacity(chunks.len() + 2);
|
||||
for chunk in chunks {
|
||||
events.push(Bytes::from(encode_select_event("Records", &chunk)));
|
||||
}
|
||||
|
||||
let stats_payload = build_stats_xml(0, bytes_returned);
|
||||
events.push(Bytes::from(encode_select_event("Stats", stats_payload.as_bytes())));
|
||||
events.push(Bytes::from(encode_select_event("End", b"")));
|
||||
|
||||
let stream = stream::iter(events.into_iter().map(Ok::<Bytes, std::io::Error>));
|
||||
let body = Body::from_stream(stream);
|
||||
|
||||
let mut response = (StatusCode::OK, body).into_response();
|
||||
response.headers_mut().insert(
|
||||
HeaderName::from_static("content-type"),
|
||||
"application/octet-stream".parse().unwrap(),
|
||||
);
|
||||
response.headers_mut().insert(
|
||||
HeaderName::from_static("x-amz-request-charged"),
|
||||
"requester".parse().unwrap(),
|
||||
);
|
||||
response
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct SelectRequest {
|
||||
expression: String,
|
||||
input_format: InputFormat,
|
||||
output_format: OutputFormat,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum InputFormat {
|
||||
Csv(CsvInputConfig),
|
||||
Json(JsonInputConfig),
|
||||
Parquet,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CsvInputConfig {
|
||||
file_header_info: String,
|
||||
field_delimiter: String,
|
||||
quote_character: String,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct JsonInputConfig {
|
||||
json_type: String,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum OutputFormat {
|
||||
Csv(CsvOutputConfig),
|
||||
Json(JsonOutputConfig),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CsvOutputConfig {
|
||||
field_delimiter: String,
|
||||
record_delimiter: String,
|
||||
quote_character: String,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct JsonOutputConfig {
|
||||
record_delimiter: String,
|
||||
}
|
||||
|
||||
fn parse_select_request(payload: &[u8]) -> Result<SelectRequest, S3Error> {
|
||||
let xml = String::from_utf8_lossy(payload);
|
||||
let doc = roxmltree::Document::parse(&xml)
|
||||
.map_err(|_| S3Error::new(S3ErrorCode::MalformedXML, "Unable to parse XML document"))?;
|
||||
|
||||
let root = doc.root_element();
|
||||
if root.tag_name().name() != "SelectObjectContentRequest" {
|
||||
return Err(S3Error::new(
|
||||
S3ErrorCode::MalformedXML,
|
||||
"Root element must be SelectObjectContentRequest",
|
||||
));
|
||||
}
|
||||
|
||||
let expression = child_text(&root, "Expression")
|
||||
.filter(|v| !v.is_empty())
|
||||
.ok_or_else(|| S3Error::new(S3ErrorCode::InvalidRequest, "Expression is required"))?;
|
||||
|
||||
let expression_type = child_text(&root, "ExpressionType").unwrap_or_else(|| "SQL".to_string());
|
||||
if !expression_type.eq_ignore_ascii_case("SQL") {
|
||||
return Err(S3Error::new(
|
||||
S3ErrorCode::InvalidRequest,
|
||||
"Only SQL expression type is supported",
|
||||
));
|
||||
}
|
||||
|
||||
let input_node = child(&root, "InputSerialization")
|
||||
.ok_or_else(|| S3Error::new(S3ErrorCode::InvalidRequest, "InputSerialization is required"))?;
|
||||
let output_node = child(&root, "OutputSerialization")
|
||||
.ok_or_else(|| S3Error::new(S3ErrorCode::InvalidRequest, "OutputSerialization is required"))?;
|
||||
|
||||
let input_format = parse_input_format(&input_node)?;
|
||||
let output_format = parse_output_format(&output_node)?;
|
||||
|
||||
Ok(SelectRequest {
|
||||
expression,
|
||||
input_format,
|
||||
output_format,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse_input_format(node: &roxmltree::Node<'_, '_>) -> Result<InputFormat, S3Error> {
|
||||
if let Some(csv_node) = child(node, "CSV") {
|
||||
return Ok(InputFormat::Csv(CsvInputConfig {
|
||||
file_header_info: child_text(&csv_node, "FileHeaderInfo")
|
||||
.unwrap_or_else(|| "NONE".to_string())
|
||||
.to_ascii_uppercase(),
|
||||
field_delimiter: child_text(&csv_node, "FieldDelimiter").unwrap_or_else(|| ",".to_string()),
|
||||
quote_character: child_text(&csv_node, "QuoteCharacter").unwrap_or_else(|| "\"".to_string()),
|
||||
}));
|
||||
}
|
||||
|
||||
if let Some(json_node) = child(node, "JSON") {
|
||||
return Ok(InputFormat::Json(JsonInputConfig {
|
||||
json_type: child_text(&json_node, "Type")
|
||||
.unwrap_or_else(|| "DOCUMENT".to_string())
|
||||
.to_ascii_uppercase(),
|
||||
}));
|
||||
}
|
||||
|
||||
if child(node, "Parquet").is_some() {
|
||||
return Ok(InputFormat::Parquet);
|
||||
}
|
||||
|
||||
Err(S3Error::new(
|
||||
S3ErrorCode::InvalidRequest,
|
||||
"InputSerialization must specify CSV, JSON, or Parquet",
|
||||
))
|
||||
}
|
||||
|
||||
fn parse_output_format(node: &roxmltree::Node<'_, '_>) -> Result<OutputFormat, S3Error> {
|
||||
if let Some(csv_node) = child(node, "CSV") {
|
||||
return Ok(OutputFormat::Csv(CsvOutputConfig {
|
||||
field_delimiter: child_text(&csv_node, "FieldDelimiter").unwrap_or_else(|| ",".to_string()),
|
||||
record_delimiter: child_text(&csv_node, "RecordDelimiter").unwrap_or_else(|| "\n".to_string()),
|
||||
quote_character: child_text(&csv_node, "QuoteCharacter").unwrap_or_else(|| "\"".to_string()),
|
||||
}));
|
||||
}
|
||||
|
||||
if let Some(json_node) = child(node, "JSON") {
|
||||
return Ok(OutputFormat::Json(JsonOutputConfig {
|
||||
record_delimiter: child_text(&json_node, "RecordDelimiter").unwrap_or_else(|| "\n".to_string()),
|
||||
}));
|
||||
}
|
||||
|
||||
Err(S3Error::new(
|
||||
S3ErrorCode::InvalidRequest,
|
||||
"OutputSerialization must specify CSV or JSON",
|
||||
))
|
||||
}
|
||||
|
||||
fn child<'a, 'input>(node: &'a roxmltree::Node<'a, 'input>, name: &str) -> Option<roxmltree::Node<'a, 'input>> {
|
||||
node.children()
|
||||
.find(|n| n.is_element() && n.tag_name().name() == name)
|
||||
}
|
||||
|
||||
fn child_text(node: &roxmltree::Node<'_, '_>, name: &str) -> Option<String> {
|
||||
child(node, name)
|
||||
.and_then(|n| n.text())
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
fn execute_select_query(path: PathBuf, request: SelectRequest) -> Result<Vec<Vec<u8>>, String> {
|
||||
let conn = Connection::open_in_memory().map_err(|e| format!("DuckDB connection error: {}", e))?;
|
||||
|
||||
load_input_table(&conn, &path, &request.input_format)?;
|
||||
|
||||
let expression = request
|
||||
.expression
|
||||
.replace("s3object", "data")
|
||||
.replace("S3Object", "data");
|
||||
|
||||
let mut stmt = conn
|
||||
.prepare(&expression)
|
||||
.map_err(|e| format!("SQL execution error: {}", e))?;
|
||||
let mut rows = stmt
|
||||
.query([])
|
||||
.map_err(|e| format!("SQL execution error: {}", e))?;
|
||||
let stmt_ref = rows
|
||||
.as_ref()
|
||||
.ok_or_else(|| "SQL execution error: statement metadata unavailable".to_string())?;
|
||||
let col_count = stmt_ref.column_count();
|
||||
let mut columns: Vec<String> = Vec::with_capacity(col_count);
|
||||
for i in 0..col_count {
|
||||
let name = stmt_ref
|
||||
.column_name(i)
|
||||
.map(|s| s.to_string())
|
||||
.unwrap_or_else(|_| format!("_{}", i));
|
||||
columns.push(name);
|
||||
}
|
||||
|
||||
match request.output_format {
|
||||
OutputFormat::Csv(cfg) => collect_csv_chunks(&mut rows, col_count, cfg),
|
||||
OutputFormat::Json(cfg) => collect_json_chunks(&mut rows, col_count, &columns, cfg),
|
||||
}
|
||||
}
|
||||
|
||||
fn load_input_table(conn: &Connection, path: &Path, input: &InputFormat) -> Result<(), String> {
|
||||
let path_str = path.to_string_lossy().replace('\\', "/");
|
||||
match input {
|
||||
InputFormat::Csv(cfg) => {
|
||||
let header = cfg.file_header_info == "USE" || cfg.file_header_info == "IGNORE";
|
||||
let delimiter = normalize_single_char(&cfg.field_delimiter, ',');
|
||||
let quote = normalize_single_char(&cfg.quote_character, '"');
|
||||
|
||||
let sql = format!(
|
||||
"CREATE TABLE data AS SELECT * FROM read_csv('{}', header={}, delim='{}', quote='{}')",
|
||||
sql_escape(&path_str),
|
||||
if header { "true" } else { "false" },
|
||||
sql_escape(&delimiter),
|
||||
sql_escape("e)
|
||||
);
|
||||
conn.execute_batch(&sql)
|
||||
.map_err(|e| format!("Failed loading CSV data: {}", e))?;
|
||||
}
|
||||
InputFormat::Json(cfg) => {
|
||||
let format = if cfg.json_type == "LINES" {
|
||||
"newline_delimited"
|
||||
} else {
|
||||
"array"
|
||||
};
|
||||
let sql = format!(
|
||||
"CREATE TABLE data AS SELECT * FROM read_json_auto('{}', format='{}')",
|
||||
sql_escape(&path_str),
|
||||
format
|
||||
);
|
||||
conn.execute_batch(&sql)
|
||||
.map_err(|e| format!("Failed loading JSON data: {}", e))?;
|
||||
}
|
||||
InputFormat::Parquet => {
|
||||
let sql = format!(
|
||||
"CREATE TABLE data AS SELECT * FROM read_parquet('{}')",
|
||||
sql_escape(&path_str)
|
||||
);
|
||||
conn.execute_batch(&sql)
|
||||
.map_err(|e| format!("Failed loading Parquet data: {}", e))?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn sql_escape(value: &str) -> String {
|
||||
value.replace('\'', "''")
|
||||
}
|
||||
|
||||
fn normalize_single_char(value: &str, default_char: char) -> String {
|
||||
value.chars().next().unwrap_or(default_char).to_string()
|
||||
}
|
||||
|
||||
fn collect_csv_chunks(
|
||||
rows: &mut duckdb::Rows<'_>,
|
||||
col_count: usize,
|
||||
cfg: CsvOutputConfig,
|
||||
) -> Result<Vec<Vec<u8>>, String> {
|
||||
let delimiter = cfg.field_delimiter;
|
||||
let record_delimiter = cfg.record_delimiter;
|
||||
let quote = cfg.quote_character;
|
||||
|
||||
let mut chunks: Vec<Vec<u8>> = Vec::new();
|
||||
let mut buffer = String::new();
|
||||
|
||||
while let Some(row) = rows.next().map_err(|e| format!("SQL execution error: {}", e))? {
|
||||
let mut fields: Vec<String> = Vec::with_capacity(col_count);
|
||||
for i in 0..col_count {
|
||||
let value = row
|
||||
.get_ref(i)
|
||||
.map_err(|e| format!("SQL execution error: {}", e))?;
|
||||
if matches!(value, ValueRef::Null) {
|
||||
fields.push(String::new());
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut text = value_ref_to_string(value);
|
||||
if text.contains(&delimiter) || text.contains("e) || text.contains(&record_delimiter) {
|
||||
text = text.replace("e, &(quote.clone() + "e));
|
||||
text = format!("{}{}{}", quote, text, quote);
|
||||
}
|
||||
fields.push(text);
|
||||
}
|
||||
buffer.push_str(&fields.join(&delimiter));
|
||||
buffer.push_str(&record_delimiter);
|
||||
|
||||
while buffer.len() >= CHUNK_SIZE {
|
||||
let rest = buffer.split_off(CHUNK_SIZE);
|
||||
chunks.push(buffer.into_bytes());
|
||||
buffer = rest;
|
||||
}
|
||||
}
|
||||
|
||||
if !buffer.is_empty() {
|
||||
chunks.push(buffer.into_bytes());
|
||||
}
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
fn collect_json_chunks(
|
||||
rows: &mut duckdb::Rows<'_>,
|
||||
col_count: usize,
|
||||
columns: &[String],
|
||||
cfg: JsonOutputConfig,
|
||||
) -> Result<Vec<Vec<u8>>, String> {
|
||||
let record_delimiter = cfg.record_delimiter;
|
||||
let mut chunks: Vec<Vec<u8>> = Vec::new();
|
||||
let mut buffer = String::new();
|
||||
|
||||
while let Some(row) = rows.next().map_err(|e| format!("SQL execution error: {}", e))? {
|
||||
let mut record: HashMap<String, serde_json::Value> = HashMap::with_capacity(col_count);
|
||||
for i in 0..col_count {
|
||||
let value = row
|
||||
.get_ref(i)
|
||||
.map_err(|e| format!("SQL execution error: {}", e))?;
|
||||
let key = columns
|
||||
.get(i)
|
||||
.cloned()
|
||||
.unwrap_or_else(|| format!("_{}", i));
|
||||
record.insert(key, value_ref_to_json(value));
|
||||
}
|
||||
let line = serde_json::to_string(&record)
|
||||
.map_err(|e| format!("JSON output encoding failed: {}", e))?;
|
||||
buffer.push_str(&line);
|
||||
buffer.push_str(&record_delimiter);
|
||||
|
||||
while buffer.len() >= CHUNK_SIZE {
|
||||
let rest = buffer.split_off(CHUNK_SIZE);
|
||||
chunks.push(buffer.into_bytes());
|
||||
buffer = rest;
|
||||
}
|
||||
}
|
||||
|
||||
if !buffer.is_empty() {
|
||||
chunks.push(buffer.into_bytes());
|
||||
}
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
fn value_ref_to_string(value: ValueRef<'_>) -> String {
|
||||
match value {
|
||||
ValueRef::Null => String::new(),
|
||||
ValueRef::Boolean(v) => v.to_string(),
|
||||
ValueRef::TinyInt(v) => v.to_string(),
|
||||
ValueRef::SmallInt(v) => v.to_string(),
|
||||
ValueRef::Int(v) => v.to_string(),
|
||||
ValueRef::BigInt(v) => v.to_string(),
|
||||
ValueRef::UTinyInt(v) => v.to_string(),
|
||||
ValueRef::USmallInt(v) => v.to_string(),
|
||||
ValueRef::UInt(v) => v.to_string(),
|
||||
ValueRef::UBigInt(v) => v.to_string(),
|
||||
ValueRef::Float(v) => v.to_string(),
|
||||
ValueRef::Double(v) => v.to_string(),
|
||||
ValueRef::Decimal(v) => v.to_string(),
|
||||
ValueRef::Text(v) => String::from_utf8_lossy(v).into_owned(),
|
||||
ValueRef::Blob(v) => base64::engine::general_purpose::STANDARD.encode(v),
|
||||
_ => format!("{:?}", value),
|
||||
}
|
||||
}
|
||||
|
||||
fn value_ref_to_json(value: ValueRef<'_>) -> serde_json::Value {
|
||||
match value {
|
||||
ValueRef::Null => serde_json::Value::Null,
|
||||
ValueRef::Boolean(v) => serde_json::Value::Bool(v),
|
||||
ValueRef::TinyInt(v) => serde_json::json!(v),
|
||||
ValueRef::SmallInt(v) => serde_json::json!(v),
|
||||
ValueRef::Int(v) => serde_json::json!(v),
|
||||
ValueRef::BigInt(v) => serde_json::json!(v),
|
||||
ValueRef::UTinyInt(v) => serde_json::json!(v),
|
||||
ValueRef::USmallInt(v) => serde_json::json!(v),
|
||||
ValueRef::UInt(v) => serde_json::json!(v),
|
||||
ValueRef::UBigInt(v) => serde_json::json!(v),
|
||||
ValueRef::Float(v) => serde_json::json!(v),
|
||||
ValueRef::Double(v) => serde_json::json!(v),
|
||||
ValueRef::Decimal(v) => serde_json::Value::String(v.to_string()),
|
||||
ValueRef::Text(v) => serde_json::Value::String(String::from_utf8_lossy(v).into_owned()),
|
||||
ValueRef::Blob(v) => serde_json::Value::String(base64::engine::general_purpose::STANDARD.encode(v)),
|
||||
_ => serde_json::Value::String(format!("{:?}", value)),
|
||||
}
|
||||
}
|
||||
|
||||
fn require_xml_content_type(headers: &HeaderMap) -> Option<Response> {
|
||||
let value = headers
|
||||
.get("content-type")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("")
|
||||
.trim();
|
||||
if value.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let lowered = value.to_ascii_lowercase();
|
||||
if lowered.starts_with("application/xml") || lowered.starts_with("text/xml") {
|
||||
return None;
|
||||
}
|
||||
Some(s3_error_response(S3Error::new(
|
||||
S3ErrorCode::InvalidRequest,
|
||||
"Content-Type must be application/xml or text/xml",
|
||||
)))
|
||||
}
|
||||
|
||||
fn s3_error_response(err: S3Error) -> Response {
|
||||
let status = StatusCode::from_u16(err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
let resource = if err.resource.is_empty() {
|
||||
"/".to_string()
|
||||
} else {
|
||||
err.resource.clone()
|
||||
};
|
||||
let body = err
|
||||
.with_resource(resource)
|
||||
.with_request_id(uuid::Uuid::new_v4().simple().to_string())
|
||||
.to_xml();
|
||||
(
|
||||
status,
|
||||
[("content-type", "application/xml")],
|
||||
body,
|
||||
)
|
||||
.into_response()
|
||||
}
|
||||
|
||||
fn build_stats_xml(bytes_scanned: usize, bytes_returned: usize) -> String {
|
||||
format!(
|
||||
"<Stats><BytesScanned>{}</BytesScanned><BytesProcessed>{}</BytesProcessed><BytesReturned>{}</BytesReturned></Stats>",
|
||||
bytes_scanned,
|
||||
bytes_scanned,
|
||||
bytes_returned
|
||||
)
|
||||
}
|
||||
|
||||
fn encode_select_event(event_type: &str, payload: &[u8]) -> Vec<u8> {
|
||||
let mut headers = Vec::new();
|
||||
headers.extend(encode_select_header(":event-type", event_type));
|
||||
if event_type == "Records" {
|
||||
headers.extend(encode_select_header(":content-type", "application/octet-stream"));
|
||||
} else if event_type == "Stats" {
|
||||
headers.extend(encode_select_header(":content-type", "text/xml"));
|
||||
}
|
||||
headers.extend(encode_select_header(":message-type", "event"));
|
||||
|
||||
let headers_len = headers.len() as u32;
|
||||
let total_len = 4 + 4 + 4 + headers.len() + payload.len() + 4;
|
||||
|
||||
let mut message = Vec::with_capacity(total_len);
|
||||
let mut prelude = Vec::with_capacity(8);
|
||||
prelude.extend((total_len as u32).to_be_bytes());
|
||||
prelude.extend(headers_len.to_be_bytes());
|
||||
|
||||
let prelude_crc = crc32(&prelude);
|
||||
message.extend(prelude);
|
||||
message.extend(prelude_crc.to_be_bytes());
|
||||
message.extend(headers);
|
||||
message.extend(payload);
|
||||
|
||||
let msg_crc = crc32(&message);
|
||||
message.extend(msg_crc.to_be_bytes());
|
||||
message
|
||||
}
|
||||
|
||||
fn encode_select_header(name: &str, value: &str) -> Vec<u8> {
|
||||
let name_bytes = name.as_bytes();
|
||||
let value_bytes = value.as_bytes();
|
||||
let mut header = Vec::with_capacity(1 + name_bytes.len() + 1 + 2 + value_bytes.len());
|
||||
header.push(name_bytes.len() as u8);
|
||||
header.extend(name_bytes);
|
||||
header.push(7);
|
||||
header.extend((value_bytes.len() as u16).to_be_bytes());
|
||||
header.extend(value_bytes);
|
||||
header
|
||||
}
|
||||
|
||||
fn crc32(data: &[u8]) -> u32 {
|
||||
let mut hasher = Hasher::new();
|
||||
hasher.update(data);
|
||||
hasher.finalize()
|
||||
}
|
||||
73
myfsio-engine/crates/myfsio-server/src/lib.rs
Normal file
73
myfsio-engine/crates/myfsio-server/src/lib.rs
Normal file
@@ -0,0 +1,73 @@
|
||||
pub mod config;
|
||||
pub mod handlers;
|
||||
pub mod middleware;
|
||||
pub mod services;
|
||||
pub mod state;
|
||||
|
||||
use axum::Router;
|
||||
|
||||
pub const SERVER_HEADER: &str = concat!("MyFSIO-Rust/", env!("CARGO_PKG_VERSION"));
|
||||
|
||||
pub fn create_router(state: state::AppState) -> Router {
|
||||
let mut router = Router::new()
|
||||
.route("/", axum::routing::get(handlers::list_buckets))
|
||||
.route(
|
||||
"/{bucket}",
|
||||
axum::routing::put(handlers::create_bucket)
|
||||
.get(handlers::get_bucket)
|
||||
.delete(handlers::delete_bucket)
|
||||
.head(handlers::head_bucket)
|
||||
.post(handlers::post_bucket),
|
||||
)
|
||||
.route(
|
||||
"/{bucket}/{*key}",
|
||||
axum::routing::put(handlers::put_object)
|
||||
.get(handlers::get_object)
|
||||
.delete(handlers::delete_object)
|
||||
.head(handlers::head_object)
|
||||
.post(handlers::post_object),
|
||||
);
|
||||
|
||||
if state.config.kms_enabled {
|
||||
router = router
|
||||
.route("/kms/keys", axum::routing::get(handlers::kms::list_keys).post(handlers::kms::create_key))
|
||||
.route("/kms/keys/{key_id}", axum::routing::get(handlers::kms::get_key).delete(handlers::kms::delete_key))
|
||||
.route("/kms/keys/{key_id}/enable", axum::routing::post(handlers::kms::enable_key))
|
||||
.route("/kms/keys/{key_id}/disable", axum::routing::post(handlers::kms::disable_key))
|
||||
.route("/kms/encrypt", axum::routing::post(handlers::kms::encrypt))
|
||||
.route("/kms/decrypt", axum::routing::post(handlers::kms::decrypt))
|
||||
.route("/kms/generate-data-key", axum::routing::post(handlers::kms::generate_data_key));
|
||||
}
|
||||
|
||||
router = router
|
||||
.route("/admin/site/local", axum::routing::get(handlers::admin::get_local_site).put(handlers::admin::update_local_site))
|
||||
.route("/admin/site/all", axum::routing::get(handlers::admin::list_all_sites))
|
||||
.route("/admin/site/peers", axum::routing::post(handlers::admin::register_peer_site))
|
||||
.route("/admin/site/peers/{site_id}", axum::routing::get(handlers::admin::get_peer_site).put(handlers::admin::update_peer_site).delete(handlers::admin::delete_peer_site))
|
||||
.route("/admin/site/peers/{site_id}/health", axum::routing::post(handlers::admin::check_peer_health))
|
||||
.route("/admin/site/topology", axum::routing::get(handlers::admin::get_topology))
|
||||
.route("/admin/site/peers/{site_id}/bidirectional-status", axum::routing::get(handlers::admin::check_bidirectional_status))
|
||||
.route("/admin/iam/users", axum::routing::get(handlers::admin::iam_list_users))
|
||||
.route("/admin/iam/users/{identifier}", axum::routing::get(handlers::admin::iam_get_user))
|
||||
.route("/admin/iam/users/{identifier}/policies", axum::routing::get(handlers::admin::iam_get_user_policies))
|
||||
.route("/admin/iam/users/{identifier}/access-keys", axum::routing::post(handlers::admin::iam_create_access_key))
|
||||
.route("/admin/iam/users/{identifier}/access-keys/{access_key}", axum::routing::delete(handlers::admin::iam_delete_access_key))
|
||||
.route("/admin/iam/users/{identifier}/disable", axum::routing::post(handlers::admin::iam_disable_user))
|
||||
.route("/admin/iam/users/{identifier}/enable", axum::routing::post(handlers::admin::iam_enable_user))
|
||||
.route("/admin/website-domains", axum::routing::get(handlers::admin::list_website_domains).post(handlers::admin::create_website_domain))
|
||||
.route("/admin/website-domains/{domain}", axum::routing::get(handlers::admin::get_website_domain).put(handlers::admin::update_website_domain).delete(handlers::admin::delete_website_domain))
|
||||
.route("/admin/gc/status", axum::routing::get(handlers::admin::gc_status))
|
||||
.route("/admin/gc/run", axum::routing::post(handlers::admin::gc_run))
|
||||
.route("/admin/gc/history", axum::routing::get(handlers::admin::gc_history))
|
||||
.route("/admin/integrity/status", axum::routing::get(handlers::admin::integrity_status))
|
||||
.route("/admin/integrity/run", axum::routing::post(handlers::admin::integrity_run))
|
||||
.route("/admin/integrity/history", axum::routing::get(handlers::admin::integrity_history));
|
||||
|
||||
router
|
||||
.layer(axum::middleware::from_fn_with_state(
|
||||
state.clone(),
|
||||
middleware::auth_layer,
|
||||
))
|
||||
.layer(axum::middleware::from_fn(middleware::server_header))
|
||||
.with_state(state)
|
||||
}
|
||||
97
myfsio-engine/crates/myfsio-server/src/main.rs
Normal file
97
myfsio-engine/crates/myfsio-server/src/main.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
use myfsio_server::config::ServerConfig;
|
||||
use myfsio_server::state::AppState;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() {
|
||||
tracing_subscriber::fmt::init();
|
||||
|
||||
let config = ServerConfig::from_env();
|
||||
let bind_addr = config.bind_addr;
|
||||
|
||||
tracing::info!("MyFSIO Rust Engine starting on {}", bind_addr);
|
||||
tracing::info!("Storage root: {}", config.storage_root.display());
|
||||
tracing::info!("Region: {}", config.region);
|
||||
tracing::info!(
|
||||
"Encryption: {}, KMS: {}, GC: {}, Lifecycle: {}, Integrity: {}, Metrics: {}",
|
||||
config.encryption_enabled,
|
||||
config.kms_enabled,
|
||||
config.gc_enabled,
|
||||
config.lifecycle_enabled,
|
||||
config.integrity_enabled,
|
||||
config.metrics_enabled
|
||||
);
|
||||
|
||||
let state = if config.encryption_enabled || config.kms_enabled {
|
||||
AppState::new_with_encryption(config.clone()).await
|
||||
} else {
|
||||
AppState::new(config.clone())
|
||||
};
|
||||
|
||||
let mut bg_handles: Vec<tokio::task::JoinHandle<()>> = Vec::new();
|
||||
|
||||
if let Some(ref gc) = state.gc {
|
||||
bg_handles.push(gc.clone().start_background());
|
||||
tracing::info!("GC background service started");
|
||||
}
|
||||
|
||||
if let Some(ref integrity) = state.integrity {
|
||||
bg_handles.push(integrity.clone().start_background());
|
||||
tracing::info!("Integrity checker background service started");
|
||||
}
|
||||
|
||||
if let Some(ref metrics) = state.metrics {
|
||||
bg_handles.push(metrics.clone().start_background());
|
||||
tracing::info!("Metrics collector background service started");
|
||||
}
|
||||
|
||||
if config.lifecycle_enabled {
|
||||
let lifecycle = std::sync::Arc::new(
|
||||
myfsio_server::services::lifecycle::LifecycleService::new(
|
||||
state.storage.clone(),
|
||||
myfsio_server::services::lifecycle::LifecycleConfig::default(),
|
||||
),
|
||||
);
|
||||
bg_handles.push(lifecycle.start_background());
|
||||
tracing::info!("Lifecycle manager background service started");
|
||||
}
|
||||
|
||||
let app = myfsio_server::create_router(state);
|
||||
|
||||
let listener = match tokio::net::TcpListener::bind(bind_addr).await {
|
||||
Ok(listener) => listener,
|
||||
Err(err) => {
|
||||
if err.kind() == std::io::ErrorKind::AddrInUse {
|
||||
tracing::error!("Port already in use: {}", bind_addr);
|
||||
} else {
|
||||
tracing::error!("Failed to bind {}: {}", bind_addr, err);
|
||||
}
|
||||
for handle in bg_handles {
|
||||
handle.abort();
|
||||
}
|
||||
std::process::exit(1);
|
||||
}
|
||||
};
|
||||
tracing::info!("Listening on {}", bind_addr);
|
||||
|
||||
if let Err(err) = axum::serve(listener, app)
|
||||
.with_graceful_shutdown(shutdown_signal())
|
||||
.await
|
||||
{
|
||||
tracing::error!("Server exited with error: {}", err);
|
||||
for handle in bg_handles {
|
||||
handle.abort();
|
||||
}
|
||||
std::process::exit(1);
|
||||
}
|
||||
|
||||
for handle in bg_handles {
|
||||
handle.abort();
|
||||
}
|
||||
}
|
||||
|
||||
async fn shutdown_signal() {
|
||||
tokio::signal::ctrl_c()
|
||||
.await
|
||||
.expect("Failed to listen for Ctrl+C");
|
||||
tracing::info!("Shutdown signal received");
|
||||
}
|
||||
569
myfsio-engine/crates/myfsio-server/src/middleware/auth.rs
Normal file
569
myfsio-engine/crates/myfsio-server/src/middleware/auth.rs
Normal file
@@ -0,0 +1,569 @@
|
||||
use axum::extract::{Request, State};
|
||||
use axum::http::{Method, StatusCode};
|
||||
use axum::middleware::Next;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
|
||||
use chrono::{NaiveDateTime, Utc};
|
||||
use myfsio_auth::sigv4;
|
||||
use myfsio_common::error::{S3Error, S3ErrorCode};
|
||||
use myfsio_common::types::Principal;
|
||||
|
||||
use crate::state::AppState;
|
||||
|
||||
pub async fn auth_layer(
|
||||
State(state): State<AppState>,
|
||||
mut req: Request,
|
||||
next: Next,
|
||||
) -> Response {
|
||||
let uri = req.uri().clone();
|
||||
let path = uri.path().to_string();
|
||||
|
||||
if path == "/" && req.method() == axum::http::Method::GET {
|
||||
match try_auth(&state, &req) {
|
||||
AuthResult::Ok(principal) => {
|
||||
if let Err(err) = authorize_request(&state, &principal, &req) {
|
||||
return error_response(err, &path);
|
||||
}
|
||||
req.extensions_mut().insert(principal);
|
||||
}
|
||||
AuthResult::Denied(err) => return error_response(err, &path),
|
||||
AuthResult::NoAuth => {
|
||||
return error_response(
|
||||
S3Error::new(S3ErrorCode::AccessDenied, "Missing credentials"),
|
||||
&path,
|
||||
);
|
||||
}
|
||||
}
|
||||
return next.run(req).await;
|
||||
}
|
||||
|
||||
match try_auth(&state, &req) {
|
||||
AuthResult::Ok(principal) => {
|
||||
if let Err(err) = authorize_request(&state, &principal, &req) {
|
||||
return error_response(err, &path);
|
||||
}
|
||||
req.extensions_mut().insert(principal);
|
||||
next.run(req).await
|
||||
}
|
||||
AuthResult::Denied(err) => error_response(err, &path),
|
||||
AuthResult::NoAuth => {
|
||||
error_response(
|
||||
S3Error::new(S3ErrorCode::AccessDenied, "Missing credentials"),
|
||||
&path,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum AuthResult {
|
||||
Ok(Principal),
|
||||
Denied(S3Error),
|
||||
NoAuth,
|
||||
}
|
||||
|
||||
fn authorize_request(state: &AppState, principal: &Principal, req: &Request) -> Result<(), S3Error> {
|
||||
let path = req.uri().path();
|
||||
if path == "/" {
|
||||
if state.iam.authorize(principal, None, "list", None) {
|
||||
return Ok(());
|
||||
}
|
||||
return Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied"));
|
||||
}
|
||||
|
||||
if path.starts_with("/admin/") || path.starts_with("/kms/") {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut segments = path.trim_start_matches('/').split('/').filter(|s| !s.is_empty());
|
||||
let bucket = match segments.next() {
|
||||
Some(b) => b,
|
||||
None => {
|
||||
return Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied"));
|
||||
}
|
||||
};
|
||||
let remaining: Vec<&str> = segments.collect();
|
||||
let query = req.uri().query().unwrap_or("");
|
||||
|
||||
if remaining.is_empty() {
|
||||
let action = resolve_bucket_action(req.method(), query);
|
||||
if state.iam.authorize(principal, Some(bucket), action, None) {
|
||||
return Ok(());
|
||||
}
|
||||
return Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied"));
|
||||
}
|
||||
|
||||
let object_key = remaining.join("/");
|
||||
if req.method() == Method::PUT {
|
||||
if let Some(copy_source) = req
|
||||
.headers()
|
||||
.get("x-amz-copy-source")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
{
|
||||
let source = copy_source.strip_prefix('/').unwrap_or(copy_source);
|
||||
if let Some((src_bucket, src_key)) = source.split_once('/') {
|
||||
let source_allowed =
|
||||
state.iam.authorize(principal, Some(src_bucket), "read", Some(src_key));
|
||||
let dest_allowed =
|
||||
state.iam.authorize(principal, Some(bucket), "write", Some(&object_key));
|
||||
if source_allowed && dest_allowed {
|
||||
return Ok(());
|
||||
}
|
||||
return Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let action = resolve_object_action(req.method(), query);
|
||||
if state
|
||||
.iam
|
||||
.authorize(principal, Some(bucket), action, Some(&object_key))
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
Err(S3Error::new(S3ErrorCode::AccessDenied, "Access denied"))
|
||||
}
|
||||
|
||||
fn resolve_bucket_action(method: &Method, query: &str) -> &'static str {
|
||||
if has_query_key(query, "versioning") {
|
||||
return "versioning";
|
||||
}
|
||||
if has_query_key(query, "tagging") {
|
||||
return "tagging";
|
||||
}
|
||||
if has_query_key(query, "cors") {
|
||||
return "cors";
|
||||
}
|
||||
if has_query_key(query, "location") {
|
||||
return "list";
|
||||
}
|
||||
if has_query_key(query, "encryption") {
|
||||
return "encryption";
|
||||
}
|
||||
if has_query_key(query, "lifecycle") {
|
||||
return "lifecycle";
|
||||
}
|
||||
if has_query_key(query, "acl") {
|
||||
return "share";
|
||||
}
|
||||
if has_query_key(query, "policy") || has_query_key(query, "policyStatus") {
|
||||
return "policy";
|
||||
}
|
||||
if has_query_key(query, "replication") {
|
||||
return "replication";
|
||||
}
|
||||
if has_query_key(query, "quota") {
|
||||
return "quota";
|
||||
}
|
||||
if has_query_key(query, "website") {
|
||||
return "website";
|
||||
}
|
||||
if has_query_key(query, "object-lock") {
|
||||
return "object_lock";
|
||||
}
|
||||
if has_query_key(query, "notification") {
|
||||
return "notification";
|
||||
}
|
||||
if has_query_key(query, "logging") {
|
||||
return "logging";
|
||||
}
|
||||
if has_query_key(query, "versions") || has_query_key(query, "uploads") {
|
||||
return "list";
|
||||
}
|
||||
if has_query_key(query, "delete") {
|
||||
return "delete";
|
||||
}
|
||||
|
||||
match *method {
|
||||
Method::GET => "list",
|
||||
Method::HEAD => "read",
|
||||
Method::PUT => "create_bucket",
|
||||
Method::DELETE => "delete_bucket",
|
||||
Method::POST => "write",
|
||||
_ => "list",
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_object_action(method: &Method, query: &str) -> &'static str {
|
||||
if has_query_key(query, "tagging") {
|
||||
return if *method == Method::GET { "read" } else { "write" };
|
||||
}
|
||||
if has_query_key(query, "acl") {
|
||||
return if *method == Method::GET { "read" } else { "write" };
|
||||
}
|
||||
if has_query_key(query, "retention") || has_query_key(query, "legal-hold") {
|
||||
return "object_lock";
|
||||
}
|
||||
if has_query_key(query, "attributes") {
|
||||
return "read";
|
||||
}
|
||||
if has_query_key(query, "uploads") || has_query_key(query, "uploadId") {
|
||||
return match *method {
|
||||
Method::GET => "read",
|
||||
_ => "write",
|
||||
};
|
||||
}
|
||||
if has_query_key(query, "select") {
|
||||
return "read";
|
||||
}
|
||||
|
||||
match *method {
|
||||
Method::GET | Method::HEAD => "read",
|
||||
Method::PUT => "write",
|
||||
Method::DELETE => "delete",
|
||||
Method::POST => "write",
|
||||
_ => "read",
|
||||
}
|
||||
}
|
||||
|
||||
fn has_query_key(query: &str, key: &str) -> bool {
|
||||
if query.is_empty() {
|
||||
return false;
|
||||
}
|
||||
query
|
||||
.split('&')
|
||||
.filter(|part| !part.is_empty())
|
||||
.any(|part| part == key || part.starts_with(&format!("{}=", key)))
|
||||
}
|
||||
|
||||
fn try_auth(state: &AppState, req: &Request) -> AuthResult {
|
||||
if let Some(auth_header) = req.headers().get("authorization") {
|
||||
if let Ok(auth_str) = auth_header.to_str() {
|
||||
if auth_str.starts_with("AWS4-HMAC-SHA256 ") {
|
||||
return verify_sigv4_header(state, req, auth_str);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let query = req.uri().query().unwrap_or("");
|
||||
if query.contains("X-Amz-Algorithm=AWS4-HMAC-SHA256") {
|
||||
return verify_sigv4_query(state, req);
|
||||
}
|
||||
|
||||
if let (Some(ak), Some(sk)) = (
|
||||
req.headers().get("x-access-key").and_then(|v| v.to_str().ok()),
|
||||
req.headers().get("x-secret-key").and_then(|v| v.to_str().ok()),
|
||||
) {
|
||||
return match state.iam.authenticate(ak, sk) {
|
||||
Some(principal) => AuthResult::Ok(principal),
|
||||
None => AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::SignatureDoesNotMatch),
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
AuthResult::NoAuth
|
||||
}
|
||||
|
||||
fn verify_sigv4_header(state: &AppState, req: &Request, auth_str: &str) -> AuthResult {
|
||||
let parts: Vec<&str> = auth_str
|
||||
.strip_prefix("AWS4-HMAC-SHA256 ")
|
||||
.unwrap()
|
||||
.split(", ")
|
||||
.collect();
|
||||
|
||||
if parts.len() != 3 {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Malformed Authorization header"),
|
||||
);
|
||||
}
|
||||
|
||||
let credential = parts[0].strip_prefix("Credential=").unwrap_or("");
|
||||
let signed_headers_str = parts[1].strip_prefix("SignedHeaders=").unwrap_or("");
|
||||
let provided_signature = parts[2].strip_prefix("Signature=").unwrap_or("");
|
||||
|
||||
let cred_parts: Vec<&str> = credential.split('/').collect();
|
||||
if cred_parts.len() != 5 {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Malformed credential"),
|
||||
);
|
||||
}
|
||||
|
||||
let access_key = cred_parts[0];
|
||||
let date_stamp = cred_parts[1];
|
||||
let region = cred_parts[2];
|
||||
let service = cred_parts[3];
|
||||
|
||||
let amz_date = req
|
||||
.headers()
|
||||
.get("x-amz-date")
|
||||
.or_else(|| req.headers().get("date"))
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
if amz_date.is_empty() {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::AccessDenied, "Missing Date header"),
|
||||
);
|
||||
}
|
||||
|
||||
if let Some(err) = check_timestamp_freshness(amz_date, state.config.sigv4_timestamp_tolerance_secs) {
|
||||
return AuthResult::Denied(err);
|
||||
}
|
||||
|
||||
let secret_key = match state.iam.get_secret_key(access_key) {
|
||||
Some(sk) => sk,
|
||||
None => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::InvalidAccessKeyId),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let method = req.method().as_str();
|
||||
let canonical_uri = req.uri().path();
|
||||
|
||||
let query_params = parse_query_params(req.uri().query().unwrap_or(""));
|
||||
|
||||
let payload_hash = req
|
||||
.headers()
|
||||
.get("x-amz-content-sha256")
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("UNSIGNED-PAYLOAD");
|
||||
|
||||
let signed_headers: Vec<&str> = signed_headers_str.split(';').collect();
|
||||
let header_values: Vec<(String, String)> = signed_headers
|
||||
.iter()
|
||||
.map(|&name| {
|
||||
let value = req
|
||||
.headers()
|
||||
.get(name)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("");
|
||||
(name.to_string(), value.to_string())
|
||||
})
|
||||
.collect();
|
||||
|
||||
let verified = sigv4::verify_sigv4_signature(
|
||||
method,
|
||||
canonical_uri,
|
||||
&query_params,
|
||||
signed_headers_str,
|
||||
&header_values,
|
||||
payload_hash,
|
||||
amz_date,
|
||||
date_stamp,
|
||||
region,
|
||||
service,
|
||||
&secret_key,
|
||||
provided_signature,
|
||||
);
|
||||
|
||||
if !verified {
|
||||
return AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::SignatureDoesNotMatch),
|
||||
);
|
||||
}
|
||||
|
||||
match state.iam.get_principal(access_key) {
|
||||
Some(p) => AuthResult::Ok(p),
|
||||
None => AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::InvalidAccessKeyId),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn verify_sigv4_query(state: &AppState, req: &Request) -> AuthResult {
|
||||
let query = req.uri().query().unwrap_or("");
|
||||
let params = parse_query_params(query);
|
||||
let param_map: std::collections::HashMap<&str, &str> = params
|
||||
.iter()
|
||||
.map(|(k, v)| (k.as_str(), v.as_str()))
|
||||
.collect();
|
||||
|
||||
let credential = match param_map.get("X-Amz-Credential") {
|
||||
Some(c) => *c,
|
||||
None => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Missing X-Amz-Credential"),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let signed_headers_str = param_map
|
||||
.get("X-Amz-SignedHeaders")
|
||||
.copied()
|
||||
.unwrap_or("host");
|
||||
let provided_signature = match param_map.get("X-Amz-Signature") {
|
||||
Some(s) => *s,
|
||||
None => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Missing X-Amz-Signature"),
|
||||
);
|
||||
}
|
||||
};
|
||||
let amz_date = match param_map.get("X-Amz-Date") {
|
||||
Some(d) => *d,
|
||||
None => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Missing X-Amz-Date"),
|
||||
);
|
||||
}
|
||||
};
|
||||
let expires_str = match param_map.get("X-Amz-Expires") {
|
||||
Some(e) => *e,
|
||||
None => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Missing X-Amz-Expires"),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let cred_parts: Vec<&str> = credential.split('/').collect();
|
||||
if cred_parts.len() != 5 {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Malformed credential"),
|
||||
);
|
||||
}
|
||||
|
||||
let access_key = cred_parts[0];
|
||||
let date_stamp = cred_parts[1];
|
||||
let region = cred_parts[2];
|
||||
let service = cred_parts[3];
|
||||
|
||||
let expires: u64 = match expires_str.parse() {
|
||||
Ok(e) => e,
|
||||
Err(_) => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "Invalid X-Amz-Expires"),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
if expires < state.config.presigned_url_min_expiry
|
||||
|| expires > state.config.presigned_url_max_expiry
|
||||
{
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::InvalidArgument, "X-Amz-Expires out of range"),
|
||||
);
|
||||
}
|
||||
|
||||
if let Ok(request_time) =
|
||||
NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ")
|
||||
{
|
||||
let request_utc = request_time.and_utc();
|
||||
let now = Utc::now();
|
||||
let elapsed = (now - request_utc).num_seconds();
|
||||
if elapsed > expires as i64 {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::AccessDenied, "Request has expired"),
|
||||
);
|
||||
}
|
||||
if elapsed < -(state.config.sigv4_timestamp_tolerance_secs as i64) {
|
||||
return AuthResult::Denied(
|
||||
S3Error::new(S3ErrorCode::AccessDenied, "Request is too far in the future"),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let secret_key = match state.iam.get_secret_key(access_key) {
|
||||
Some(sk) => sk,
|
||||
None => {
|
||||
return AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::InvalidAccessKeyId),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
let method = req.method().as_str();
|
||||
let canonical_uri = req.uri().path();
|
||||
|
||||
let query_params_no_sig: Vec<(String, String)> = params
|
||||
.iter()
|
||||
.filter(|(k, _)| k != "X-Amz-Signature")
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
let payload_hash = "UNSIGNED-PAYLOAD";
|
||||
|
||||
let signed_headers: Vec<&str> = signed_headers_str.split(';').collect();
|
||||
let header_values: Vec<(String, String)> = signed_headers
|
||||
.iter()
|
||||
.map(|&name| {
|
||||
let value = req
|
||||
.headers()
|
||||
.get(name)
|
||||
.and_then(|v| v.to_str().ok())
|
||||
.unwrap_or("");
|
||||
(name.to_string(), value.to_string())
|
||||
})
|
||||
.collect();
|
||||
|
||||
let verified = sigv4::verify_sigv4_signature(
|
||||
method,
|
||||
canonical_uri,
|
||||
&query_params_no_sig,
|
||||
signed_headers_str,
|
||||
&header_values,
|
||||
payload_hash,
|
||||
amz_date,
|
||||
date_stamp,
|
||||
region,
|
||||
service,
|
||||
&secret_key,
|
||||
provided_signature,
|
||||
);
|
||||
|
||||
if !verified {
|
||||
return AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::SignatureDoesNotMatch),
|
||||
);
|
||||
}
|
||||
|
||||
match state.iam.get_principal(access_key) {
|
||||
Some(p) => AuthResult::Ok(p),
|
||||
None => AuthResult::Denied(
|
||||
S3Error::from_code(S3ErrorCode::InvalidAccessKeyId),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
fn check_timestamp_freshness(amz_date: &str, tolerance_secs: u64) -> Option<S3Error> {
|
||||
let request_time = NaiveDateTime::parse_from_str(amz_date, "%Y%m%dT%H%M%SZ").ok()?;
|
||||
let request_utc = request_time.and_utc();
|
||||
let now = Utc::now();
|
||||
let diff = (now - request_utc).num_seconds().unsigned_abs();
|
||||
|
||||
if diff > tolerance_secs {
|
||||
return Some(S3Error::new(
|
||||
S3ErrorCode::AccessDenied,
|
||||
"Request timestamp too old or too far in the future",
|
||||
));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn parse_query_params(query: &str) -> Vec<(String, String)> {
|
||||
if query.is_empty() {
|
||||
return Vec::new();
|
||||
}
|
||||
query
|
||||
.split('&')
|
||||
.filter_map(|pair| {
|
||||
let mut parts = pair.splitn(2, '=');
|
||||
let key = parts.next()?;
|
||||
let value = parts.next().unwrap_or("");
|
||||
Some((
|
||||
urlencoding_decode(key),
|
||||
urlencoding_decode(value),
|
||||
))
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn urlencoding_decode(s: &str) -> String {
|
||||
percent_encoding::percent_decode_str(s)
|
||||
.decode_utf8_lossy()
|
||||
.into_owned()
|
||||
}
|
||||
|
||||
fn error_response(err: S3Error, resource: &str) -> Response {
|
||||
let status =
|
||||
StatusCode::from_u16(err.http_status()).unwrap_or(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
let request_id = uuid::Uuid::new_v4().simple().to_string();
|
||||
let body = err
|
||||
.with_resource(resource.to_string())
|
||||
.with_request_id(request_id)
|
||||
.to_xml();
|
||||
(status, [("content-type", "application/xml")], body).into_response()
|
||||
}
|
||||
16
myfsio-engine/crates/myfsio-server/src/middleware/mod.rs
Normal file
16
myfsio-engine/crates/myfsio-server/src/middleware/mod.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
mod auth;
|
||||
|
||||
pub use auth::auth_layer;
|
||||
|
||||
use axum::extract::Request;
|
||||
use axum::middleware::Next;
|
||||
use axum::response::Response;
|
||||
|
||||
pub async fn server_header(req: Request, next: Next) -> Response {
|
||||
let mut resp = next.run(req).await;
|
||||
resp.headers_mut().insert(
|
||||
"server",
|
||||
crate::SERVER_HEADER.parse().unwrap(),
|
||||
);
|
||||
resp
|
||||
}
|
||||
263
myfsio-engine/crates/myfsio-server/src/services/gc.rs
Normal file
263
myfsio-engine/crates/myfsio-server/src/services/gc.rs
Normal file
@@ -0,0 +1,263 @@
|
||||
use serde_json::{json, Value};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub struct GcConfig {
|
||||
pub interval_hours: f64,
|
||||
pub temp_file_max_age_hours: f64,
|
||||
pub multipart_max_age_days: u64,
|
||||
pub lock_file_max_age_hours: f64,
|
||||
pub dry_run: bool,
|
||||
}
|
||||
|
||||
impl Default for GcConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval_hours: 6.0,
|
||||
temp_file_max_age_hours: 24.0,
|
||||
multipart_max_age_days: 7,
|
||||
lock_file_max_age_hours: 1.0,
|
||||
dry_run: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct GcService {
|
||||
storage_root: PathBuf,
|
||||
config: GcConfig,
|
||||
running: Arc<RwLock<bool>>,
|
||||
history: Arc<RwLock<Vec<Value>>>,
|
||||
history_path: PathBuf,
|
||||
}
|
||||
|
||||
impl GcService {
|
||||
pub fn new(storage_root: PathBuf, config: GcConfig) -> Self {
|
||||
let history_path = storage_root
|
||||
.join(".myfsio.sys")
|
||||
.join("config")
|
||||
.join("gc_history.json");
|
||||
|
||||
let history = if history_path.exists() {
|
||||
std::fs::read_to_string(&history_path)
|
||||
.ok()
|
||||
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
|
||||
.and_then(|v| v.get("executions").and_then(|e| e.as_array().cloned()))
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Self {
|
||||
storage_root,
|
||||
config,
|
||||
running: Arc::new(RwLock::new(false)),
|
||||
history: Arc::new(RwLock::new(history)),
|
||||
history_path,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn status(&self) -> Value {
|
||||
let running = *self.running.read().await;
|
||||
json!({
|
||||
"enabled": true,
|
||||
"running": running,
|
||||
"interval_hours": self.config.interval_hours,
|
||||
"temp_file_max_age_hours": self.config.temp_file_max_age_hours,
|
||||
"multipart_max_age_days": self.config.multipart_max_age_days,
|
||||
"lock_file_max_age_hours": self.config.lock_file_max_age_hours,
|
||||
"dry_run": self.config.dry_run,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn history(&self) -> Value {
|
||||
let history = self.history.read().await;
|
||||
json!({ "executions": *history })
|
||||
}
|
||||
|
||||
pub async fn run_now(&self, dry_run: bool) -> Result<Value, String> {
|
||||
{
|
||||
let mut running = self.running.write().await;
|
||||
if *running {
|
||||
return Err("GC already running".to_string());
|
||||
}
|
||||
*running = true;
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
let result = self.execute_gc(dry_run || self.config.dry_run).await;
|
||||
let elapsed = start.elapsed().as_secs_f64();
|
||||
|
||||
*self.running.write().await = false;
|
||||
|
||||
let mut result_json = result.clone();
|
||||
if let Some(obj) = result_json.as_object_mut() {
|
||||
obj.insert("execution_time_seconds".to_string(), json!(elapsed));
|
||||
}
|
||||
|
||||
let record = json!({
|
||||
"timestamp": chrono::Utc::now().timestamp_millis() as f64 / 1000.0,
|
||||
"dry_run": dry_run || self.config.dry_run,
|
||||
"result": result_json,
|
||||
});
|
||||
|
||||
{
|
||||
let mut history = self.history.write().await;
|
||||
history.push(record);
|
||||
if history.len() > 50 {
|
||||
let excess = history.len() - 50;
|
||||
history.drain(..excess);
|
||||
}
|
||||
}
|
||||
self.save_history().await;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn execute_gc(&self, dry_run: bool) -> Value {
|
||||
let mut temp_files_deleted = 0u64;
|
||||
let mut temp_bytes_freed = 0u64;
|
||||
let mut multipart_uploads_deleted = 0u64;
|
||||
let mut lock_files_deleted = 0u64;
|
||||
let mut empty_dirs_removed = 0u64;
|
||||
let mut errors: Vec<String> = Vec::new();
|
||||
|
||||
let now = std::time::SystemTime::now();
|
||||
let temp_max_age = std::time::Duration::from_secs_f64(self.config.temp_file_max_age_hours * 3600.0);
|
||||
let multipart_max_age = std::time::Duration::from_secs(self.config.multipart_max_age_days * 86400);
|
||||
let lock_max_age = std::time::Duration::from_secs_f64(self.config.lock_file_max_age_hours * 3600.0);
|
||||
|
||||
let tmp_dir = self.storage_root.join(".myfsio.sys").join("tmp");
|
||||
if tmp_dir.exists() {
|
||||
match std::fs::read_dir(&tmp_dir) {
|
||||
Ok(entries) => {
|
||||
for entry in entries.flatten() {
|
||||
if let Ok(metadata) = entry.metadata() {
|
||||
if let Ok(modified) = metadata.modified() {
|
||||
if let Ok(age) = now.duration_since(modified) {
|
||||
if age > temp_max_age {
|
||||
let size = metadata.len();
|
||||
if !dry_run {
|
||||
if let Err(e) = std::fs::remove_file(entry.path()) {
|
||||
errors.push(format!("Failed to remove temp file: {}", e));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
temp_files_deleted += 1;
|
||||
temp_bytes_freed += size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => errors.push(format!("Failed to read tmp dir: {}", e)),
|
||||
}
|
||||
}
|
||||
|
||||
let multipart_dir = self.storage_root.join(".myfsio.sys").join("multipart");
|
||||
if multipart_dir.exists() {
|
||||
if let Ok(bucket_dirs) = std::fs::read_dir(&multipart_dir) {
|
||||
for bucket_entry in bucket_dirs.flatten() {
|
||||
if let Ok(uploads) = std::fs::read_dir(bucket_entry.path()) {
|
||||
for upload in uploads.flatten() {
|
||||
if let Ok(metadata) = upload.metadata() {
|
||||
if let Ok(modified) = metadata.modified() {
|
||||
if let Ok(age) = now.duration_since(modified) {
|
||||
if age > multipart_max_age {
|
||||
if !dry_run {
|
||||
let _ = std::fs::remove_dir_all(upload.path());
|
||||
}
|
||||
multipart_uploads_deleted += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let buckets_dir = self.storage_root.join(".myfsio.sys").join("buckets");
|
||||
if buckets_dir.exists() {
|
||||
if let Ok(bucket_dirs) = std::fs::read_dir(&buckets_dir) {
|
||||
for bucket_entry in bucket_dirs.flatten() {
|
||||
let locks_dir = bucket_entry.path().join("locks");
|
||||
if locks_dir.exists() {
|
||||
if let Ok(locks) = std::fs::read_dir(&locks_dir) {
|
||||
for lock in locks.flatten() {
|
||||
if let Ok(metadata) = lock.metadata() {
|
||||
if let Ok(modified) = metadata.modified() {
|
||||
if let Ok(age) = now.duration_since(modified) {
|
||||
if age > lock_max_age {
|
||||
if !dry_run {
|
||||
let _ = std::fs::remove_file(lock.path());
|
||||
}
|
||||
lock_files_deleted += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !dry_run {
|
||||
for dir in [&tmp_dir, &multipart_dir] {
|
||||
if dir.exists() {
|
||||
if let Ok(entries) = std::fs::read_dir(dir) {
|
||||
for entry in entries.flatten() {
|
||||
if entry.path().is_dir() {
|
||||
if let Ok(mut contents) = std::fs::read_dir(entry.path()) {
|
||||
if contents.next().is_none() {
|
||||
let _ = std::fs::remove_dir(entry.path());
|
||||
empty_dirs_removed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json!({
|
||||
"temp_files_deleted": temp_files_deleted,
|
||||
"temp_bytes_freed": temp_bytes_freed,
|
||||
"multipart_uploads_deleted": multipart_uploads_deleted,
|
||||
"lock_files_deleted": lock_files_deleted,
|
||||
"empty_dirs_removed": empty_dirs_removed,
|
||||
"errors": errors,
|
||||
})
|
||||
}
|
||||
|
||||
async fn save_history(&self) {
|
||||
let history = self.history.read().await;
|
||||
let data = json!({ "executions": *history });
|
||||
if let Some(parent) = self.history_path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
let _ = std::fs::write(&self.history_path, serde_json::to_string_pretty(&data).unwrap_or_default());
|
||||
}
|
||||
|
||||
pub fn start_background(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
|
||||
let interval = std::time::Duration::from_secs_f64(self.config.interval_hours * 3600.0);
|
||||
tokio::spawn(async move {
|
||||
let mut timer = tokio::time::interval(interval);
|
||||
timer.tick().await;
|
||||
loop {
|
||||
timer.tick().await;
|
||||
tracing::info!("GC cycle starting");
|
||||
match self.run_now(false).await {
|
||||
Ok(result) => tracing::info!("GC cycle complete: {:?}", result),
|
||||
Err(e) => tracing::warn!("GC cycle failed: {}", e),
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
204
myfsio-engine/crates/myfsio-server/src/services/integrity.rs
Normal file
204
myfsio-engine/crates/myfsio-server/src/services/integrity.rs
Normal file
@@ -0,0 +1,204 @@
|
||||
use myfsio_storage::fs_backend::FsStorageBackend;
|
||||
use myfsio_storage::traits::StorageEngine;
|
||||
use serde_json::{json, Value};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub struct IntegrityConfig {
|
||||
pub interval_hours: f64,
|
||||
pub batch_size: usize,
|
||||
pub auto_heal: bool,
|
||||
pub dry_run: bool,
|
||||
}
|
||||
|
||||
impl Default for IntegrityConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval_hours: 24.0,
|
||||
batch_size: 1000,
|
||||
auto_heal: false,
|
||||
dry_run: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct IntegrityService {
|
||||
storage: Arc<FsStorageBackend>,
|
||||
config: IntegrityConfig,
|
||||
running: Arc<RwLock<bool>>,
|
||||
history: Arc<RwLock<Vec<Value>>>,
|
||||
history_path: PathBuf,
|
||||
}
|
||||
|
||||
impl IntegrityService {
|
||||
pub fn new(
|
||||
storage: Arc<FsStorageBackend>,
|
||||
storage_root: &std::path::Path,
|
||||
config: IntegrityConfig,
|
||||
) -> Self {
|
||||
let history_path = storage_root
|
||||
.join(".myfsio.sys")
|
||||
.join("config")
|
||||
.join("integrity_history.json");
|
||||
|
||||
let history = if history_path.exists() {
|
||||
std::fs::read_to_string(&history_path)
|
||||
.ok()
|
||||
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
|
||||
.and_then(|v| v.get("executions").and_then(|e| e.as_array().cloned()))
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Self {
|
||||
storage,
|
||||
config,
|
||||
running: Arc::new(RwLock::new(false)),
|
||||
history: Arc::new(RwLock::new(history)),
|
||||
history_path,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn status(&self) -> Value {
|
||||
let running = *self.running.read().await;
|
||||
json!({
|
||||
"enabled": true,
|
||||
"running": running,
|
||||
"interval_hours": self.config.interval_hours,
|
||||
"batch_size": self.config.batch_size,
|
||||
"auto_heal": self.config.auto_heal,
|
||||
"dry_run": self.config.dry_run,
|
||||
})
|
||||
}
|
||||
|
||||
pub async fn history(&self) -> Value {
|
||||
let history = self.history.read().await;
|
||||
json!({ "executions": *history })
|
||||
}
|
||||
|
||||
pub async fn run_now(&self, dry_run: bool, auto_heal: bool) -> Result<Value, String> {
|
||||
{
|
||||
let mut running = self.running.write().await;
|
||||
if *running {
|
||||
return Err("Integrity check already running".to_string());
|
||||
}
|
||||
*running = true;
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
let result = self.check_integrity(dry_run, auto_heal).await;
|
||||
let elapsed = start.elapsed().as_secs_f64();
|
||||
|
||||
*self.running.write().await = false;
|
||||
|
||||
let mut result_json = result.clone();
|
||||
if let Some(obj) = result_json.as_object_mut() {
|
||||
obj.insert("execution_time_seconds".to_string(), json!(elapsed));
|
||||
}
|
||||
|
||||
let record = json!({
|
||||
"timestamp": chrono::Utc::now().timestamp_millis() as f64 / 1000.0,
|
||||
"dry_run": dry_run,
|
||||
"auto_heal": auto_heal,
|
||||
"result": result_json,
|
||||
});
|
||||
|
||||
{
|
||||
let mut history = self.history.write().await;
|
||||
history.push(record);
|
||||
if history.len() > 50 {
|
||||
let excess = history.len() - 50;
|
||||
history.drain(..excess);
|
||||
}
|
||||
}
|
||||
self.save_history().await;
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn check_integrity(&self, _dry_run: bool, _auto_heal: bool) -> Value {
|
||||
let buckets = match self.storage.list_buckets().await {
|
||||
Ok(b) => b,
|
||||
Err(e) => return json!({"error": e.to_string()}),
|
||||
};
|
||||
|
||||
let mut objects_scanned = 0u64;
|
||||
let mut corrupted = 0u64;
|
||||
let mut phantom_metadata = 0u64;
|
||||
let mut errors: Vec<String> = Vec::new();
|
||||
|
||||
for bucket in &buckets {
|
||||
let params = myfsio_common::types::ListParams {
|
||||
max_keys: self.config.batch_size,
|
||||
..Default::default()
|
||||
};
|
||||
let objects = match self.storage.list_objects(&bucket.name, ¶ms).await {
|
||||
Ok(r) => r.objects,
|
||||
Err(e) => {
|
||||
errors.push(format!("{}: {}", bucket.name, e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
for obj in &objects {
|
||||
objects_scanned += 1;
|
||||
match self.storage.get_object_path(&bucket.name, &obj.key).await {
|
||||
Ok(path) => {
|
||||
if !path.exists() {
|
||||
phantom_metadata += 1;
|
||||
} else if let Some(ref expected_etag) = obj.etag {
|
||||
match myfsio_crypto::hashing::md5_file(&path) {
|
||||
Ok(actual_etag) => {
|
||||
if &actual_etag != expected_etag {
|
||||
corrupted += 1;
|
||||
}
|
||||
}
|
||||
Err(e) => errors.push(format!("{}:{}: {}", bucket.name, obj.key, e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => errors.push(format!("{}:{}: {}", bucket.name, obj.key, e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json!({
|
||||
"objects_scanned": objects_scanned,
|
||||
"buckets_scanned": buckets.len(),
|
||||
"corrupted_objects": corrupted,
|
||||
"phantom_metadata": phantom_metadata,
|
||||
"errors": errors,
|
||||
})
|
||||
}
|
||||
|
||||
async fn save_history(&self) {
|
||||
let history = self.history.read().await;
|
||||
let data = json!({ "executions": *history });
|
||||
if let Some(parent) = self.history_path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
let _ = std::fs::write(
|
||||
&self.history_path,
|
||||
serde_json::to_string_pretty(&data).unwrap_or_default(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn start_background(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
|
||||
let interval = std::time::Duration::from_secs_f64(self.config.interval_hours * 3600.0);
|
||||
tokio::spawn(async move {
|
||||
let mut timer = tokio::time::interval(interval);
|
||||
timer.tick().await;
|
||||
loop {
|
||||
timer.tick().await;
|
||||
tracing::info!("Integrity check starting");
|
||||
match self.run_now(false, false).await {
|
||||
Ok(result) => tracing::info!("Integrity check complete: {:?}", result),
|
||||
Err(e) => tracing::warn!("Integrity check failed: {}", e),
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
153
myfsio-engine/crates/myfsio-server/src/services/lifecycle.rs
Normal file
153
myfsio-engine/crates/myfsio-server/src/services/lifecycle.rs
Normal file
@@ -0,0 +1,153 @@
|
||||
use myfsio_storage::fs_backend::FsStorageBackend;
|
||||
use myfsio_storage::traits::StorageEngine;
|
||||
use serde_json::{json, Value};
|
||||
use std::sync::Arc;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub struct LifecycleConfig {
|
||||
pub interval_seconds: u64,
|
||||
}
|
||||
|
||||
impl Default for LifecycleConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval_seconds: 3600,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct LifecycleService {
|
||||
storage: Arc<FsStorageBackend>,
|
||||
config: LifecycleConfig,
|
||||
running: Arc<RwLock<bool>>,
|
||||
}
|
||||
|
||||
impl LifecycleService {
|
||||
pub fn new(storage: Arc<FsStorageBackend>, config: LifecycleConfig) -> Self {
|
||||
Self {
|
||||
storage,
|
||||
config,
|
||||
running: Arc::new(RwLock::new(false)),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run_cycle(&self) -> Result<Value, String> {
|
||||
{
|
||||
let mut running = self.running.write().await;
|
||||
if *running {
|
||||
return Err("Lifecycle already running".to_string());
|
||||
}
|
||||
*running = true;
|
||||
}
|
||||
|
||||
let result = self.evaluate_rules().await;
|
||||
*self.running.write().await = false;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
async fn evaluate_rules(&self) -> Value {
|
||||
let buckets = match self.storage.list_buckets().await {
|
||||
Ok(b) => b,
|
||||
Err(e) => return json!({"error": e.to_string()}),
|
||||
};
|
||||
|
||||
let mut total_expired = 0u64;
|
||||
let mut total_multipart_aborted = 0u64;
|
||||
let mut errors: Vec<String> = Vec::new();
|
||||
|
||||
for bucket in &buckets {
|
||||
let config = match self.storage.get_bucket_config(&bucket.name).await {
|
||||
Ok(c) => c,
|
||||
Err(_) => continue,
|
||||
};
|
||||
|
||||
let lifecycle = match &config.lifecycle {
|
||||
Some(lc) => lc,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let rules = match lifecycle.as_str().and_then(|s| serde_json::from_str::<Value>(s).ok()) {
|
||||
Some(v) => v,
|
||||
None => continue,
|
||||
};
|
||||
|
||||
let rules_arr = match rules.get("Rules").and_then(|r| r.as_array()) {
|
||||
Some(a) => a.clone(),
|
||||
None => continue,
|
||||
};
|
||||
|
||||
for rule in &rules_arr {
|
||||
if rule.get("Status").and_then(|s| s.as_str()) != Some("Enabled") {
|
||||
continue;
|
||||
}
|
||||
|
||||
let prefix = rule
|
||||
.get("Filter")
|
||||
.and_then(|f| f.get("Prefix"))
|
||||
.and_then(|p| p.as_str())
|
||||
.or_else(|| rule.get("Prefix").and_then(|p| p.as_str()))
|
||||
.unwrap_or("");
|
||||
|
||||
if let Some(exp) = rule.get("Expiration") {
|
||||
if let Some(days) = exp.get("Days").and_then(|d| d.as_u64()) {
|
||||
let cutoff = chrono::Utc::now() - chrono::Duration::days(days as i64);
|
||||
let params = myfsio_common::types::ListParams {
|
||||
max_keys: 1000,
|
||||
prefix: if prefix.is_empty() { None } else { Some(prefix.to_string()) },
|
||||
..Default::default()
|
||||
};
|
||||
if let Ok(result) = self.storage.list_objects(&bucket.name, ¶ms).await {
|
||||
for obj in &result.objects {
|
||||
if obj.last_modified < cutoff {
|
||||
match self.storage.delete_object(&bucket.name, &obj.key).await {
|
||||
Ok(()) => total_expired += 1,
|
||||
Err(e) => errors.push(format!("{}:{}: {}", bucket.name, obj.key, e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(abort) = rule.get("AbortIncompleteMultipartUpload") {
|
||||
if let Some(days) = abort.get("DaysAfterInitiation").and_then(|d| d.as_u64()) {
|
||||
let cutoff = chrono::Utc::now() - chrono::Duration::days(days as i64);
|
||||
if let Ok(uploads) = self.storage.list_multipart_uploads(&bucket.name).await {
|
||||
for upload in &uploads {
|
||||
if upload.initiated < cutoff {
|
||||
match self.storage.abort_multipart(&bucket.name, &upload.upload_id).await {
|
||||
Ok(()) => total_multipart_aborted += 1,
|
||||
Err(e) => errors.push(format!("abort {}: {}", upload.upload_id, e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
json!({
|
||||
"objects_expired": total_expired,
|
||||
"multipart_aborted": total_multipart_aborted,
|
||||
"buckets_evaluated": buckets.len(),
|
||||
"errors": errors,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn start_background(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
|
||||
let interval = std::time::Duration::from_secs(self.config.interval_seconds);
|
||||
tokio::spawn(async move {
|
||||
let mut timer = tokio::time::interval(interval);
|
||||
timer.tick().await;
|
||||
loop {
|
||||
timer.tick().await;
|
||||
tracing::info!("Lifecycle evaluation starting");
|
||||
match self.run_cycle().await {
|
||||
Ok(result) => tracing::info!("Lifecycle cycle complete: {:?}", result),
|
||||
Err(e) => tracing::warn!("Lifecycle cycle failed: {}", e),
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
219
myfsio-engine/crates/myfsio-server/src/services/metrics.rs
Normal file
219
myfsio-engine/crates/myfsio-server/src/services/metrics.rs
Normal file
@@ -0,0 +1,219 @@
|
||||
use serde_json::{json, Value};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Instant;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
pub struct MetricsConfig {
|
||||
pub interval_minutes: u64,
|
||||
pub retention_hours: u64,
|
||||
}
|
||||
|
||||
impl Default for MetricsConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
interval_minutes: 5,
|
||||
retention_hours: 24,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct MethodStats {
|
||||
count: u64,
|
||||
success_count: u64,
|
||||
error_count: u64,
|
||||
bytes_in: u64,
|
||||
bytes_out: u64,
|
||||
latencies: Vec<f64>,
|
||||
}
|
||||
|
||||
impl MethodStats {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
count: 0,
|
||||
success_count: 0,
|
||||
error_count: 0,
|
||||
bytes_in: 0,
|
||||
bytes_out: 0,
|
||||
latencies: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
fn to_json(&self) -> Value {
|
||||
let (min, max, avg, p50, p95, p99) = if self.latencies.is_empty() {
|
||||
(0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
|
||||
} else {
|
||||
let mut sorted = self.latencies.clone();
|
||||
sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let len = sorted.len();
|
||||
let sum: f64 = sorted.iter().sum();
|
||||
(
|
||||
sorted[0],
|
||||
sorted[len - 1],
|
||||
sum / len as f64,
|
||||
sorted[len / 2],
|
||||
sorted[((len as f64 * 0.95) as usize).min(len - 1)],
|
||||
sorted[((len as f64 * 0.99) as usize).min(len - 1)],
|
||||
)
|
||||
};
|
||||
|
||||
json!({
|
||||
"count": self.count,
|
||||
"success_count": self.success_count,
|
||||
"error_count": self.error_count,
|
||||
"bytes_in": self.bytes_in,
|
||||
"bytes_out": self.bytes_out,
|
||||
"latency_min_ms": min,
|
||||
"latency_max_ms": max,
|
||||
"latency_avg_ms": avg,
|
||||
"latency_p50_ms": p50,
|
||||
"latency_p95_ms": p95,
|
||||
"latency_p99_ms": p99,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
struct CurrentWindow {
|
||||
by_method: HashMap<String, MethodStats>,
|
||||
by_status_class: HashMap<String, u64>,
|
||||
start_time: Instant,
|
||||
}
|
||||
|
||||
impl CurrentWindow {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
by_method: HashMap::new(),
|
||||
by_status_class: HashMap::new(),
|
||||
start_time: Instant::now(),
|
||||
}
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
self.by_method.clear();
|
||||
self.by_status_class.clear();
|
||||
self.start_time = Instant::now();
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MetricsService {
|
||||
config: MetricsConfig,
|
||||
current: Arc<RwLock<CurrentWindow>>,
|
||||
snapshots: Arc<RwLock<Vec<Value>>>,
|
||||
snapshots_path: PathBuf,
|
||||
}
|
||||
|
||||
impl MetricsService {
|
||||
pub fn new(storage_root: &std::path::Path, config: MetricsConfig) -> Self {
|
||||
let snapshots_path = storage_root
|
||||
.join(".myfsio.sys")
|
||||
.join("config")
|
||||
.join("operation_metrics.json");
|
||||
|
||||
let snapshots = if snapshots_path.exists() {
|
||||
std::fs::read_to_string(&snapshots_path)
|
||||
.ok()
|
||||
.and_then(|s| serde_json::from_str::<Value>(&s).ok())
|
||||
.and_then(|v| v.get("snapshots").and_then(|s| s.as_array().cloned()))
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
|
||||
Self {
|
||||
config,
|
||||
current: Arc::new(RwLock::new(CurrentWindow::new())),
|
||||
snapshots: Arc::new(RwLock::new(snapshots)),
|
||||
snapshots_path,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn record(&self, method: &str, status: u16, latency_ms: f64, bytes_in: u64, bytes_out: u64) {
|
||||
let mut window = self.current.write().await;
|
||||
let stats = window.by_method.entry(method.to_string()).or_insert_with(MethodStats::new);
|
||||
stats.count += 1;
|
||||
if status < 400 {
|
||||
stats.success_count += 1;
|
||||
} else {
|
||||
stats.error_count += 1;
|
||||
}
|
||||
stats.bytes_in += bytes_in;
|
||||
stats.bytes_out += bytes_out;
|
||||
stats.latencies.push(latency_ms);
|
||||
|
||||
let class = format!("{}xx", status / 100);
|
||||
*window.by_status_class.entry(class).or_insert(0) += 1;
|
||||
}
|
||||
|
||||
pub async fn snapshot(&self) -> Value {
|
||||
let window = self.current.read().await;
|
||||
let mut by_method = serde_json::Map::new();
|
||||
for (method, stats) in &window.by_method {
|
||||
by_method.insert(method.clone(), stats.to_json());
|
||||
}
|
||||
|
||||
let snapshots = self.snapshots.read().await;
|
||||
json!({
|
||||
"enabled": true,
|
||||
"current_window": {
|
||||
"by_method": by_method,
|
||||
"by_status_class": window.by_status_class,
|
||||
"window_start_elapsed_secs": window.start_time.elapsed().as_secs_f64(),
|
||||
},
|
||||
"snapshots": *snapshots,
|
||||
})
|
||||
}
|
||||
|
||||
async fn flush_window(&self) {
|
||||
let snap = {
|
||||
let mut window = self.current.write().await;
|
||||
let mut by_method = serde_json::Map::new();
|
||||
for (method, stats) in &window.by_method {
|
||||
by_method.insert(method.clone(), stats.to_json());
|
||||
}
|
||||
let snap = json!({
|
||||
"timestamp": chrono::Utc::now().to_rfc3339(),
|
||||
"window_seconds": self.config.interval_minutes * 60,
|
||||
"by_method": by_method,
|
||||
"by_status_class": window.by_status_class,
|
||||
});
|
||||
window.reset();
|
||||
snap
|
||||
};
|
||||
|
||||
let max_snapshots = (self.config.retention_hours * 60 / self.config.interval_minutes) as usize;
|
||||
{
|
||||
let mut snapshots = self.snapshots.write().await;
|
||||
snapshots.push(snap);
|
||||
if snapshots.len() > max_snapshots {
|
||||
let excess = snapshots.len() - max_snapshots;
|
||||
snapshots.drain(..excess);
|
||||
}
|
||||
}
|
||||
self.save_snapshots().await;
|
||||
}
|
||||
|
||||
async fn save_snapshots(&self) {
|
||||
let snapshots = self.snapshots.read().await;
|
||||
let data = json!({ "snapshots": *snapshots });
|
||||
if let Some(parent) = self.snapshots_path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
let _ = std::fs::write(
|
||||
&self.snapshots_path,
|
||||
serde_json::to_string_pretty(&data).unwrap_or_default(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn start_background(self: Arc<Self>) -> tokio::task::JoinHandle<()> {
|
||||
let interval = std::time::Duration::from_secs(self.config.interval_minutes * 60);
|
||||
tokio::spawn(async move {
|
||||
let mut timer = tokio::time::interval(interval);
|
||||
timer.tick().await;
|
||||
loop {
|
||||
timer.tick().await;
|
||||
self.flush_window().await;
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
6
myfsio-engine/crates/myfsio-server/src/services/mod.rs
Normal file
6
myfsio-engine/crates/myfsio-server/src/services/mod.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
pub mod gc;
|
||||
pub mod lifecycle;
|
||||
pub mod integrity;
|
||||
pub mod metrics;
|
||||
pub mod site_registry;
|
||||
pub mod website_domains;
|
||||
143
myfsio-engine/crates/myfsio-server/src/services/site_registry.rs
Normal file
143
myfsio-engine/crates/myfsio-server/src/services/site_registry.rs
Normal file
@@ -0,0 +1,143 @@
|
||||
use chrono::Utc;
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct SiteInfo {
|
||||
pub site_id: String,
|
||||
pub endpoint: String,
|
||||
#[serde(default = "default_region")]
|
||||
pub region: String,
|
||||
#[serde(default = "default_priority")]
|
||||
pub priority: i32,
|
||||
#[serde(default)]
|
||||
pub display_name: String,
|
||||
#[serde(default)]
|
||||
pub created_at: Option<String>,
|
||||
}
|
||||
|
||||
fn default_region() -> String {
|
||||
"us-east-1".to_string()
|
||||
}
|
||||
fn default_priority() -> i32 {
|
||||
100
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct PeerSite {
|
||||
pub site_id: String,
|
||||
pub endpoint: String,
|
||||
#[serde(default = "default_region")]
|
||||
pub region: String,
|
||||
#[serde(default = "default_priority")]
|
||||
pub priority: i32,
|
||||
#[serde(default)]
|
||||
pub display_name: String,
|
||||
#[serde(default)]
|
||||
pub connection_id: Option<String>,
|
||||
#[serde(default)]
|
||||
pub created_at: Option<String>,
|
||||
#[serde(default)]
|
||||
pub is_healthy: bool,
|
||||
#[serde(default)]
|
||||
pub last_health_check: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
struct RegistryData {
|
||||
#[serde(default)]
|
||||
local: Option<SiteInfo>,
|
||||
#[serde(default)]
|
||||
peers: Vec<PeerSite>,
|
||||
}
|
||||
|
||||
pub struct SiteRegistry {
|
||||
path: PathBuf,
|
||||
data: Arc<RwLock<RegistryData>>,
|
||||
}
|
||||
|
||||
impl SiteRegistry {
|
||||
pub fn new(storage_root: &std::path::Path) -> Self {
|
||||
let path = storage_root
|
||||
.join(".myfsio.sys")
|
||||
.join("config")
|
||||
.join("site_registry.json");
|
||||
let data = if path.exists() {
|
||||
std::fs::read_to_string(&path)
|
||||
.ok()
|
||||
.and_then(|s| serde_json::from_str(&s).ok())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
RegistryData::default()
|
||||
};
|
||||
Self {
|
||||
path,
|
||||
data: Arc::new(RwLock::new(data)),
|
||||
}
|
||||
}
|
||||
|
||||
fn save(&self) {
|
||||
let data = self.data.read();
|
||||
if let Some(parent) = self.path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
if let Ok(json) = serde_json::to_string_pretty(&*data) {
|
||||
let _ = std::fs::write(&self.path, json);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_local_site(&self) -> Option<SiteInfo> {
|
||||
self.data.read().local.clone()
|
||||
}
|
||||
|
||||
pub fn set_local_site(&self, site: SiteInfo) {
|
||||
self.data.write().local = Some(site);
|
||||
self.save();
|
||||
}
|
||||
|
||||
pub fn list_peers(&self) -> Vec<PeerSite> {
|
||||
self.data.read().peers.clone()
|
||||
}
|
||||
|
||||
pub fn get_peer(&self, site_id: &str) -> Option<PeerSite> {
|
||||
self.data.read().peers.iter().find(|p| p.site_id == site_id).cloned()
|
||||
}
|
||||
|
||||
pub fn add_peer(&self, peer: PeerSite) {
|
||||
self.data.write().peers.push(peer);
|
||||
self.save();
|
||||
}
|
||||
|
||||
pub fn update_peer(&self, peer: PeerSite) {
|
||||
let mut data = self.data.write();
|
||||
if let Some(existing) = data.peers.iter_mut().find(|p| p.site_id == peer.site_id) {
|
||||
*existing = peer;
|
||||
}
|
||||
drop(data);
|
||||
self.save();
|
||||
}
|
||||
|
||||
pub fn delete_peer(&self, site_id: &str) -> bool {
|
||||
let mut data = self.data.write();
|
||||
let len_before = data.peers.len();
|
||||
data.peers.retain(|p| p.site_id != site_id);
|
||||
let removed = data.peers.len() < len_before;
|
||||
drop(data);
|
||||
if removed {
|
||||
self.save();
|
||||
}
|
||||
removed
|
||||
}
|
||||
|
||||
pub fn update_health(&self, site_id: &str, is_healthy: bool) {
|
||||
let mut data = self.data.write();
|
||||
if let Some(peer) = data.peers.iter_mut().find(|p| p.site_id == site_id) {
|
||||
peer.is_healthy = is_healthy;
|
||||
peer.last_health_check = Some(Utc::now().to_rfc3339());
|
||||
}
|
||||
drop(data);
|
||||
self.save();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,104 @@
|
||||
use parking_lot::RwLock;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
struct DomainData {
|
||||
#[serde(default)]
|
||||
mappings: HashMap<String, String>,
|
||||
}
|
||||
|
||||
pub struct WebsiteDomainStore {
|
||||
path: PathBuf,
|
||||
data: Arc<RwLock<DomainData>>,
|
||||
}
|
||||
|
||||
impl WebsiteDomainStore {
|
||||
pub fn new(storage_root: &std::path::Path) -> Self {
|
||||
let path = storage_root
|
||||
.join(".myfsio.sys")
|
||||
.join("config")
|
||||
.join("website_domains.json");
|
||||
let data = if path.exists() {
|
||||
std::fs::read_to_string(&path)
|
||||
.ok()
|
||||
.and_then(|s| serde_json::from_str(&s).ok())
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
DomainData::default()
|
||||
};
|
||||
Self {
|
||||
path,
|
||||
data: Arc::new(RwLock::new(data)),
|
||||
}
|
||||
}
|
||||
|
||||
fn save(&self) {
|
||||
let data = self.data.read();
|
||||
if let Some(parent) = self.path.parent() {
|
||||
let _ = std::fs::create_dir_all(parent);
|
||||
}
|
||||
if let Ok(json) = serde_json::to_string_pretty(&*data) {
|
||||
let _ = std::fs::write(&self.path, json);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn list_all(&self) -> Vec<serde_json::Value> {
|
||||
self.data
|
||||
.read()
|
||||
.mappings
|
||||
.iter()
|
||||
.map(|(domain, bucket)| {
|
||||
serde_json::json!({
|
||||
"domain": domain,
|
||||
"bucket": bucket,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn get_bucket(&self, domain: &str) -> Option<String> {
|
||||
self.data.read().mappings.get(domain).cloned()
|
||||
}
|
||||
|
||||
pub fn set_mapping(&self, domain: &str, bucket: &str) {
|
||||
self.data.write().mappings.insert(domain.to_string(), bucket.to_string());
|
||||
self.save();
|
||||
}
|
||||
|
||||
pub fn delete_mapping(&self, domain: &str) -> bool {
|
||||
let removed = self.data.write().mappings.remove(domain).is_some();
|
||||
if removed {
|
||||
self.save();
|
||||
}
|
||||
removed
|
||||
}
|
||||
}
|
||||
|
||||
pub fn normalize_domain(domain: &str) -> String {
|
||||
domain.trim().to_ascii_lowercase()
|
||||
}
|
||||
|
||||
pub fn is_valid_domain(domain: &str) -> bool {
|
||||
if domain.is_empty() || domain.len() > 253 {
|
||||
return false;
|
||||
}
|
||||
let labels: Vec<&str> = domain.split('.').collect();
|
||||
if labels.len() < 2 {
|
||||
return false;
|
||||
}
|
||||
for label in &labels {
|
||||
if label.is_empty() || label.len() > 63 {
|
||||
return false;
|
||||
}
|
||||
if !label.chars().all(|c| c.is_ascii_alphanumeric() || c == '-') {
|
||||
return false;
|
||||
}
|
||||
if label.starts_with('-') || label.ends_with('-') {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
121
myfsio-engine/crates/myfsio-server/src/state.rs
Normal file
121
myfsio-engine/crates/myfsio-server/src/state.rs
Normal file
@@ -0,0 +1,121 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::config::ServerConfig;
|
||||
use crate::services::gc::GcService;
|
||||
use crate::services::integrity::IntegrityService;
|
||||
use crate::services::metrics::MetricsService;
|
||||
use crate::services::site_registry::SiteRegistry;
|
||||
use crate::services::website_domains::WebsiteDomainStore;
|
||||
use myfsio_auth::iam::IamService;
|
||||
use myfsio_crypto::encryption::EncryptionService;
|
||||
use myfsio_crypto::kms::KmsService;
|
||||
use myfsio_storage::fs_backend::FsStorageBackend;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct AppState {
|
||||
pub config: ServerConfig,
|
||||
pub storage: Arc<FsStorageBackend>,
|
||||
pub iam: Arc<IamService>,
|
||||
pub encryption: Option<Arc<EncryptionService>>,
|
||||
pub kms: Option<Arc<KmsService>>,
|
||||
pub gc: Option<Arc<GcService>>,
|
||||
pub integrity: Option<Arc<IntegrityService>>,
|
||||
pub metrics: Option<Arc<MetricsService>>,
|
||||
pub site_registry: Option<Arc<SiteRegistry>>,
|
||||
pub website_domains: Option<Arc<WebsiteDomainStore>>,
|
||||
}
|
||||
|
||||
impl AppState {
|
||||
pub fn new(config: ServerConfig) -> Self {
|
||||
let storage = Arc::new(FsStorageBackend::new(config.storage_root.clone()));
|
||||
let iam = Arc::new(IamService::new_with_secret(
|
||||
config.iam_config_path.clone(),
|
||||
config.secret_key.clone(),
|
||||
));
|
||||
|
||||
let gc = if config.gc_enabled {
|
||||
Some(Arc::new(GcService::new(
|
||||
config.storage_root.clone(),
|
||||
crate::services::gc::GcConfig::default(),
|
||||
)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let integrity = if config.integrity_enabled {
|
||||
Some(Arc::new(IntegrityService::new(
|
||||
storage.clone(),
|
||||
&config.storage_root,
|
||||
crate::services::integrity::IntegrityConfig::default(),
|
||||
)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let metrics = if config.metrics_enabled {
|
||||
Some(Arc::new(MetricsService::new(
|
||||
&config.storage_root,
|
||||
crate::services::metrics::MetricsConfig::default(),
|
||||
)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let site_registry = Some(Arc::new(SiteRegistry::new(&config.storage_root)));
|
||||
|
||||
let website_domains = if config.website_hosting_enabled {
|
||||
Some(Arc::new(WebsiteDomainStore::new(&config.storage_root)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
Self {
|
||||
config,
|
||||
storage,
|
||||
iam,
|
||||
encryption: None,
|
||||
kms: None,
|
||||
gc,
|
||||
integrity,
|
||||
metrics,
|
||||
site_registry,
|
||||
website_domains,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn new_with_encryption(config: ServerConfig) -> Self {
|
||||
let mut state = Self::new(config.clone());
|
||||
|
||||
let keys_dir = config.storage_root.join(".myfsio.sys").join("keys");
|
||||
|
||||
let kms = if config.kms_enabled {
|
||||
match KmsService::new(&keys_dir).await {
|
||||
Ok(k) => Some(Arc::new(k)),
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to initialize KMS: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let encryption = if config.encryption_enabled {
|
||||
match myfsio_crypto::kms::load_or_create_master_key(&keys_dir).await {
|
||||
Ok(master_key) => {
|
||||
Some(Arc::new(EncryptionService::new(master_key, kms.clone())))
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::error!("Failed to initialize encryption: {}", e);
|
||||
None
|
||||
}
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
state.encryption = encryption;
|
||||
state.kms = kms;
|
||||
state
|
||||
}
|
||||
}
|
||||
2201
myfsio-engine/crates/myfsio-server/tests/integration.rs
Normal file
2201
myfsio-engine/crates/myfsio-server/tests/integration.rs
Normal file
File diff suppressed because it is too large
Load Diff
26
myfsio-engine/crates/myfsio-storage/Cargo.toml
Normal file
26
myfsio-engine/crates/myfsio-storage/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "myfsio-storage"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
myfsio-common = { path = "../myfsio-common" }
|
||||
myfsio-crypto = { path = "../myfsio-crypto" }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
unicode-normalization = { workspace = true }
|
||||
md-5 = { workspace = true }
|
||||
sha2 = { workspace = true }
|
||||
hex = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||
tempfile = "3"
|
||||
59
myfsio-engine/crates/myfsio-storage/src/error.rs
Normal file
59
myfsio-engine/crates/myfsio-storage/src/error.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
use myfsio_common::error::{S3Error, S3ErrorCode};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum StorageError {
|
||||
#[error("Bucket not found: {0}")]
|
||||
BucketNotFound(String),
|
||||
#[error("Bucket already exists: {0}")]
|
||||
BucketAlreadyExists(String),
|
||||
#[error("Bucket not empty: {0}")]
|
||||
BucketNotEmpty(String),
|
||||
#[error("Object not found: {bucket}/{key}")]
|
||||
ObjectNotFound { bucket: String, key: String },
|
||||
#[error("Invalid bucket name: {0}")]
|
||||
InvalidBucketName(String),
|
||||
#[error("Invalid object key: {0}")]
|
||||
InvalidObjectKey(String),
|
||||
#[error("Upload not found: {0}")]
|
||||
UploadNotFound(String),
|
||||
#[error("Quota exceeded: {0}")]
|
||||
QuotaExceeded(String),
|
||||
#[error("IO error: {0}")]
|
||||
Io(#[from] std::io::Error),
|
||||
#[error("JSON error: {0}")]
|
||||
Json(#[from] serde_json::Error),
|
||||
#[error("Internal error: {0}")]
|
||||
Internal(String),
|
||||
}
|
||||
|
||||
impl From<StorageError> for S3Error {
|
||||
fn from(err: StorageError) -> Self {
|
||||
match err {
|
||||
StorageError::BucketNotFound(name) => {
|
||||
S3Error::from_code(S3ErrorCode::NoSuchBucket).with_resource(format!("/{}", name))
|
||||
}
|
||||
StorageError::BucketAlreadyExists(name) => {
|
||||
S3Error::from_code(S3ErrorCode::BucketAlreadyExists)
|
||||
.with_resource(format!("/{}", name))
|
||||
}
|
||||
StorageError::BucketNotEmpty(name) => {
|
||||
S3Error::from_code(S3ErrorCode::BucketNotEmpty)
|
||||
.with_resource(format!("/{}", name))
|
||||
}
|
||||
StorageError::ObjectNotFound { bucket, key } => {
|
||||
S3Error::from_code(S3ErrorCode::NoSuchKey)
|
||||
.with_resource(format!("/{}/{}", bucket, key))
|
||||
}
|
||||
StorageError::InvalidBucketName(msg) => S3Error::new(S3ErrorCode::InvalidBucketName, msg),
|
||||
StorageError::InvalidObjectKey(msg) => S3Error::new(S3ErrorCode::InvalidKey, msg),
|
||||
StorageError::UploadNotFound(id) => {
|
||||
S3Error::new(S3ErrorCode::NoSuchUpload, format!("Upload {} not found", id))
|
||||
}
|
||||
StorageError::QuotaExceeded(msg) => S3Error::new(S3ErrorCode::QuotaExceeded, msg),
|
||||
StorageError::Io(e) => S3Error::new(S3ErrorCode::InternalError, e.to_string()),
|
||||
StorageError::Json(e) => S3Error::new(S3ErrorCode::InternalError, e.to_string()),
|
||||
StorageError::Internal(msg) => S3Error::new(S3ErrorCode::InternalError, msg),
|
||||
}
|
||||
}
|
||||
}
|
||||
2025
myfsio-engine/crates/myfsio-storage/src/fs_backend.rs
Normal file
2025
myfsio-engine/crates/myfsio-storage/src/fs_backend.rs
Normal file
File diff suppressed because it is too large
Load Diff
4
myfsio-engine/crates/myfsio-storage/src/lib.rs
Normal file
4
myfsio-engine/crates/myfsio-storage/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod validation;
|
||||
pub mod traits;
|
||||
pub mod error;
|
||||
pub mod fs_backend;
|
||||
125
myfsio-engine/crates/myfsio-storage/src/traits.rs
Normal file
125
myfsio-engine/crates/myfsio-storage/src/traits.rs
Normal file
@@ -0,0 +1,125 @@
|
||||
use crate::error::StorageError;
|
||||
use myfsio_common::types::*;
|
||||
use std::collections::HashMap;
|
||||
use std::path::PathBuf;
|
||||
use std::pin::Pin;
|
||||
use tokio::io::AsyncRead;
|
||||
|
||||
pub type StorageResult<T> = Result<T, StorageError>;
|
||||
pub type AsyncReadStream = Pin<Box<dyn AsyncRead + Send>>;
|
||||
|
||||
#[allow(async_fn_in_trait)]
|
||||
pub trait StorageEngine: Send + Sync {
|
||||
async fn list_buckets(&self) -> StorageResult<Vec<BucketMeta>>;
|
||||
async fn create_bucket(&self, name: &str) -> StorageResult<()>;
|
||||
async fn delete_bucket(&self, name: &str) -> StorageResult<()>;
|
||||
async fn bucket_exists(&self, name: &str) -> StorageResult<bool>;
|
||||
async fn bucket_stats(&self, name: &str) -> StorageResult<BucketStats>;
|
||||
|
||||
async fn put_object(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
stream: AsyncReadStream,
|
||||
metadata: Option<HashMap<String, String>>,
|
||||
) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn get_object(&self, bucket: &str, key: &str) -> StorageResult<(ObjectMeta, AsyncReadStream)>;
|
||||
|
||||
async fn get_object_path(&self, bucket: &str, key: &str) -> StorageResult<PathBuf>;
|
||||
|
||||
async fn head_object(&self, bucket: &str, key: &str) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn delete_object(&self, bucket: &str, key: &str) -> StorageResult<()>;
|
||||
|
||||
async fn copy_object(
|
||||
&self,
|
||||
src_bucket: &str,
|
||||
src_key: &str,
|
||||
dst_bucket: &str,
|
||||
dst_key: &str,
|
||||
) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn get_object_metadata(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<HashMap<String, String>>;
|
||||
|
||||
async fn put_object_metadata(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
metadata: &HashMap<String, String>,
|
||||
) -> StorageResult<()>;
|
||||
|
||||
async fn list_objects(&self, bucket: &str, params: &ListParams) -> StorageResult<ListObjectsResult>;
|
||||
|
||||
async fn list_objects_shallow(
|
||||
&self,
|
||||
bucket: &str,
|
||||
params: &ShallowListParams,
|
||||
) -> StorageResult<ShallowListResult>;
|
||||
|
||||
async fn initiate_multipart(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
metadata: Option<HashMap<String, String>>,
|
||||
) -> StorageResult<String>;
|
||||
|
||||
async fn upload_part(
|
||||
&self,
|
||||
bucket: &str,
|
||||
upload_id: &str,
|
||||
part_number: u32,
|
||||
stream: AsyncReadStream,
|
||||
) -> StorageResult<String>;
|
||||
|
||||
async fn complete_multipart(
|
||||
&self,
|
||||
bucket: &str,
|
||||
upload_id: &str,
|
||||
parts: &[PartInfo],
|
||||
) -> StorageResult<ObjectMeta>;
|
||||
|
||||
async fn abort_multipart(&self, bucket: &str, upload_id: &str) -> StorageResult<()>;
|
||||
|
||||
async fn list_parts(&self, bucket: &str, upload_id: &str) -> StorageResult<Vec<PartMeta>>;
|
||||
|
||||
async fn list_multipart_uploads(
|
||||
&self,
|
||||
bucket: &str,
|
||||
) -> StorageResult<Vec<MultipartUploadInfo>>;
|
||||
|
||||
async fn get_bucket_config(&self, bucket: &str) -> StorageResult<BucketConfig>;
|
||||
async fn set_bucket_config(&self, bucket: &str, config: &BucketConfig) -> StorageResult<()>;
|
||||
|
||||
async fn is_versioning_enabled(&self, bucket: &str) -> StorageResult<bool>;
|
||||
async fn set_versioning(&self, bucket: &str, enabled: bool) -> StorageResult<()>;
|
||||
|
||||
async fn list_object_versions(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<Vec<VersionInfo>>;
|
||||
|
||||
async fn get_object_tags(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<Vec<Tag>>;
|
||||
|
||||
async fn set_object_tags(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
tags: &[Tag],
|
||||
) -> StorageResult<()>;
|
||||
|
||||
async fn delete_object_tags(
|
||||
&self,
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
) -> StorageResult<()>;
|
||||
}
|
||||
194
myfsio-engine/crates/myfsio-storage/src/validation.rs
Normal file
194
myfsio-engine/crates/myfsio-storage/src/validation.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
use std::sync::LazyLock;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
const WINDOWS_RESERVED: &[&str] = &[
|
||||
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
|
||||
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
|
||||
"LPT9",
|
||||
];
|
||||
|
||||
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
|
||||
|
||||
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||
const SYSTEM_ROOT: &str = ".myfsio.sys";
|
||||
|
||||
static IP_REGEX: LazyLock<regex::Regex> =
|
||||
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
|
||||
|
||||
pub fn validate_object_key(
|
||||
object_key: &str,
|
||||
max_length_bytes: usize,
|
||||
is_windows: bool,
|
||||
reserved_prefixes: Option<&[&str]>,
|
||||
) -> Option<String> {
|
||||
if object_key.is_empty() {
|
||||
return Some("Object key required".to_string());
|
||||
}
|
||||
|
||||
if object_key.contains('\0') {
|
||||
return Some("Object key contains null bytes".to_string());
|
||||
}
|
||||
|
||||
let normalized: String = object_key.nfc().collect();
|
||||
|
||||
if normalized.len() > max_length_bytes {
|
||||
return Some(format!(
|
||||
"Object key exceeds maximum length of {} bytes",
|
||||
max_length_bytes
|
||||
));
|
||||
}
|
||||
|
||||
if normalized.starts_with('/') || normalized.starts_with('\\') {
|
||||
return Some("Object key cannot start with a slash".to_string());
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = if cfg!(windows) || is_windows {
|
||||
normalized.split(['/', '\\']).collect()
|
||||
} else {
|
||||
normalized.split('/').collect()
|
||||
};
|
||||
|
||||
for part in &parts {
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if *part == ".." {
|
||||
return Some("Object key contains parent directory references".to_string());
|
||||
}
|
||||
|
||||
if *part == "." {
|
||||
return Some("Object key contains invalid segments".to_string());
|
||||
}
|
||||
|
||||
if part.chars().any(|c| (c as u32) < 32) {
|
||||
return Some("Object key contains control characters".to_string());
|
||||
}
|
||||
|
||||
if is_windows {
|
||||
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
|
||||
return Some(
|
||||
"Object key contains characters not supported on Windows filesystems"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
if part.ends_with(' ') || part.ends_with('.') {
|
||||
return Some(
|
||||
"Object key segments cannot end with spaces or periods on Windows".to_string(),
|
||||
);
|
||||
}
|
||||
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
|
||||
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
|
||||
return Some(format!("Invalid filename segment: {}", part));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
|
||||
if let Some(top) = non_empty_parts.first() {
|
||||
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
|
||||
return Some("Object key uses a reserved prefix".to_string());
|
||||
}
|
||||
|
||||
if let Some(prefixes) = reserved_prefixes {
|
||||
for prefix in prefixes {
|
||||
if *top == *prefix {
|
||||
return Some("Object key uses a reserved prefix".to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
|
||||
let len = bucket_name.len();
|
||||
if len < 3 || len > 63 {
|
||||
return Some("Bucket name must be between 3 and 63 characters".to_string());
|
||||
}
|
||||
|
||||
let bytes = bucket_name.as_bytes();
|
||||
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
for &b in bytes {
|
||||
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
|
||||
return Some(
|
||||
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if bucket_name.contains("..") {
|
||||
return Some("Bucket name must not contain consecutive periods".to_string());
|
||||
}
|
||||
|
||||
if IP_REGEX.is_match(bucket_name) {
|
||||
return Some("Bucket name must not be formatted as an IP address".to_string());
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_valid_bucket_names() {
|
||||
assert!(validate_bucket_name("my-bucket").is_none());
|
||||
assert!(validate_bucket_name("test123").is_none());
|
||||
assert!(validate_bucket_name("my.bucket.name").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_bucket_names() {
|
||||
assert!(validate_bucket_name("ab").is_some());
|
||||
assert!(validate_bucket_name("My-Bucket").is_some());
|
||||
assert!(validate_bucket_name("-bucket").is_some());
|
||||
assert!(validate_bucket_name("bucket-").is_some());
|
||||
assert!(validate_bucket_name("my..bucket").is_some());
|
||||
assert!(validate_bucket_name("192.168.1.1").is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_valid_object_keys() {
|
||||
assert!(validate_object_key("file.txt", 1024, false, None).is_none());
|
||||
assert!(validate_object_key("path/to/file.txt", 1024, false, None).is_none());
|
||||
assert!(validate_object_key("a", 1024, false, None).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_object_keys() {
|
||||
assert!(validate_object_key("", 1024, false, None).is_some());
|
||||
assert!(validate_object_key("/leading-slash", 1024, false, None).is_some());
|
||||
assert!(validate_object_key("path/../escape", 1024, false, None).is_some());
|
||||
assert!(validate_object_key(".myfsio.sys/secret", 1024, false, None).is_some());
|
||||
assert!(validate_object_key(".meta/data", 1024, false, None).is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_object_key_max_length() {
|
||||
let long_key = "a".repeat(1025);
|
||||
assert!(validate_object_key(&long_key, 1024, false, None).is_some());
|
||||
let ok_key = "a".repeat(1024);
|
||||
assert!(validate_object_key(&ok_key, 1024, false, None).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_windows_validation() {
|
||||
assert!(validate_object_key("CON", 1024, true, None).is_some());
|
||||
assert!(validate_object_key("file<name", 1024, true, None).is_some());
|
||||
assert!(validate_object_key("file.txt ", 1024, true, None).is_some());
|
||||
}
|
||||
}
|
||||
10
myfsio-engine/crates/myfsio-xml/Cargo.toml
Normal file
10
myfsio-engine/crates/myfsio-xml/Cargo.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
[package]
|
||||
name = "myfsio-xml"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
myfsio-common = { path = "../myfsio-common" }
|
||||
quick-xml = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
14
myfsio-engine/crates/myfsio-xml/src/lib.rs
Normal file
14
myfsio-engine/crates/myfsio-xml/src/lib.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
pub mod response;
|
||||
pub mod request;
|
||||
|
||||
use quick_xml::Writer;
|
||||
use std::io::Cursor;
|
||||
|
||||
pub fn write_xml_element(tag: &str, text: &str) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer
|
||||
.create_element(tag)
|
||||
.write_text_content(quick_xml::events::BytesText::new(text))
|
||||
.unwrap();
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
159
myfsio-engine/crates/myfsio-xml/src/request.rs
Normal file
159
myfsio-engine/crates/myfsio-xml/src/request.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct DeleteObjectsRequest {
|
||||
pub objects: Vec<ObjectIdentifier>,
|
||||
pub quiet: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ObjectIdentifier {
|
||||
pub key: String,
|
||||
pub version_id: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct CompleteMultipartUpload {
|
||||
pub parts: Vec<CompletedPart>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct CompletedPart {
|
||||
pub part_number: u32,
|
||||
pub etag: String,
|
||||
}
|
||||
|
||||
pub fn parse_complete_multipart_upload(xml: &str) -> Result<CompleteMultipartUpload, String> {
|
||||
let mut reader = Reader::from_str(xml);
|
||||
let mut result = CompleteMultipartUpload::default();
|
||||
let mut buf = Vec::new();
|
||||
let mut current_tag = String::new();
|
||||
let mut part_number: Option<u32> = None;
|
||||
let mut etag: Option<String> = None;
|
||||
let mut in_part = false;
|
||||
|
||||
loop {
|
||||
match reader.read_event_into(&mut buf) {
|
||||
Ok(Event::Start(ref e)) => {
|
||||
let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
|
||||
current_tag = name.clone();
|
||||
if name == "Part" {
|
||||
in_part = true;
|
||||
part_number = None;
|
||||
etag = None;
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(ref e)) => {
|
||||
if in_part {
|
||||
let text = e.unescape().map_err(|e| e.to_string())?.to_string();
|
||||
match current_tag.as_str() {
|
||||
"PartNumber" => {
|
||||
part_number = Some(text.trim().parse().map_err(|e: std::num::ParseIntError| e.to_string())?);
|
||||
}
|
||||
"ETag" => {
|
||||
etag = Some(text.trim().trim_matches('"').to_string());
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Event::End(ref e)) => {
|
||||
let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
|
||||
if name == "Part" && in_part {
|
||||
if let (Some(pn), Some(et)) = (part_number.take(), etag.take()) {
|
||||
result.parts.push(CompletedPart {
|
||||
part_number: pn,
|
||||
etag: et,
|
||||
});
|
||||
}
|
||||
in_part = false;
|
||||
}
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
Err(e) => return Err(format!("XML parse error: {}", e)),
|
||||
_ => {}
|
||||
}
|
||||
buf.clear();
|
||||
}
|
||||
|
||||
result.parts.sort_by_key(|p| p.part_number);
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn parse_delete_objects(xml: &str) -> Result<DeleteObjectsRequest, String> {
|
||||
let mut reader = Reader::from_str(xml);
|
||||
let mut result = DeleteObjectsRequest::default();
|
||||
let mut buf = Vec::new();
|
||||
let mut current_tag = String::new();
|
||||
let mut current_key: Option<String> = None;
|
||||
let mut current_version_id: Option<String> = None;
|
||||
let mut in_object = false;
|
||||
|
||||
loop {
|
||||
match reader.read_event_into(&mut buf) {
|
||||
Ok(Event::Start(ref e)) => {
|
||||
let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
|
||||
current_tag = name.clone();
|
||||
if name == "Object" {
|
||||
in_object = true;
|
||||
current_key = None;
|
||||
current_version_id = None;
|
||||
}
|
||||
}
|
||||
Ok(Event::Text(ref e)) => {
|
||||
let text = e.unescape().map_err(|e| e.to_string())?.to_string();
|
||||
match current_tag.as_str() {
|
||||
"Key" if in_object => {
|
||||
current_key = Some(text.trim().to_string());
|
||||
}
|
||||
"VersionId" if in_object => {
|
||||
current_version_id = Some(text.trim().to_string());
|
||||
}
|
||||
"Quiet" => {
|
||||
result.quiet = text.trim() == "true";
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(Event::End(ref e)) => {
|
||||
let name = String::from_utf8_lossy(e.name().as_ref()).to_string();
|
||||
if name == "Object" && in_object {
|
||||
if let Some(key) = current_key.take() {
|
||||
result.objects.push(ObjectIdentifier {
|
||||
key,
|
||||
version_id: current_version_id.take(),
|
||||
});
|
||||
}
|
||||
in_object = false;
|
||||
}
|
||||
}
|
||||
Ok(Event::Eof) => break,
|
||||
Err(e) => return Err(format!("XML parse error: {}", e)),
|
||||
_ => {}
|
||||
}
|
||||
buf.clear();
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_parse_complete_multipart() {
|
||||
let xml = r#"<CompleteMultipartUpload>
|
||||
<Part><PartNumber>2</PartNumber><ETag>"etag2"</ETag></Part>
|
||||
<Part><PartNumber>1</PartNumber><ETag>"etag1"</ETag></Part>
|
||||
</CompleteMultipartUpload>"#;
|
||||
|
||||
let result = parse_complete_multipart_upload(xml).unwrap();
|
||||
assert_eq!(result.parts.len(), 2);
|
||||
assert_eq!(result.parts[0].part_number, 1);
|
||||
assert_eq!(result.parts[0].etag, "etag1");
|
||||
assert_eq!(result.parts[1].part_number, 2);
|
||||
assert_eq!(result.parts[1].etag, "etag2");
|
||||
}
|
||||
}
|
||||
363
myfsio-engine/crates/myfsio-xml/src/response.rs
Normal file
363
myfsio-engine/crates/myfsio-xml/src/response.rs
Normal file
@@ -0,0 +1,363 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use myfsio_common::types::{BucketMeta, ObjectMeta};
|
||||
use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event};
|
||||
use quick_xml::Writer;
|
||||
use std::io::Cursor;
|
||||
|
||||
pub fn format_s3_datetime(dt: &DateTime<Utc>) -> String {
|
||||
dt.format("%Y-%m-%dT%H:%M:%S%.3fZ").to_string()
|
||||
}
|
||||
|
||||
pub fn list_buckets_xml(owner_id: &str, owner_name: &str, buckets: &[BucketMeta]) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("ListAllMyBucketsResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
|
||||
writer.write_event(Event::Start(BytesStart::new("Owner"))).unwrap();
|
||||
write_text_element(&mut writer, "ID", owner_id);
|
||||
write_text_element(&mut writer, "DisplayName", owner_name);
|
||||
writer.write_event(Event::End(BytesEnd::new("Owner"))).unwrap();
|
||||
|
||||
writer.write_event(Event::Start(BytesStart::new("Buckets"))).unwrap();
|
||||
for bucket in buckets {
|
||||
writer.write_event(Event::Start(BytesStart::new("Bucket"))).unwrap();
|
||||
write_text_element(&mut writer, "Name", &bucket.name);
|
||||
write_text_element(&mut writer, "CreationDate", &format_s3_datetime(&bucket.creation_date));
|
||||
writer.write_event(Event::End(BytesEnd::new("Bucket"))).unwrap();
|
||||
}
|
||||
writer.write_event(Event::End(BytesEnd::new("Buckets"))).unwrap();
|
||||
|
||||
writer.write_event(Event::End(BytesEnd::new("ListAllMyBucketsResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn list_objects_v2_xml(
|
||||
bucket_name: &str,
|
||||
prefix: &str,
|
||||
delimiter: &str,
|
||||
max_keys: usize,
|
||||
objects: &[ObjectMeta],
|
||||
common_prefixes: &[String],
|
||||
is_truncated: bool,
|
||||
continuation_token: Option<&str>,
|
||||
next_continuation_token: Option<&str>,
|
||||
key_count: usize,
|
||||
) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("ListBucketResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
|
||||
write_text_element(&mut writer, "Name", bucket_name);
|
||||
write_text_element(&mut writer, "Prefix", prefix);
|
||||
if !delimiter.is_empty() {
|
||||
write_text_element(&mut writer, "Delimiter", delimiter);
|
||||
}
|
||||
write_text_element(&mut writer, "MaxKeys", &max_keys.to_string());
|
||||
write_text_element(&mut writer, "KeyCount", &key_count.to_string());
|
||||
write_text_element(&mut writer, "IsTruncated", &is_truncated.to_string());
|
||||
|
||||
if let Some(token) = continuation_token {
|
||||
write_text_element(&mut writer, "ContinuationToken", token);
|
||||
}
|
||||
if let Some(token) = next_continuation_token {
|
||||
write_text_element(&mut writer, "NextContinuationToken", token);
|
||||
}
|
||||
|
||||
for obj in objects {
|
||||
writer.write_event(Event::Start(BytesStart::new("Contents"))).unwrap();
|
||||
write_text_element(&mut writer, "Key", &obj.key);
|
||||
write_text_element(&mut writer, "LastModified", &format_s3_datetime(&obj.last_modified));
|
||||
if let Some(ref etag) = obj.etag {
|
||||
write_text_element(&mut writer, "ETag", &format!("\"{}\"", etag));
|
||||
}
|
||||
write_text_element(&mut writer, "Size", &obj.size.to_string());
|
||||
write_text_element(&mut writer, "StorageClass", obj.storage_class.as_deref().unwrap_or("STANDARD"));
|
||||
writer.write_event(Event::End(BytesEnd::new("Contents"))).unwrap();
|
||||
}
|
||||
|
||||
for prefix in common_prefixes {
|
||||
writer.write_event(Event::Start(BytesStart::new("CommonPrefixes"))).unwrap();
|
||||
write_text_element(&mut writer, "Prefix", prefix);
|
||||
writer.write_event(Event::End(BytesEnd::new("CommonPrefixes"))).unwrap();
|
||||
}
|
||||
|
||||
writer.write_event(Event::End(BytesEnd::new("ListBucketResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn list_objects_v1_xml(
|
||||
bucket_name: &str,
|
||||
prefix: &str,
|
||||
marker: &str,
|
||||
delimiter: &str,
|
||||
max_keys: usize,
|
||||
objects: &[ObjectMeta],
|
||||
common_prefixes: &[String],
|
||||
is_truncated: bool,
|
||||
next_marker: Option<&str>,
|
||||
) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
|
||||
writer
|
||||
.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None)))
|
||||
.unwrap();
|
||||
|
||||
let start = BytesStart::new("ListBucketResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
|
||||
write_text_element(&mut writer, "Name", bucket_name);
|
||||
write_text_element(&mut writer, "Prefix", prefix);
|
||||
write_text_element(&mut writer, "Marker", marker);
|
||||
write_text_element(&mut writer, "MaxKeys", &max_keys.to_string());
|
||||
write_text_element(&mut writer, "IsTruncated", &is_truncated.to_string());
|
||||
|
||||
if !delimiter.is_empty() {
|
||||
write_text_element(&mut writer, "Delimiter", delimiter);
|
||||
}
|
||||
if !delimiter.is_empty() && is_truncated {
|
||||
if let Some(nm) = next_marker {
|
||||
if !nm.is_empty() {
|
||||
write_text_element(&mut writer, "NextMarker", nm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for obj in objects {
|
||||
writer
|
||||
.write_event(Event::Start(BytesStart::new("Contents")))
|
||||
.unwrap();
|
||||
write_text_element(&mut writer, "Key", &obj.key);
|
||||
write_text_element(&mut writer, "LastModified", &format_s3_datetime(&obj.last_modified));
|
||||
if let Some(ref etag) = obj.etag {
|
||||
write_text_element(&mut writer, "ETag", &format!("\"{}\"", etag));
|
||||
}
|
||||
write_text_element(&mut writer, "Size", &obj.size.to_string());
|
||||
writer
|
||||
.write_event(Event::End(BytesEnd::new("Contents")))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
for cp in common_prefixes {
|
||||
writer
|
||||
.write_event(Event::Start(BytesStart::new("CommonPrefixes")))
|
||||
.unwrap();
|
||||
write_text_element(&mut writer, "Prefix", cp);
|
||||
writer
|
||||
.write_event(Event::End(BytesEnd::new("CommonPrefixes")))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
writer
|
||||
.write_event(Event::End(BytesEnd::new("ListBucketResult")))
|
||||
.unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
fn write_text_element(writer: &mut Writer<Cursor<Vec<u8>>>, tag: &str, text: &str) {
|
||||
writer.write_event(Event::Start(BytesStart::new(tag))).unwrap();
|
||||
writer.write_event(Event::Text(BytesText::new(text))).unwrap();
|
||||
writer.write_event(Event::End(BytesEnd::new(tag))).unwrap();
|
||||
}
|
||||
|
||||
pub fn initiate_multipart_upload_xml(bucket: &str, key: &str, upload_id: &str) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("InitiateMultipartUploadResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
write_text_element(&mut writer, "Bucket", bucket);
|
||||
write_text_element(&mut writer, "Key", key);
|
||||
write_text_element(&mut writer, "UploadId", upload_id);
|
||||
writer.write_event(Event::End(BytesEnd::new("InitiateMultipartUploadResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn complete_multipart_upload_xml(
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
etag: &str,
|
||||
location: &str,
|
||||
) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("CompleteMultipartUploadResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
write_text_element(&mut writer, "Location", location);
|
||||
write_text_element(&mut writer, "Bucket", bucket);
|
||||
write_text_element(&mut writer, "Key", key);
|
||||
write_text_element(&mut writer, "ETag", &format!("\"{}\"", etag));
|
||||
writer.write_event(Event::End(BytesEnd::new("CompleteMultipartUploadResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn copy_object_result_xml(etag: &str, last_modified: &str) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("CopyObjectResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
write_text_element(&mut writer, "ETag", &format!("\"{}\"", etag));
|
||||
write_text_element(&mut writer, "LastModified", last_modified);
|
||||
writer.write_event(Event::End(BytesEnd::new("CopyObjectResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn delete_result_xml(
|
||||
deleted: &[(String, Option<String>)],
|
||||
errors: &[(String, String, String)],
|
||||
quiet: bool,
|
||||
) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("DeleteResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
|
||||
if !quiet {
|
||||
for (key, version_id) in deleted {
|
||||
writer.write_event(Event::Start(BytesStart::new("Deleted"))).unwrap();
|
||||
write_text_element(&mut writer, "Key", key);
|
||||
if let Some(vid) = version_id {
|
||||
write_text_element(&mut writer, "VersionId", vid);
|
||||
}
|
||||
writer.write_event(Event::End(BytesEnd::new("Deleted"))).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
for (key, code, message) in errors {
|
||||
writer.write_event(Event::Start(BytesStart::new("Error"))).unwrap();
|
||||
write_text_element(&mut writer, "Key", key);
|
||||
write_text_element(&mut writer, "Code", code);
|
||||
write_text_element(&mut writer, "Message", message);
|
||||
writer.write_event(Event::End(BytesEnd::new("Error"))).unwrap();
|
||||
}
|
||||
|
||||
writer.write_event(Event::End(BytesEnd::new("DeleteResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn list_multipart_uploads_xml(
|
||||
bucket: &str,
|
||||
uploads: &[myfsio_common::types::MultipartUploadInfo],
|
||||
) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("ListMultipartUploadsResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
write_text_element(&mut writer, "Bucket", bucket);
|
||||
|
||||
for upload in uploads {
|
||||
writer.write_event(Event::Start(BytesStart::new("Upload"))).unwrap();
|
||||
write_text_element(&mut writer, "Key", &upload.key);
|
||||
write_text_element(&mut writer, "UploadId", &upload.upload_id);
|
||||
write_text_element(&mut writer, "Initiated", &format_s3_datetime(&upload.initiated));
|
||||
writer.write_event(Event::End(BytesEnd::new("Upload"))).unwrap();
|
||||
}
|
||||
|
||||
writer.write_event(Event::End(BytesEnd::new("ListMultipartUploadsResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
pub fn list_parts_xml(
|
||||
bucket: &str,
|
||||
key: &str,
|
||||
upload_id: &str,
|
||||
parts: &[myfsio_common::types::PartMeta],
|
||||
) -> String {
|
||||
let mut writer = Writer::new(Cursor::new(Vec::new()));
|
||||
writer.write_event(Event::Decl(BytesDecl::new("1.0", Some("UTF-8"), None))).unwrap();
|
||||
|
||||
let start = BytesStart::new("ListPartsResult")
|
||||
.with_attributes([("xmlns", "http://s3.amazonaws.com/doc/2006-03-01/")]);
|
||||
writer.write_event(Event::Start(start)).unwrap();
|
||||
write_text_element(&mut writer, "Bucket", bucket);
|
||||
write_text_element(&mut writer, "Key", key);
|
||||
write_text_element(&mut writer, "UploadId", upload_id);
|
||||
|
||||
for part in parts {
|
||||
writer.write_event(Event::Start(BytesStart::new("Part"))).unwrap();
|
||||
write_text_element(&mut writer, "PartNumber", &part.part_number.to_string());
|
||||
write_text_element(&mut writer, "ETag", &format!("\"{}\"", part.etag));
|
||||
write_text_element(&mut writer, "Size", &part.size.to_string());
|
||||
if let Some(ref lm) = part.last_modified {
|
||||
write_text_element(&mut writer, "LastModified", &format_s3_datetime(lm));
|
||||
}
|
||||
writer.write_event(Event::End(BytesEnd::new("Part"))).unwrap();
|
||||
}
|
||||
|
||||
writer.write_event(Event::End(BytesEnd::new("ListPartsResult"))).unwrap();
|
||||
|
||||
String::from_utf8(writer.into_inner().into_inner()).unwrap()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use chrono::Utc;
|
||||
|
||||
#[test]
|
||||
fn test_list_buckets_xml() {
|
||||
let buckets = vec![BucketMeta {
|
||||
name: "test-bucket".to_string(),
|
||||
creation_date: Utc::now(),
|
||||
}];
|
||||
let xml = list_buckets_xml("owner-id", "owner-name", &buckets);
|
||||
assert!(xml.contains("<Name>test-bucket</Name>"));
|
||||
assert!(xml.contains("<ID>owner-id</ID>"));
|
||||
assert!(xml.contains("ListAllMyBucketsResult"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_objects_v2_xml() {
|
||||
let objects = vec![ObjectMeta::new("file.txt".to_string(), 1024, Utc::now())];
|
||||
let xml = list_objects_v2_xml(
|
||||
"my-bucket", "", "/", 1000, &objects, &[], false, None, None, 1,
|
||||
);
|
||||
assert!(xml.contains("<Key>file.txt</Key>"));
|
||||
assert!(xml.contains("<Size>1024</Size>"));
|
||||
assert!(xml.contains("<IsTruncated>false</IsTruncated>"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_list_objects_v1_xml() {
|
||||
let objects = vec![ObjectMeta::new("file.txt".to_string(), 1024, Utc::now())];
|
||||
let xml = list_objects_v1_xml(
|
||||
"my-bucket",
|
||||
"",
|
||||
"",
|
||||
"/",
|
||||
1000,
|
||||
&objects,
|
||||
&[],
|
||||
false,
|
||||
None,
|
||||
);
|
||||
assert!(xml.contains("<Key>file.txt</Key>"));
|
||||
assert!(xml.contains("<Size>1024</Size>"));
|
||||
assert!(xml.contains("<Marker></Marker>"));
|
||||
}
|
||||
}
|
||||
24
myfsio_core/Cargo.toml
Normal file
24
myfsio_core/Cargo.toml
Normal file
@@ -0,0 +1,24 @@
|
||||
[package]
|
||||
name = "myfsio_core"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[lib]
|
||||
name = "myfsio_core"
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
pyo3 = { version = "0.28", features = ["extension-module"] }
|
||||
hmac = "0.12"
|
||||
sha2 = "0.10"
|
||||
md-5 = "0.10"
|
||||
hex = "0.4"
|
||||
unicode-normalization = "0.1"
|
||||
serde_json = "1"
|
||||
regex = "1"
|
||||
lru = "0.14"
|
||||
parking_lot = "0.12"
|
||||
percent-encoding = "2"
|
||||
aes-gcm = "0.10"
|
||||
hkdf = "0.12"
|
||||
uuid = { version = "1", features = ["v4"] }
|
||||
11
myfsio_core/pyproject.toml
Normal file
11
myfsio_core/pyproject.toml
Normal file
@@ -0,0 +1,11 @@
|
||||
[build-system]
|
||||
requires = ["maturin>=1.0,<2.0"]
|
||||
build-backend = "maturin"
|
||||
|
||||
[project]
|
||||
name = "myfsio_core"
|
||||
version = "0.1.0"
|
||||
requires-python = ">=3.10"
|
||||
|
||||
[tool.maturin]
|
||||
features = ["pyo3/extension-module"]
|
||||
192
myfsio_core/src/crypto.rs
Normal file
192
myfsio_core/src/crypto.rs
Normal file
@@ -0,0 +1,192 @@
|
||||
use aes_gcm::aead::Aead;
|
||||
use aes_gcm::{Aes256Gcm, KeyInit, Nonce};
|
||||
use hkdf::Hkdf;
|
||||
use pyo3::exceptions::{PyIOError, PyValueError};
|
||||
use pyo3::prelude::*;
|
||||
use sha2::Sha256;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Seek, SeekFrom, Write};
|
||||
|
||||
const DEFAULT_CHUNK_SIZE: usize = 65536;
|
||||
const HEADER_SIZE: usize = 4;
|
||||
|
||||
fn read_exact_chunk(reader: &mut impl Read, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
let mut filled = 0;
|
||||
while filled < buf.len() {
|
||||
match reader.read(&mut buf[filled..]) {
|
||||
Ok(0) => break,
|
||||
Ok(n) => filled += n,
|
||||
Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
|
||||
Err(e) => return Err(e),
|
||||
}
|
||||
}
|
||||
Ok(filled)
|
||||
}
|
||||
|
||||
fn derive_chunk_nonce(base_nonce: &[u8], chunk_index: u32) -> Result<[u8; 12], String> {
|
||||
let hkdf = Hkdf::<Sha256>::new(Some(base_nonce), b"chunk_nonce");
|
||||
let mut okm = [0u8; 12];
|
||||
hkdf.expand(&chunk_index.to_be_bytes(), &mut okm)
|
||||
.map_err(|e| format!("HKDF expand failed: {}", e))?;
|
||||
Ok(okm)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (input_path, output_path, key, base_nonce, chunk_size=DEFAULT_CHUNK_SIZE))]
|
||||
pub fn encrypt_stream_chunked(
|
||||
py: Python<'_>,
|
||||
input_path: &str,
|
||||
output_path: &str,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
chunk_size: usize,
|
||||
) -> PyResult<u32> {
|
||||
if key.len() != 32 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Key must be 32 bytes, got {}",
|
||||
key.len()
|
||||
)));
|
||||
}
|
||||
if base_nonce.len() != 12 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Base nonce must be 12 bytes, got {}",
|
||||
base_nonce.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let chunk_size = if chunk_size == 0 {
|
||||
DEFAULT_CHUNK_SIZE
|
||||
} else {
|
||||
chunk_size
|
||||
};
|
||||
|
||||
let inp = input_path.to_owned();
|
||||
let out = output_path.to_owned();
|
||||
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||
|
||||
py.detach(move || {
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
|
||||
let mut infile = File::open(&inp)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
|
||||
let mut outfile = File::create(&out)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
|
||||
|
||||
outfile
|
||||
.write_all(&[0u8; 4])
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write header: {}", e)))?;
|
||||
|
||||
let mut buf = vec![0u8; chunk_size];
|
||||
let mut chunk_index: u32 = 0;
|
||||
|
||||
loop {
|
||||
let n = read_exact_chunk(&mut infile, &mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
|
||||
.map_err(|e| PyValueError::new_err(e))?;
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let encrypted = cipher
|
||||
.encrypt(nonce, &buf[..n])
|
||||
.map_err(|e| PyValueError::new_err(format!("Encrypt failed: {}", e)))?;
|
||||
|
||||
let size = encrypted.len() as u32;
|
||||
outfile
|
||||
.write_all(&size.to_be_bytes())
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk size: {}", e)))?;
|
||||
outfile
|
||||
.write_all(&encrypted)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk: {}", e)))?;
|
||||
|
||||
chunk_index += 1;
|
||||
}
|
||||
|
||||
outfile
|
||||
.seek(SeekFrom::Start(0))
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to seek: {}", e)))?;
|
||||
outfile
|
||||
.write_all(&chunk_index.to_be_bytes())
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write chunk count: {}", e)))?;
|
||||
|
||||
Ok(chunk_index)
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn decrypt_stream_chunked(
|
||||
py: Python<'_>,
|
||||
input_path: &str,
|
||||
output_path: &str,
|
||||
key: &[u8],
|
||||
base_nonce: &[u8],
|
||||
) -> PyResult<u32> {
|
||||
if key.len() != 32 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Key must be 32 bytes, got {}",
|
||||
key.len()
|
||||
)));
|
||||
}
|
||||
if base_nonce.len() != 12 {
|
||||
return Err(PyValueError::new_err(format!(
|
||||
"Base nonce must be 12 bytes, got {}",
|
||||
base_nonce.len()
|
||||
)));
|
||||
}
|
||||
|
||||
let inp = input_path.to_owned();
|
||||
let out = output_path.to_owned();
|
||||
let key_arr: [u8; 32] = key.try_into().unwrap();
|
||||
let nonce_arr: [u8; 12] = base_nonce.try_into().unwrap();
|
||||
|
||||
py.detach(move || {
|
||||
let cipher = Aes256Gcm::new(&key_arr.into());
|
||||
|
||||
let mut infile = File::open(&inp)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open input: {}", e)))?;
|
||||
let mut outfile = File::create(&out)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create output: {}", e)))?;
|
||||
|
||||
let mut header = [0u8; HEADER_SIZE];
|
||||
infile
|
||||
.read_exact(&mut header)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read header: {}", e)))?;
|
||||
let chunk_count = u32::from_be_bytes(header);
|
||||
|
||||
let mut size_buf = [0u8; HEADER_SIZE];
|
||||
for chunk_index in 0..chunk_count {
|
||||
infile
|
||||
.read_exact(&mut size_buf)
|
||||
.map_err(|e| {
|
||||
PyIOError::new_err(format!(
|
||||
"Failed to read chunk {} size: {}",
|
||||
chunk_index, e
|
||||
))
|
||||
})?;
|
||||
let chunk_size = u32::from_be_bytes(size_buf) as usize;
|
||||
|
||||
let mut encrypted = vec![0u8; chunk_size];
|
||||
infile.read_exact(&mut encrypted).map_err(|e| {
|
||||
PyIOError::new_err(format!("Failed to read chunk {}: {}", chunk_index, e))
|
||||
})?;
|
||||
|
||||
let nonce_bytes = derive_chunk_nonce(&nonce_arr, chunk_index)
|
||||
.map_err(|e| PyValueError::new_err(e))?;
|
||||
let nonce = Nonce::from_slice(&nonce_bytes);
|
||||
|
||||
let decrypted = cipher.decrypt(nonce, encrypted.as_ref()).map_err(|e| {
|
||||
PyValueError::new_err(format!("Decrypt chunk {} failed: {}", chunk_index, e))
|
||||
})?;
|
||||
|
||||
outfile.write_all(&decrypted).map_err(|e| {
|
||||
PyIOError::new_err(format!("Failed to write chunk {}: {}", chunk_index, e))
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(chunk_count)
|
||||
})
|
||||
}
|
||||
90
myfsio_core/src/hashing.rs
Normal file
90
myfsio_core/src/hashing.rs
Normal file
@@ -0,0 +1,90 @@
|
||||
use md5::{Digest, Md5};
|
||||
use pyo3::exceptions::PyIOError;
|
||||
use pyo3::prelude::*;
|
||||
use sha2::Sha256;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
const CHUNK_SIZE: usize = 65536;
|
||||
|
||||
#[pyfunction]
|
||||
pub fn md5_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
||||
let path = path.to_owned();
|
||||
py.detach(move || {
|
||||
let mut file = File::open(&path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn md5_bytes(data: &[u8]) -> String {
|
||||
let mut hasher = Md5::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult<String> {
|
||||
let path = path.to_owned();
|
||||
py.detach(move || {
|
||||
let mut file = File::open(&path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||
let mut hasher = Sha256::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn sha256_bytes(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
format!("{:x}", hasher.finalize())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
|
||||
let path = path.to_owned();
|
||||
py.detach(move || {
|
||||
let mut file = File::open(&path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
|
||||
let mut md5_hasher = Md5::new();
|
||||
let mut sha_hasher = Sha256::new();
|
||||
let mut buf = vec![0u8; CHUNK_SIZE];
|
||||
loop {
|
||||
let n = file
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
md5_hasher.update(&buf[..n]);
|
||||
sha_hasher.update(&buf[..n]);
|
||||
}
|
||||
Ok((
|
||||
format!("{:x}", md5_hasher.finalize()),
|
||||
format!("{:x}", sha_hasher.finalize()),
|
||||
))
|
||||
})
|
||||
}
|
||||
51
myfsio_core/src/lib.rs
Normal file
51
myfsio_core/src/lib.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
mod crypto;
|
||||
mod hashing;
|
||||
mod metadata;
|
||||
mod sigv4;
|
||||
mod storage;
|
||||
mod streaming;
|
||||
mod validation;
|
||||
|
||||
use pyo3::prelude::*;
|
||||
|
||||
#[pymodule]
|
||||
mod myfsio_core {
|
||||
use super::*;
|
||||
|
||||
#[pymodule_init]
|
||||
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
|
||||
m.add_function(wrap_pyfunction!(sigv4::verify_sigv4_signature, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(metadata::read_index_entry, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(storage::write_index_entry, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::delete_index_entry, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::check_bucket_contents, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::shallow_scan, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::bucket_stats_scan, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::search_objects_scan, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(storage::build_object_cache, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(streaming::stream_to_file_with_md5, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(streaming::assemble_parts_with_md5, m)?)?;
|
||||
|
||||
m.add_function(wrap_pyfunction!(crypto::encrypt_stream_chunked, m)?)?;
|
||||
m.add_function(wrap_pyfunction!(crypto::decrypt_stream_chunked, m)?)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
71
myfsio_core/src/metadata.rs
Normal file
71
myfsio_core/src/metadata.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use pyo3::exceptions::PyValueError;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::{PyDict, PyList, PyString};
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
|
||||
const MAX_DEPTH: u32 = 64;
|
||||
|
||||
fn value_to_py(py: Python<'_>, v: &Value, depth: u32) -> PyResult<Py<PyAny>> {
|
||||
if depth > MAX_DEPTH {
|
||||
return Err(PyValueError::new_err("JSON nesting too deep"));
|
||||
}
|
||||
match v {
|
||||
Value::Null => Ok(py.None()),
|
||||
Value::Bool(b) => Ok((*b).into_pyobject(py)?.to_owned().into_any().unbind()),
|
||||
Value::Number(n) => {
|
||||
if let Some(i) = n.as_i64() {
|
||||
Ok(i.into_pyobject(py)?.into_any().unbind())
|
||||
} else if let Some(f) = n.as_f64() {
|
||||
Ok(f.into_pyobject(py)?.into_any().unbind())
|
||||
} else {
|
||||
Ok(py.None())
|
||||
}
|
||||
}
|
||||
Value::String(s) => Ok(PyString::new(py, s).into_any().unbind()),
|
||||
Value::Array(arr) => {
|
||||
let list = PyList::empty(py);
|
||||
for item in arr {
|
||||
list.append(value_to_py(py, item, depth + 1)?)?;
|
||||
}
|
||||
Ok(list.into_any().unbind())
|
||||
}
|
||||
Value::Object(map) => {
|
||||
let dict = PyDict::new(py);
|
||||
for (k, val) in map {
|
||||
dict.set_item(k, value_to_py(py, val, depth + 1)?)?;
|
||||
}
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn read_index_entry(
|
||||
py: Python<'_>,
|
||||
path: &str,
|
||||
entry_name: &str,
|
||||
) -> PyResult<Option<Py<PyAny>>> {
|
||||
let path_owned = path.to_owned();
|
||||
let entry_owned = entry_name.to_owned();
|
||||
|
||||
let entry: Option<Value> = py.detach(move || -> PyResult<Option<Value>> {
|
||||
let content = match fs::read_to_string(&path_owned) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
let parsed: Value = match serde_json::from_str(&content) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return Ok(None),
|
||||
};
|
||||
match parsed {
|
||||
Value::Object(mut map) => Ok(map.remove(&entry_owned)),
|
||||
_ => Ok(None),
|
||||
}
|
||||
})?;
|
||||
|
||||
match entry {
|
||||
Some(val) => Ok(Some(value_to_py(py, &val, 0)?)),
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
193
myfsio_core/src/sigv4.rs
Normal file
193
myfsio_core/src/sigv4.rs
Normal file
@@ -0,0 +1,193 @@
|
||||
use hmac::{Hmac, Mac};
|
||||
use lru::LruCache;
|
||||
use parking_lot::Mutex;
|
||||
use percent_encoding::{percent_encode, AsciiSet, NON_ALPHANUMERIC};
|
||||
use pyo3::prelude::*;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::num::NonZeroUsize;
|
||||
use std::sync::LazyLock;
|
||||
use std::time::Instant;
|
||||
|
||||
type HmacSha256 = Hmac<Sha256>;
|
||||
|
||||
struct CacheEntry {
|
||||
key: Vec<u8>,
|
||||
created: Instant,
|
||||
}
|
||||
|
||||
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
|
||||
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
|
||||
|
||||
const CACHE_TTL_SECS: u64 = 60;
|
||||
|
||||
const AWS_ENCODE_SET: &AsciiSet = &NON_ALPHANUMERIC
|
||||
.remove(b'-')
|
||||
.remove(b'_')
|
||||
.remove(b'.')
|
||||
.remove(b'~');
|
||||
|
||||
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
|
||||
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
|
||||
mac.update(msg);
|
||||
mac.finalize().into_bytes().to_vec()
|
||||
}
|
||||
|
||||
fn sha256_hex(data: &[u8]) -> String {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
hex::encode(hasher.finalize())
|
||||
}
|
||||
|
||||
fn aws_uri_encode(input: &str) -> String {
|
||||
percent_encode(input.as_bytes(), AWS_ENCODE_SET).to_string()
|
||||
}
|
||||
|
||||
fn derive_signing_key_cached(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
) -> Vec<u8> {
|
||||
let cache_key = (
|
||||
secret_key.to_owned(),
|
||||
date_stamp.to_owned(),
|
||||
region.to_owned(),
|
||||
service.to_owned(),
|
||||
);
|
||||
|
||||
{
|
||||
let mut cache = SIGNING_KEY_CACHE.lock();
|
||||
if let Some(entry) = cache.get(&cache_key) {
|
||||
if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
|
||||
return entry.key.clone();
|
||||
}
|
||||
cache.pop(&cache_key);
|
||||
}
|
||||
}
|
||||
|
||||
let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
|
||||
let k_region = hmac_sha256(&k_date, region.as_bytes());
|
||||
let k_service = hmac_sha256(&k_region, service.as_bytes());
|
||||
let k_signing = hmac_sha256(&k_service, b"aws4_request");
|
||||
|
||||
{
|
||||
let mut cache = SIGNING_KEY_CACHE.lock();
|
||||
cache.put(
|
||||
cache_key,
|
||||
CacheEntry {
|
||||
key: k_signing.clone(),
|
||||
created: Instant::now(),
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
k_signing
|
||||
}
|
||||
|
||||
fn constant_time_compare_inner(a: &[u8], b: &[u8]) -> bool {
|
||||
if a.len() != b.len() {
|
||||
return false;
|
||||
}
|
||||
let mut result: u8 = 0;
|
||||
for (x, y) in a.iter().zip(b.iter()) {
|
||||
result |= x ^ y;
|
||||
}
|
||||
result == 0
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn verify_sigv4_signature(
|
||||
method: &str,
|
||||
canonical_uri: &str,
|
||||
query_params: Vec<(String, String)>,
|
||||
signed_headers_str: &str,
|
||||
header_values: Vec<(String, String)>,
|
||||
payload_hash: &str,
|
||||
amz_date: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
secret_key: &str,
|
||||
provided_signature: &str,
|
||||
) -> bool {
|
||||
let mut sorted_params = query_params;
|
||||
sorted_params.sort_by(|a, b| a.0.cmp(&b.0).then_with(|| a.1.cmp(&b.1)));
|
||||
|
||||
let canonical_query_string = sorted_params
|
||||
.iter()
|
||||
.map(|(k, v)| format!("{}={}", aws_uri_encode(k), aws_uri_encode(v)))
|
||||
.collect::<Vec<_>>()
|
||||
.join("&");
|
||||
|
||||
let mut canonical_headers = String::new();
|
||||
for (name, value) in &header_values {
|
||||
let lower_name = name.to_lowercase();
|
||||
let normalized = value.split_whitespace().collect::<Vec<_>>().join(" ");
|
||||
let final_value = if lower_name == "expect" && normalized.is_empty() {
|
||||
"100-continue"
|
||||
} else {
|
||||
&normalized
|
||||
};
|
||||
canonical_headers.push_str(&lower_name);
|
||||
canonical_headers.push(':');
|
||||
canonical_headers.push_str(final_value);
|
||||
canonical_headers.push('\n');
|
||||
}
|
||||
|
||||
let canonical_request = format!(
|
||||
"{}\n{}\n{}\n{}\n{}\n{}",
|
||||
method, canonical_uri, canonical_query_string, canonical_headers, signed_headers_str, payload_hash
|
||||
);
|
||||
|
||||
let credential_scope = format!("{}/{}/{}/aws4_request", date_stamp, region, service);
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
let string_to_sign = format!(
|
||||
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||
amz_date, credential_scope, cr_hash
|
||||
);
|
||||
|
||||
let signing_key = derive_signing_key_cached(secret_key, date_stamp, region, service);
|
||||
let calculated = hmac_sha256(&signing_key, string_to_sign.as_bytes());
|
||||
let calculated_hex = hex::encode(&calculated);
|
||||
|
||||
constant_time_compare_inner(calculated_hex.as_bytes(), provided_signature.as_bytes())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn derive_signing_key(
|
||||
secret_key: &str,
|
||||
date_stamp: &str,
|
||||
region: &str,
|
||||
service: &str,
|
||||
) -> Vec<u8> {
|
||||
derive_signing_key_cached(secret_key, date_stamp, region, service)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
|
||||
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
|
||||
hex::encode(sig)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn build_string_to_sign(
|
||||
amz_date: &str,
|
||||
credential_scope: &str,
|
||||
canonical_request: &str,
|
||||
) -> String {
|
||||
let cr_hash = sha256_hex(canonical_request.as_bytes());
|
||||
format!(
|
||||
"AWS4-HMAC-SHA256\n{}\n{}\n{}",
|
||||
amz_date, credential_scope, cr_hash
|
||||
)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn constant_time_compare(a: &str, b: &str) -> bool {
|
||||
constant_time_compare_inner(a.as_bytes(), b.as_bytes())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn clear_signing_key_cache() {
|
||||
SIGNING_KEY_CACHE.lock().clear();
|
||||
}
|
||||
817
myfsio_core/src/storage.rs
Normal file
817
myfsio_core/src/storage.rs
Normal file
@@ -0,0 +1,817 @@
|
||||
use pyo3::exceptions::PyIOError;
|
||||
use pyo3::prelude::*;
|
||||
use pyo3::types::{PyDict, PyList, PyString, PyTuple};
|
||||
use serde_json::Value;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::time::SystemTime;
|
||||
|
||||
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||
|
||||
fn system_time_to_epoch(t: SystemTime) -> f64 {
|
||||
t.duration_since(std::time::UNIX_EPOCH)
|
||||
.map(|d| d.as_secs_f64())
|
||||
.unwrap_or(0.0)
|
||||
}
|
||||
|
||||
fn extract_etag_from_meta_bytes(content: &[u8]) -> Option<String> {
|
||||
let marker = b"\"__etag__\"";
|
||||
let idx = content.windows(marker.len()).position(|w| w == marker)?;
|
||||
let after = &content[idx + marker.len()..];
|
||||
let start = after.iter().position(|&b| b == b'"')? + 1;
|
||||
let rest = &after[start..];
|
||||
let end = rest.iter().position(|&b| b == b'"')?;
|
||||
std::str::from_utf8(&rest[..end]).ok().map(|s| s.to_owned())
|
||||
}
|
||||
|
||||
fn has_any_file(root: &str) -> bool {
|
||||
let root_path = Path::new(root);
|
||||
if !root_path.is_dir() {
|
||||
return false;
|
||||
}
|
||||
let mut stack = vec![root_path.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_file() {
|
||||
return true;
|
||||
}
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn write_index_entry(
|
||||
py: Python<'_>,
|
||||
path: &str,
|
||||
entry_name: &str,
|
||||
entry_data_json: &str,
|
||||
) -> PyResult<()> {
|
||||
let path_owned = path.to_owned();
|
||||
let entry_owned = entry_name.to_owned();
|
||||
let data_owned = entry_data_json.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<()> {
|
||||
let entry_value: Value = serde_json::from_str(&data_owned)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to parse entry data: {}", e)))?;
|
||||
|
||||
if let Some(parent) = Path::new(&path_owned).parent() {
|
||||
let _ = fs::create_dir_all(parent);
|
||||
}
|
||||
|
||||
let mut index_data: serde_json::Map<String, Value> = match fs::read_to_string(&path_owned)
|
||||
{
|
||||
Ok(content) => serde_json::from_str(&content).unwrap_or_default(),
|
||||
Err(_) => serde_json::Map::new(),
|
||||
};
|
||||
|
||||
index_data.insert(entry_owned, entry_value);
|
||||
|
||||
let serialized = serde_json::to_string(&Value::Object(index_data))
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
|
||||
|
||||
fs::write(&path_owned, serialized)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
|
||||
|
||||
Ok(())
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn delete_index_entry(py: Python<'_>, path: &str, entry_name: &str) -> PyResult<bool> {
|
||||
let path_owned = path.to_owned();
|
||||
let entry_owned = entry_name.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<bool> {
|
||||
let content = match fs::read_to_string(&path_owned) {
|
||||
Ok(c) => c,
|
||||
Err(_) => return Ok(false),
|
||||
};
|
||||
|
||||
let mut index_data: serde_json::Map<String, Value> =
|
||||
match serde_json::from_str(&content) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return Ok(false),
|
||||
};
|
||||
|
||||
if index_data.remove(&entry_owned).is_none() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
if index_data.is_empty() {
|
||||
let _ = fs::remove_file(&path_owned);
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
let serialized = serde_json::to_string(&Value::Object(index_data))
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to serialize index: {}", e)))?;
|
||||
|
||||
fs::write(&path_owned, serialized)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write index: {}", e)))?;
|
||||
|
||||
Ok(true)
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn check_bucket_contents(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
version_roots: Vec<String>,
|
||||
multipart_roots: Vec<String>,
|
||||
) -> PyResult<(bool, bool, bool)> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<(bool, bool, bool)> {
|
||||
let mut has_objects = false;
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
if bucket_p.is_dir() {
|
||||
let mut stack = vec![bucket_p.to_path_buf()];
|
||||
'obj_scan: while let Some(current) = stack.pop() {
|
||||
let is_root = current == bucket_p;
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if is_root {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ft.is_file() && !ft.is_symlink() {
|
||||
has_objects = true;
|
||||
break 'obj_scan;
|
||||
}
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut has_versions = false;
|
||||
for root in &version_roots {
|
||||
if has_versions {
|
||||
break;
|
||||
}
|
||||
has_versions = has_any_file(root);
|
||||
}
|
||||
|
||||
let mut has_multipart = false;
|
||||
for root in &multipart_roots {
|
||||
if has_multipart {
|
||||
break;
|
||||
}
|
||||
has_multipart = has_any_file(root);
|
||||
}
|
||||
|
||||
Ok((has_objects, has_versions, has_multipart))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn shallow_scan(
|
||||
py: Python<'_>,
|
||||
target_dir: &str,
|
||||
prefix: &str,
|
||||
meta_cache_json: &str,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let target_owned = target_dir.to_owned();
|
||||
let prefix_owned = prefix.to_owned();
|
||||
let cache_owned = meta_cache_json.to_owned();
|
||||
|
||||
let result: (
|
||||
Vec<(String, u64, f64, Option<String>)>,
|
||||
Vec<String>,
|
||||
Vec<(String, bool)>,
|
||||
) = py.detach(move || -> PyResult<(
|
||||
Vec<(String, u64, f64, Option<String>)>,
|
||||
Vec<String>,
|
||||
Vec<(String, bool)>,
|
||||
)> {
|
||||
let meta_cache: HashMap<String, String> =
|
||||
serde_json::from_str(&cache_owned).unwrap_or_default();
|
||||
|
||||
let mut files: Vec<(String, u64, f64, Option<String>)> = Vec::new();
|
||||
let mut dirs: Vec<String> = Vec::new();
|
||||
|
||||
let entries = match fs::read_dir(&target_owned) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return Ok((files, dirs, Vec::new())),
|
||||
};
|
||||
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let name = match entry.file_name().into_string() {
|
||||
Ok(n) => n,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if INTERNAL_FOLDERS.contains(&name.as_str()) {
|
||||
continue;
|
||||
}
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
let cp = format!("{}{}/", prefix_owned, name);
|
||||
dirs.push(cp);
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
let key = format!("{}{}", prefix_owned, name);
|
||||
let md = match entry.metadata() {
|
||||
Ok(m) => m,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let size = md.len();
|
||||
let mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
let etag = meta_cache.get(&key).cloned();
|
||||
files.push((key, size, mtime, etag));
|
||||
}
|
||||
}
|
||||
|
||||
files.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
dirs.sort();
|
||||
|
||||
let mut merged: Vec<(String, bool)> = Vec::with_capacity(files.len() + dirs.len());
|
||||
let mut fi = 0;
|
||||
let mut di = 0;
|
||||
while fi < files.len() && di < dirs.len() {
|
||||
if files[fi].0 <= dirs[di] {
|
||||
merged.push((files[fi].0.clone(), false));
|
||||
fi += 1;
|
||||
} else {
|
||||
merged.push((dirs[di].clone(), true));
|
||||
di += 1;
|
||||
}
|
||||
}
|
||||
while fi < files.len() {
|
||||
merged.push((files[fi].0.clone(), false));
|
||||
fi += 1;
|
||||
}
|
||||
while di < dirs.len() {
|
||||
merged.push((dirs[di].clone(), true));
|
||||
di += 1;
|
||||
}
|
||||
|
||||
Ok((files, dirs, merged))
|
||||
})?;
|
||||
|
||||
let (files, dirs, merged) = result;
|
||||
|
||||
let dict = PyDict::new(py);
|
||||
|
||||
let files_list = PyList::empty(py);
|
||||
for (key, size, mtime, etag) in &files {
|
||||
let etag_py: Py<PyAny> = match etag {
|
||||
Some(e) => PyString::new(py, e).into_any().unbind(),
|
||||
None => py.None(),
|
||||
};
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
size.into_pyobject(py)?.into_any().unbind(),
|
||||
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||
etag_py,
|
||||
])?;
|
||||
files_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("files", files_list)?;
|
||||
|
||||
let dirs_list = PyList::empty(py);
|
||||
for d in &dirs {
|
||||
dirs_list.append(PyString::new(py, d))?;
|
||||
}
|
||||
dict.set_item("dirs", dirs_list)?;
|
||||
|
||||
let merged_list = PyList::empty(py);
|
||||
for (key, is_dir) in &merged {
|
||||
let bool_obj: Py<PyAny> = if *is_dir {
|
||||
true.into_pyobject(py)?.to_owned().into_any().unbind()
|
||||
} else {
|
||||
false.into_pyobject(py)?.to_owned().into_any().unbind()
|
||||
};
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
bool_obj,
|
||||
])?;
|
||||
merged_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("merged_keys", merged_list)?;
|
||||
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn bucket_stats_scan(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
versions_root: &str,
|
||||
) -> PyResult<(u64, u64, u64, u64)> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
let versions_owned = versions_root.to_owned();
|
||||
|
||||
py.detach(move || -> PyResult<(u64, u64, u64, u64)> {
|
||||
let mut object_count: u64 = 0;
|
||||
let mut total_bytes: u64 = 0;
|
||||
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
if bucket_p.is_dir() {
|
||||
let mut stack = vec![bucket_p.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let is_root = current == bucket_p;
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if is_root {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
object_count += 1;
|
||||
if let Ok(md) = entry.metadata() {
|
||||
total_bytes += md.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut version_count: u64 = 0;
|
||||
let mut version_bytes: u64 = 0;
|
||||
|
||||
let versions_p = Path::new(&versions_owned);
|
||||
if versions_p.is_dir() {
|
||||
let mut stack = vec![versions_p.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if name.ends_with(".bin") {
|
||||
version_count += 1;
|
||||
if let Ok(md) = entry.metadata() {
|
||||
version_bytes += md.len();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((object_count, total_bytes, version_count, version_bytes))
|
||||
})
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (bucket_path, search_root, query, limit))]
|
||||
pub fn search_objects_scan(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
search_root: &str,
|
||||
query: &str,
|
||||
limit: usize,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
let search_owned = search_root.to_owned();
|
||||
let query_owned = query.to_owned();
|
||||
|
||||
let result: (Vec<(String, u64, f64)>, bool) = py.detach(
|
||||
move || -> PyResult<(Vec<(String, u64, f64)>, bool)> {
|
||||
let query_lower = query_owned.to_lowercase();
|
||||
let bucket_len = bucket_owned.len() + 1;
|
||||
let scan_limit = limit * 4;
|
||||
let mut matched: usize = 0;
|
||||
let mut results: Vec<(String, u64, f64)> = Vec::new();
|
||||
|
||||
let search_p = Path::new(&search_owned);
|
||||
if !search_p.is_dir() {
|
||||
return Ok((results, false));
|
||||
}
|
||||
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
let mut stack = vec![search_p.to_path_buf()];
|
||||
|
||||
'scan: while let Some(current) = stack.pop() {
|
||||
let is_bucket_root = current == bucket_p;
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if is_bucket_root {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
stack.push(entry.path());
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
let full_path = entry.path();
|
||||
let full_str = full_path.to_string_lossy();
|
||||
if full_str.len() <= bucket_len {
|
||||
continue;
|
||||
}
|
||||
let key = full_str[bucket_len..].replace('\\', "/");
|
||||
if key.to_lowercase().contains(&query_lower) {
|
||||
if let Ok(md) = entry.metadata() {
|
||||
let size = md.len();
|
||||
let mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
results.push((key, size, mtime));
|
||||
matched += 1;
|
||||
}
|
||||
}
|
||||
if matched >= scan_limit {
|
||||
break 'scan;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
results.sort_by(|a, b| a.0.cmp(&b.0));
|
||||
let truncated = results.len() > limit;
|
||||
results.truncate(limit);
|
||||
|
||||
Ok((results, truncated))
|
||||
},
|
||||
)?;
|
||||
|
||||
let (results, truncated) = result;
|
||||
|
||||
let dict = PyDict::new(py);
|
||||
|
||||
let results_list = PyList::empty(py);
|
||||
for (key, size, mtime) in &results {
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
size.into_pyobject(py)?.into_any().unbind(),
|
||||
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||
])?;
|
||||
results_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("results", results_list)?;
|
||||
dict.set_item("truncated", truncated)?;
|
||||
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn build_object_cache(
|
||||
py: Python<'_>,
|
||||
bucket_path: &str,
|
||||
meta_root: &str,
|
||||
etag_index_path: &str,
|
||||
) -> PyResult<Py<PyAny>> {
|
||||
let bucket_owned = bucket_path.to_owned();
|
||||
let meta_owned = meta_root.to_owned();
|
||||
let index_path_owned = etag_index_path.to_owned();
|
||||
|
||||
let result: (HashMap<String, String>, Vec<(String, u64, f64, Option<String>)>, bool) =
|
||||
py.detach(move || -> PyResult<(
|
||||
HashMap<String, String>,
|
||||
Vec<(String, u64, f64, Option<String>)>,
|
||||
bool,
|
||||
)> {
|
||||
let mut meta_cache: HashMap<String, String> = HashMap::new();
|
||||
let mut index_mtime: f64 = 0.0;
|
||||
let mut etag_cache_changed = false;
|
||||
|
||||
let index_p = Path::new(&index_path_owned);
|
||||
if index_p.is_file() {
|
||||
if let Ok(md) = fs::metadata(&index_path_owned) {
|
||||
index_mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
}
|
||||
if let Ok(content) = fs::read_to_string(&index_path_owned) {
|
||||
if let Ok(parsed) = serde_json::from_str::<HashMap<String, String>>(&content) {
|
||||
meta_cache = parsed;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let meta_p = Path::new(&meta_owned);
|
||||
let mut needs_rebuild = false;
|
||||
|
||||
if meta_p.is_dir() && index_mtime > 0.0 {
|
||||
fn check_newer(dir: &Path, index_mtime: f64) -> bool {
|
||||
let entries = match fs::read_dir(dir) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return false,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
if check_newer(&entry.path(), index_mtime) {
|
||||
return true;
|
||||
}
|
||||
} else if ft.is_file() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
if name.ends_with(".meta.json") || name == "_index.json" {
|
||||
if let Ok(md) = entry.metadata() {
|
||||
let mt = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
if mt > index_mtime {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
needs_rebuild = check_newer(meta_p, index_mtime);
|
||||
} else if meta_cache.is_empty() {
|
||||
needs_rebuild = true;
|
||||
}
|
||||
|
||||
if needs_rebuild && meta_p.is_dir() {
|
||||
let meta_str = meta_owned.clone();
|
||||
let meta_len = meta_str.len() + 1;
|
||||
let mut index_files: Vec<String> = Vec::new();
|
||||
let mut legacy_meta_files: Vec<(String, String)> = Vec::new();
|
||||
|
||||
fn collect_meta(
|
||||
dir: &Path,
|
||||
meta_len: usize,
|
||||
index_files: &mut Vec<String>,
|
||||
legacy_meta_files: &mut Vec<(String, String)>,
|
||||
) {
|
||||
let entries = match fs::read_dir(dir) {
|
||||
Ok(e) => e,
|
||||
Err(_) => return,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
collect_meta(&entry.path(), meta_len, index_files, legacy_meta_files);
|
||||
} else if ft.is_file() {
|
||||
if let Some(name) = entry.file_name().to_str() {
|
||||
let full = entry.path().to_string_lossy().to_string();
|
||||
if name == "_index.json" {
|
||||
index_files.push(full);
|
||||
} else if name.ends_with(".meta.json") {
|
||||
if full.len() > meta_len {
|
||||
let rel = &full[meta_len..];
|
||||
let key = if rel.len() > 10 {
|
||||
rel[..rel.len() - 10].replace('\\', "/")
|
||||
} else {
|
||||
continue;
|
||||
};
|
||||
legacy_meta_files.push((key, full));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
collect_meta(
|
||||
meta_p,
|
||||
meta_len,
|
||||
&mut index_files,
|
||||
&mut legacy_meta_files,
|
||||
);
|
||||
|
||||
meta_cache.clear();
|
||||
|
||||
for idx_path in &index_files {
|
||||
if let Ok(content) = fs::read_to_string(idx_path) {
|
||||
if let Ok(idx_data) = serde_json::from_str::<HashMap<String, Value>>(&content) {
|
||||
let rel_dir = if idx_path.len() > meta_len {
|
||||
let r = &idx_path[meta_len..];
|
||||
r.replace('\\', "/")
|
||||
} else {
|
||||
String::new()
|
||||
};
|
||||
let dir_prefix = if rel_dir.ends_with("/_index.json") {
|
||||
&rel_dir[..rel_dir.len() - "/_index.json".len()]
|
||||
} else {
|
||||
""
|
||||
};
|
||||
for (entry_name, entry_data) in &idx_data {
|
||||
let key = if dir_prefix.is_empty() {
|
||||
entry_name.clone()
|
||||
} else {
|
||||
format!("{}/{}", dir_prefix, entry_name)
|
||||
};
|
||||
if let Some(meta_obj) = entry_data.get("metadata") {
|
||||
if let Some(etag) = meta_obj.get("__etag__") {
|
||||
if let Some(etag_str) = etag.as_str() {
|
||||
meta_cache.insert(key, etag_str.to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (key, path) in &legacy_meta_files {
|
||||
if meta_cache.contains_key(key) {
|
||||
continue;
|
||||
}
|
||||
if let Ok(content) = fs::read(path) {
|
||||
if let Some(etag) = extract_etag_from_meta_bytes(&content) {
|
||||
meta_cache.insert(key.clone(), etag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
etag_cache_changed = true;
|
||||
}
|
||||
|
||||
let bucket_p = Path::new(&bucket_owned);
|
||||
let bucket_len = bucket_owned.len() + 1;
|
||||
let mut objects: Vec<(String, u64, f64, Option<String>)> = Vec::new();
|
||||
|
||||
if bucket_p.is_dir() {
|
||||
let mut stack = vec![bucket_p.to_path_buf()];
|
||||
while let Some(current) = stack.pop() {
|
||||
let entries = match fs::read_dir(¤t) {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
for entry_result in entries {
|
||||
let entry = match entry_result {
|
||||
Ok(e) => e,
|
||||
Err(_) => continue,
|
||||
};
|
||||
let ft = match entry.file_type() {
|
||||
Ok(ft) => ft,
|
||||
Err(_) => continue,
|
||||
};
|
||||
if ft.is_dir() && !ft.is_symlink() {
|
||||
let full = entry.path();
|
||||
let full_str = full.to_string_lossy();
|
||||
if full_str.len() > bucket_len {
|
||||
let first_part: &str = if let Some(sep_pos) =
|
||||
full_str[bucket_len..].find(|c: char| c == '\\' || c == '/')
|
||||
{
|
||||
&full_str[bucket_len..bucket_len + sep_pos]
|
||||
} else {
|
||||
&full_str[bucket_len..]
|
||||
};
|
||||
if INTERNAL_FOLDERS.contains(&first_part) {
|
||||
continue;
|
||||
}
|
||||
} else if let Some(name) = entry.file_name().to_str() {
|
||||
if INTERNAL_FOLDERS.contains(&name) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
stack.push(full);
|
||||
} else if ft.is_file() && !ft.is_symlink() {
|
||||
let full = entry.path();
|
||||
let full_str = full.to_string_lossy();
|
||||
if full_str.len() <= bucket_len {
|
||||
continue;
|
||||
}
|
||||
let rel = &full_str[bucket_len..];
|
||||
let first_part: &str =
|
||||
if let Some(sep_pos) = rel.find(|c: char| c == '\\' || c == '/') {
|
||||
&rel[..sep_pos]
|
||||
} else {
|
||||
rel
|
||||
};
|
||||
if INTERNAL_FOLDERS.contains(&first_part) {
|
||||
continue;
|
||||
}
|
||||
let key = rel.replace('\\', "/");
|
||||
if let Ok(md) = entry.metadata() {
|
||||
let size = md.len();
|
||||
let mtime = md
|
||||
.modified()
|
||||
.map(system_time_to_epoch)
|
||||
.unwrap_or(0.0);
|
||||
let etag = meta_cache.get(&key).cloned();
|
||||
objects.push((key, size, mtime, etag));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok((meta_cache, objects, etag_cache_changed))
|
||||
})?;
|
||||
|
||||
let (meta_cache, objects, etag_cache_changed) = result;
|
||||
|
||||
let dict = PyDict::new(py);
|
||||
|
||||
let cache_dict = PyDict::new(py);
|
||||
for (k, v) in &meta_cache {
|
||||
cache_dict.set_item(k, v)?;
|
||||
}
|
||||
dict.set_item("etag_cache", cache_dict)?;
|
||||
|
||||
let objects_list = PyList::empty(py);
|
||||
for (key, size, mtime, etag) in &objects {
|
||||
let etag_py: Py<PyAny> = match etag {
|
||||
Some(e) => PyString::new(py, e).into_any().unbind(),
|
||||
None => py.None(),
|
||||
};
|
||||
let tuple = PyTuple::new(py, &[
|
||||
PyString::new(py, key).into_any().unbind(),
|
||||
size.into_pyobject(py)?.into_any().unbind(),
|
||||
mtime.into_pyobject(py)?.into_any().unbind(),
|
||||
etag_py,
|
||||
])?;
|
||||
objects_list.append(tuple)?;
|
||||
}
|
||||
dict.set_item("objects", objects_list)?;
|
||||
dict.set_item("etag_cache_changed", etag_cache_changed)?;
|
||||
|
||||
Ok(dict.into_any().unbind())
|
||||
}
|
||||
112
myfsio_core/src/streaming.rs
Normal file
112
myfsio_core/src/streaming.rs
Normal file
@@ -0,0 +1,112 @@
|
||||
use md5::{Digest, Md5};
|
||||
use pyo3::exceptions::{PyIOError, PyValueError};
|
||||
use pyo3::prelude::*;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{Read, Write};
|
||||
use uuid::Uuid;
|
||||
|
||||
const DEFAULT_CHUNK_SIZE: usize = 262144;
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (stream, tmp_dir, chunk_size=DEFAULT_CHUNK_SIZE))]
|
||||
pub fn stream_to_file_with_md5(
|
||||
py: Python<'_>,
|
||||
stream: &Bound<'_, PyAny>,
|
||||
tmp_dir: &str,
|
||||
chunk_size: usize,
|
||||
) -> PyResult<(String, String, u64)> {
|
||||
let chunk_size = if chunk_size == 0 {
|
||||
DEFAULT_CHUNK_SIZE
|
||||
} else {
|
||||
chunk_size
|
||||
};
|
||||
|
||||
fs::create_dir_all(tmp_dir)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create tmp dir: {}", e)))?;
|
||||
|
||||
let tmp_name = format!("{}.tmp", Uuid::new_v4().as_hyphenated());
|
||||
let tmp_path_buf = std::path::PathBuf::from(tmp_dir).join(&tmp_name);
|
||||
let tmp_path = tmp_path_buf.to_string_lossy().into_owned();
|
||||
|
||||
let mut file = File::create(&tmp_path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create temp file: {}", e)))?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut total_bytes: u64 = 0;
|
||||
|
||||
let result: PyResult<()> = (|| {
|
||||
loop {
|
||||
let chunk: Vec<u8> = stream.call_method1("read", (chunk_size,))?.extract()?;
|
||||
if chunk.is_empty() {
|
||||
break;
|
||||
}
|
||||
hasher.update(&chunk);
|
||||
file.write_all(&chunk)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
|
||||
total_bytes += chunk.len() as u64;
|
||||
|
||||
py.check_signals()?;
|
||||
}
|
||||
file.sync_all()
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
|
||||
Ok(())
|
||||
})();
|
||||
|
||||
if let Err(e) = result {
|
||||
drop(file);
|
||||
let _ = fs::remove_file(&tmp_path);
|
||||
return Err(e);
|
||||
}
|
||||
|
||||
drop(file);
|
||||
|
||||
let md5_hex = format!("{:x}", hasher.finalize());
|
||||
Ok((tmp_path, md5_hex, total_bytes))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn assemble_parts_with_md5(
|
||||
py: Python<'_>,
|
||||
part_paths: Vec<String>,
|
||||
dest_path: &str,
|
||||
) -> PyResult<String> {
|
||||
if part_paths.is_empty() {
|
||||
return Err(PyValueError::new_err("No parts to assemble"));
|
||||
}
|
||||
|
||||
let dest = dest_path.to_owned();
|
||||
let parts = part_paths;
|
||||
|
||||
py.detach(move || {
|
||||
if let Some(parent) = std::path::Path::new(&dest).parent() {
|
||||
fs::create_dir_all(parent)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create dest dir: {}", e)))?;
|
||||
}
|
||||
|
||||
let mut target = File::create(&dest)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to create dest file: {}", e)))?;
|
||||
let mut hasher = Md5::new();
|
||||
let mut buf = vec![0u8; 1024 * 1024];
|
||||
|
||||
for part_path in &parts {
|
||||
let mut part = File::open(part_path)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to open part {}: {}", part_path, e)))?;
|
||||
loop {
|
||||
let n = part
|
||||
.read(&mut buf)
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to read part: {}", e)))?;
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buf[..n]);
|
||||
target
|
||||
.write_all(&buf[..n])
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to write: {}", e)))?;
|
||||
}
|
||||
}
|
||||
|
||||
target.sync_all()
|
||||
.map_err(|e| PyIOError::new_err(format!("Failed to fsync: {}", e)))?;
|
||||
|
||||
Ok(format!("{:x}", hasher.finalize()))
|
||||
})
|
||||
}
|
||||
149
myfsio_core/src/validation.rs
Normal file
149
myfsio_core/src/validation.rs
Normal file
@@ -0,0 +1,149 @@
|
||||
use pyo3::prelude::*;
|
||||
use std::sync::LazyLock;
|
||||
use unicode_normalization::UnicodeNormalization;
|
||||
|
||||
const WINDOWS_RESERVED: &[&str] = &[
|
||||
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
|
||||
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
|
||||
"LPT9",
|
||||
];
|
||||
|
||||
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
|
||||
|
||||
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
|
||||
const SYSTEM_ROOT: &str = ".myfsio.sys";
|
||||
|
||||
static IP_REGEX: LazyLock<regex::Regex> =
|
||||
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
|
||||
|
||||
#[pyfunction]
|
||||
#[pyo3(signature = (object_key, max_length_bytes=1024, is_windows=false, reserved_prefixes=None))]
|
||||
pub fn validate_object_key(
|
||||
object_key: &str,
|
||||
max_length_bytes: usize,
|
||||
is_windows: bool,
|
||||
reserved_prefixes: Option<Vec<String>>,
|
||||
) -> PyResult<Option<String>> {
|
||||
if object_key.is_empty() {
|
||||
return Ok(Some("Object key required".to_string()));
|
||||
}
|
||||
|
||||
if object_key.contains('\0') {
|
||||
return Ok(Some("Object key contains null bytes".to_string()));
|
||||
}
|
||||
|
||||
let normalized: String = object_key.nfc().collect();
|
||||
|
||||
if normalized.len() > max_length_bytes {
|
||||
return Ok(Some(format!(
|
||||
"Object key exceeds maximum length of {} bytes",
|
||||
max_length_bytes
|
||||
)));
|
||||
}
|
||||
|
||||
if normalized.starts_with('/') || normalized.starts_with('\\') {
|
||||
return Ok(Some("Object key cannot start with a slash".to_string()));
|
||||
}
|
||||
|
||||
let parts: Vec<&str> = if cfg!(windows) || is_windows {
|
||||
normalized.split(['/', '\\']).collect()
|
||||
} else {
|
||||
normalized.split('/').collect()
|
||||
};
|
||||
|
||||
for part in &parts {
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if *part == ".." {
|
||||
return Ok(Some(
|
||||
"Object key contains parent directory references".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if *part == "." {
|
||||
return Ok(Some("Object key contains invalid segments".to_string()));
|
||||
}
|
||||
|
||||
if part.chars().any(|c| (c as u32) < 32) {
|
||||
return Ok(Some(
|
||||
"Object key contains control characters".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
if is_windows {
|
||||
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
|
||||
return Ok(Some(
|
||||
"Object key contains characters not supported on Windows filesystems"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
if part.ends_with(' ') || part.ends_with('.') {
|
||||
return Ok(Some(
|
||||
"Object key segments cannot end with spaces or periods on Windows".to_string(),
|
||||
));
|
||||
}
|
||||
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
|
||||
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
|
||||
return Ok(Some(format!("Invalid filename segment: {}", part)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
|
||||
if let Some(top) = non_empty_parts.first() {
|
||||
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
|
||||
return Ok(Some("Object key uses a reserved prefix".to_string()));
|
||||
}
|
||||
|
||||
if let Some(ref prefixes) = reserved_prefixes {
|
||||
for prefix in prefixes {
|
||||
if *top == prefix.as_str() {
|
||||
return Ok(Some("Object key uses a reserved prefix".to_string()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
|
||||
let len = bucket_name.len();
|
||||
if len < 3 || len > 63 {
|
||||
return Some("Bucket name must be between 3 and 63 characters".to_string());
|
||||
}
|
||||
|
||||
let bytes = bucket_name.as_bytes();
|
||||
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
|
||||
return Some(
|
||||
"Bucket name must start and end with a lowercase letter or digit".to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
for &b in bytes {
|
||||
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
|
||||
return Some(
|
||||
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if bucket_name.contains("..") {
|
||||
return Some("Bucket name must not contain consecutive periods".to_string());
|
||||
}
|
||||
|
||||
if IP_REGEX.is_match(bucket_name) {
|
||||
return Some("Bucket name must not be formatted as an IP address".to_string());
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
@@ -1,3 +1,5 @@
|
||||
[pytest]
|
||||
testpaths = tests
|
||||
norecursedirs = data .git __pycache__ .venv
|
||||
markers =
|
||||
integration: marks tests as integration tests (may require external services)
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user