8 Commits

574 changed files with 5123 additions and 33245 deletions

View File

@@ -1,33 +1,25 @@
FROM python:3.14.3-slim # syntax=docker/dockerfile:1.7
FROM python:3.11-slim
ENV PYTHONDONTWRITEBYTECODE=1 \ ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 PYTHONUNBUFFERED=1
WORKDIR /app WORKDIR /app
RUN apt-get update \ # Install build deps for any wheels that need compilation, then clean up
&& apt-get install -y --no-install-recommends build-essential curl \ RUN apt-get update \
&& curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --profile minimal \ && apt-get install -y --no-install-recommends build-essential \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
ENV PATH="/root/.cargo/bin:${PATH}"
COPY requirements.txt ./ COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
COPY . . COPY . .
RUN pip install --no-cache-dir maturin \ # Make entrypoint executable
&& cd myfsio_core \
&& maturin build --release \
&& pip install target/wheels/*.whl \
&& cd .. \
&& rm -rf myfsio_core/target \
&& pip uninstall -y maturin \
&& rustup self uninstall -y
RUN chmod +x docker-entrypoint.sh RUN chmod +x docker-entrypoint.sh
# Create data directory and set permissions
RUN mkdir -p /app/data \ RUN mkdir -p /app/data \
&& useradd -m -u 1000 myfsio \ && useradd -m -u 1000 myfsio \
&& chown -R myfsio:myfsio /app && chown -R myfsio:myfsio /app
@@ -40,6 +32,6 @@ ENV APP_HOST=0.0.0.0 \
FLASK_DEBUG=0 FLASK_DEBUG=0
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD python -c "import requests; requests.get('http://localhost:5000/myfsio/health', timeout=2)" CMD python -c "import requests; requests.get('http://localhost:5000/healthz', timeout=2)"
CMD ["./docker-entrypoint.sh"] CMD ["./docker-entrypoint.sh"]

661
LICENSE
View File

@@ -1,661 +0,0 @@
GNU AFFERO GENERAL PUBLIC LICENSE
Version 3, 19 November 2007
Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.
Preamble
The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.
The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.
When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.
Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.
A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.
The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.
An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.
The precise terms and conditions for copying, distribution and
modification follow.
TERMS AND CONDITIONS
0. Definitions.
"This License" refers to version 3 of the GNU Affero General Public License.
"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.
"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.
To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.
A "covered work" means either the unmodified Program or a work based
on the Program.
To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.
To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.
An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.
1. Source Code.
The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.
A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.
The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.
The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.
The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.
The Corresponding Source for a work in source code form is that
same work.
2. Basic Permissions.
All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.
You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.
Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.
3. Protecting Users' Legal Rights From Anti-Circumvention Law.
No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.
When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.
4. Conveying Verbatim Copies.
You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.
You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.
5. Conveying Modified Source Versions.
You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:
a) The work must carry prominent notices stating that you modified
it, and giving a relevant date.
b) The work must carry prominent notices stating that it is
released under this License and any conditions added under section
7. This requirement modifies the requirement in section 4 to
"keep intact all notices".
c) You must license the entire work, as a whole, under this
License to anyone who comes into possession of a copy. This
License will therefore apply, along with any applicable section 7
additional terms, to the whole of the work, and all its parts,
regardless of how they are packaged. This License gives no
permission to license the work in any other way, but it does not
invalidate such permission if you have separately received it.
d) If the work has interactive user interfaces, each must display
Appropriate Legal Notices; however, if the Program has interactive
interfaces that do not display Appropriate Legal Notices, your
work need not make them do so.
A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.
6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:
a) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by the
Corresponding Source fixed on a durable physical medium
customarily used for software interchange.
b) Convey the object code in, or embodied in, a physical product
(including a physical distribution medium), accompanied by a
written offer, valid for at least three years and valid for as
long as you offer spare parts or customer support for that product
model, to give anyone who possesses the object code either (1) a
copy of the Corresponding Source for all the software in the
product that is covered by this License, on a durable physical
medium customarily used for software interchange, for a price no
more than your reasonable cost of physically performing this
conveying of source, or (2) access to copy the
Corresponding Source from a network server at no charge.
c) Convey individual copies of the object code with a copy of the
written offer to provide the Corresponding Source. This
alternative is allowed only occasionally and noncommercially, and
only if you received the object code with such an offer, in accord
with subsection 6b.
d) Convey the object code by offering access from a designated
place (gratis or for a charge), and offer equivalent access to the
Corresponding Source in the same way through the same place at no
further charge. You need not require recipients to copy the
Corresponding Source along with the object code. If the place to
copy the object code is a network server, the Corresponding Source
may be on a different server (operated by you or a third party)
that supports equivalent copying facilities, provided you maintain
clear directions next to the object code saying where to find the
Corresponding Source. Regardless of what server hosts the
Corresponding Source, you remain obligated to ensure that it is
available for as long as needed to satisfy these requirements.
e) Convey the object code using peer-to-peer transmission, provided
you inform other peers where the object code and Corresponding
Source of the work are being offered to the general public at no
charge under subsection 6d.
A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.
A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.
"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.
If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).
The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.
Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.
7. Additional Terms.
"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.
When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.
Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:
a) Disclaiming warranty or limiting liability differently from the
terms of sections 15 and 16 of this License; or
b) Requiring preservation of specified reasonable legal notices or
author attributions in that material or in the Appropriate Legal
Notices displayed by works containing it; or
c) Prohibiting misrepresentation of the origin of that material, or
requiring that modified versions of such material be marked in
reasonable ways as different from the original version; or
d) Limiting the use for publicity purposes of names of licensors or
authors of the material; or
e) Declining to grant rights under trademark law for use of some
trade names, trademarks, or service marks; or
f) Requiring indemnification of licensors and authors of that
material by anyone who conveys the material (or modified versions of
it) with contractual assumptions of liability to the recipient, for
any liability that these contractual assumptions directly impose on
those licensors and authors.
All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.
If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.
Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.
8. Termination.
You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).
However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.
Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.
Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.
9. Acceptance Not Required for Having Copies.
You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.
10. Automatic Licensing of Downstream Recipients.
Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.
An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.
You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.
11. Patents.
A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".
A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.
Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.
In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.
If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.
If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.
A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.
Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.
12. No Surrender of Others' Freedom.
If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.
13. Remote Network Interaction; Use with the GNU General Public License.
Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.
Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.
14. Revised Versions of this License.
The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.
Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.
If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.
Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.
15. Disclaimer of Warranty.
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
16. Limitation of Liability.
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.
17. Interpretation of Sections 15 and 16.
If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.
END OF TERMS AND CONDITIONS
How to Apply These Terms to Your New Programs
If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.
To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.
<one line to give the program's name and a brief idea of what it does.>
Copyright (C) <year> <name of author>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
Also add information on how to contact you by electronic and paper mail.
If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.
You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.

299
README.md
View File

@@ -1,250 +1,117 @@
# MyFSIO # MyFSIO (Flask S3 + IAM)
A lightweight, S3-compatible object storage system built with Flask. MyFSIO implements core AWS S3 REST API operations with filesystem-backed storage, making it ideal for local development, testing, and self-hosted storage scenarios. MyFSIO is a batteries-included, Flask-based recreation of Amazon S3 and IAM workflows built for local development. The design mirrors the [AWS S3 documentation](https://docs.aws.amazon.com/s3/) wherever practical: bucket naming, Signature Version 4 presigning, Version 2012-10-17 bucket policies, IAM-style users, and familiar REST endpoints.
## Features ## Why MyFSIO?
**Core Storage** - **Dual servers:** Run both the API (port 5000) and UI (port 5100) with a single command: `python run.py`.
- S3-compatible REST API with AWS Signature Version 4 authentication - **IAM + access keys:** Users, access keys, key rotation, and bucket-scoped actions (`list/read/write/delete/policy`) now live in `data/.myfsio.sys/config/iam.json` and are editable from the IAM dashboard.
- Bucket and object CRUD operations - **Bucket policies + hot reload:** `data/.myfsio.sys/config/bucket_policies.json` uses AWS' policy grammar (Version `2012-10-17`) with a built-in watcher, so editing the JSON file applies immediately. The UI also ships Public/Private/Custom presets for faster edits.
- Object versioning with version history - **Presigned URLs everywhere:** Signature Version 4 presigned URLs respect IAM + bucket policies and replace the now-removed "share link" feature for public access scenarios.
- Multipart uploads for large files - **Modern UI:** Responsive tables, quick filters, preview sidebar, object-level delete buttons, a presign modal, and an inline JSON policy editor that respects dark mode keep bucket management friendly. The object browser supports folder navigation, infinite scroll pagination, bulk operations, and automatic retry on load failures.
- Presigned URLs (1 second to 7 days validity) - **Tests & health:** `/healthz` for smoke checks and `pytest` coverage for IAM, CRUD, presign, and policy flows.
**Security & Access Control** ## Architecture at a Glance
- IAM users with access key management and rotation
- Bucket policies (AWS Policy Version 2012-10-17)
- Server-side encryption (SSE-S3 and SSE-KMS)
- Built-in Key Management Service (KMS)
- Rate limiting per endpoint
**Advanced Features**
- Cross-bucket replication to remote S3-compatible endpoints
- Hot-reload for bucket policies (no restart required)
- CORS configuration per bucket
**Management UI**
- Web console for bucket and object management
- IAM dashboard for user administration
- Inline JSON policy editor with presets
- Object browser with folder navigation and bulk operations
- Dark mode support
## Architecture
``` ```
+------------------+ +------------------+ +-----------------+ +----------------+
| API Server | | UI Server | | API Server |<----->| Object storage |
| (port 5000) | | (port 5100) | | (port 5000) | | (filesystem) |
| | | | | - S3 routes | +----------------+
| - S3 REST API |<------->| - Web Console | | - Presigned URLs |
| - SigV4 Auth | | - IAM Dashboard | | - Bucket policy |
| - Presign URLs | | - Bucket Editor | +-----------------+
+--------+---------+ +------------------+ ^
| |
v +-----------------+
+------------------+ +------------------+ | UI Server |
| Object Storage | | System Metadata | | (port 5100) |
| (filesystem) | | (.myfsio.sys/) | | - Auth console |
| | | | | - IAM dashboard|
| data/<bucket>/ | | - IAM config | | - Bucket editor|
| <objects> | | - Bucket policies| +-----------------+
| | | - Encryption keys|
+------------------+ +------------------+
``` ```
## Quick Start Both apps load the same configuration via `AppConfig` so IAM data and bucket policies stay consistent no matter which process you run.
Bucket policies are automatically reloaded whenever `bucket_policies.json` changes—no restarts required.
## Getting Started
```bash ```bash
# Clone and setup
git clone https://gitea.jzwsite.com/kqjy/MyFSIO
cd s3
python -m venv .venv python -m venv .venv
. .venv/Scripts/activate # PowerShell: .\.venv\Scripts\Activate.ps1
# Activate virtual environment
# Windows PowerShell:
.\.venv\Scripts\Activate.ps1
# Windows CMD:
.venv\Scripts\activate.bat
# Linux/macOS:
source .venv/bin/activate
# Install dependencies
pip install -r requirements.txt pip install -r requirements.txt
# Start both servers # Run both API and UI (default)
python run.py python run.py
# Or start individually # Or run individually:
python run.py --mode api # API only (port 5000) # python run.py --mode api
python run.py --mode ui # UI only (port 5100) # python run.py --mode ui
``` ```
**Default Credentials:** `localadmin` / `localadmin` Visit `http://127.0.0.1:5100/ui` for the console and `http://127.0.0.1:5000/` for the raw API. Override ports/hosts with the environment variables listed below.
- **Web Console:** http://127.0.0.1:5100/ui ## IAM, Access Keys, and Bucket Policies
- **API Endpoint:** http://127.0.0.1:5000
- First run creates `data/.myfsio.sys/config/iam.json` with `localadmin / localadmin` (full control). Sign in via the UI, then use the **IAM** tab to create users, rotate secrets, or edit inline policies without touching JSON by hand.
- Bucket policies live in `data/.myfsio.sys/config/bucket_policies.json` and follow the AWS `arn:aws:s3:::bucket/key` resource syntax with Version `2012-10-17`. Attach/replace/remove policies from the bucket detail page or edit the JSON by hand—changes hot reload automatically.
- IAM actions include extended verbs (`iam:list_users`, `iam:create_user`, `iam:update_policy`, etc.) so you can control who is allowed to manage other users and policies.
### Bucket Policy Presets & Hot Reload
- **Presets:** Every bucket detail view includes Public (read-only), Private (detach policy), and Custom presets. Public auto-populates a policy that grants anonymous `s3:ListBucket` + `s3:GetObject` access to the entire bucket.
- **Custom drafts:** Switching back to Custom restores your last manual edit so you can toggle between presets without losing work.
- **Hot reload:** The server watches `bucket_policies.json` and reloads statements on-the-fly—ideal for editing policies in your favorite editor while testing Via curl or the UI.
## Presigned URLs
Presigned URLs follow the AWS CLI playbook:
- Call `POST /presign/<bucket>/<key>` (or use the "Presign" button in the UI) to request a Signature Version 4 URL valid for 1 second to 7 days.
- The generated URL honors IAM permissions and bucket-policy decisions at generation-time and again when somebody fetches it.
- Because presigned URLs cover both authenticated and public sharing scenarios, the legacy "share link" feature has been removed.
## Configuration ## Configuration
| Variable | Default | Description | | Variable | Default | Description |
|----------|---------|-------------| | --- | --- | --- |
| `STORAGE_ROOT` | `./data` | Filesystem root for bucket storage | | `STORAGE_ROOT` | `<project>/data` | Filesystem root for bucket directories |
| `IAM_CONFIG` | `.myfsio.sys/config/iam.json` | IAM user and policy store | | `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size (bytes) |
| `BUCKET_POLICY_PATH` | `.myfsio.sys/config/bucket_policies.json` | Bucket policy store | | `UI_PAGE_SIZE` | `100` | `MaxKeys` hint for listings |
| `API_BASE_URL` | `http://127.0.0.1:5000` | API endpoint for UI calls | | `SECRET_KEY` | `dev-secret-key` | Flask session secret for the UI |
| `MAX_UPLOAD_SIZE` | `1073741824` | Maximum upload size in bytes (1 GB) | | `IAM_CONFIG` | `<project>/data/.myfsio.sys/config/iam.json` | IAM user + policy store |
| `MULTIPART_MIN_PART_SIZE` | `5242880` | Minimum multipart part size (5 MB) | | `BUCKET_POLICY_PATH` | `<project>/data/.myfsio.sys/config/bucket_policies.json` | Bucket policy store |
| `UI_PAGE_SIZE` | `100` | Default page size for listings | | `API_BASE_URL` | `http://127.0.0.1:5000` | Used by the UI when calling API endpoints (presign, bucket policy) |
| `SECRET_KEY` | `dev-secret-key` | Flask session secret | | `AWS_REGION` | `us-east-1` | Region used in Signature V4 scope |
| `AWS_REGION` | `us-east-1` | Region for SigV4 signing | | `AWS_SERVICE` | `s3` | Service used in Signature V4 scope |
| `AWS_SERVICE` | `s3` | Service name for SigV4 signing |
| `ENCRYPTION_ENABLED` | `false` | Enable server-side encryption |
| `KMS_ENABLED` | `false` | Enable Key Management Service |
| `LOG_LEVEL` | `INFO` | Logging verbosity |
| `SIGV4_TIMESTAMP_TOLERANCE_SECONDS` | `900` | Max time skew for SigV4 requests |
| `PRESIGNED_URL_MAX_EXPIRY_SECONDS` | `604800` | Max presigned URL expiry (7 days) |
| `REPLICATION_CONNECT_TIMEOUT_SECONDS` | `5` | Replication connection timeout |
| `SITE_SYNC_ENABLED` | `false` | Enable bi-directional site sync |
| `OBJECT_TAG_LIMIT` | `50` | Maximum tags per object |
## Data Layout > Buckets now live directly under `data/` while system metadata (versions, IAM, bucket policies, multipart uploads, etc.) lives in `data/.myfsio.sys`.
## API Cheatsheet (IAM headers required)
``` ```
data/ GET / -> List buckets (XML)
├── <bucket>/ # User buckets with objects PUT /<bucket> -> Create bucket
└── .myfsio.sys/ # System metadata DELETE /<bucket> -> Delete bucket (must be empty)
├── config/ GET /<bucket> -> List objects (XML)
│ ├── iam.json # IAM users and policies PUT /<bucket>/<key> -> Upload object (binary stream)
│ ├── bucket_policies.json # Bucket policies GET /<bucket>/<key> -> Download object
├── replication_rules.json DELETE /<bucket>/<key> -> Delete object
│ └── connections.json # Remote S3 connections POST /presign/<bucket>/<key> -> Generate AWS SigV4 presigned URL (JSON)
├── buckets/<bucket>/ GET /bucket-policy/<bucket> -> Fetch bucket policy (JSON)
├── meta/ # Object metadata (.meta.json) PUT /bucket-policy/<bucket> -> Attach/replace bucket policy (JSON)
│ ├── versions/ # Archived object versions DELETE /bucket-policy/<bucket> -> Remove bucket policy
│ └── .bucket.json # Bucket config (versioning, CORS)
├── multipart/ # Active multipart uploads
└── keys/ # Encryption keys (SSE-S3/KMS)
```
## API Reference
All endpoints require AWS Signature Version 4 authentication unless using presigned URLs or public bucket policies.
### Bucket Operations
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/` | List all buckets |
| `PUT` | `/<bucket>` | Create bucket |
| `DELETE` | `/<bucket>` | Delete bucket (must be empty) |
| `HEAD` | `/<bucket>` | Check bucket exists |
### Object Operations
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/<bucket>` | List objects (supports `list-type=2`) |
| `PUT` | `/<bucket>/<key>` | Upload object |
| `GET` | `/<bucket>/<key>` | Download object |
| `DELETE` | `/<bucket>/<key>` | Delete object |
| `HEAD` | `/<bucket>/<key>` | Get object metadata |
| `POST` | `/<bucket>/<key>?uploads` | Initiate multipart upload |
| `PUT` | `/<bucket>/<key>?partNumber=N&uploadId=X` | Upload part |
| `POST` | `/<bucket>/<key>?uploadId=X` | Complete multipart upload |
| `DELETE` | `/<bucket>/<key>?uploadId=X` | Abort multipart upload |
### Bucket Policies (S3-compatible)
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/<bucket>?policy` | Get bucket policy |
| `PUT` | `/<bucket>?policy` | Set bucket policy |
| `DELETE` | `/<bucket>?policy` | Delete bucket policy |
### Versioning
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/<bucket>/<key>?versionId=X` | Get specific version |
| `DELETE` | `/<bucket>/<key>?versionId=X` | Delete specific version |
| `GET` | `/<bucket>?versions` | List object versions |
### Health Check
| Method | Endpoint | Description |
|--------|----------|-------------|
| `GET` | `/myfsio/health` | Health check endpoint |
## IAM & Access Control
### Users and Access Keys
On first run, MyFSIO creates a default admin user (`localadmin`/`localadmin`). Use the IAM dashboard to:
- Create and delete users
- Generate and rotate access keys
- Attach inline policies to users
- Control IAM management permissions
### Bucket Policies
Bucket policies follow AWS policy grammar (Version `2012-10-17`) with support for:
- Principal-based access (`*` for anonymous, specific users)
- Action-based permissions (`s3:GetObject`, `s3:PutObject`, etc.)
- Resource patterns (`arn:aws:s3:::bucket/*`)
- Condition keys
**Policy Presets:**
- **Public:** Grants anonymous read access (`s3:GetObject`, `s3:ListBucket`)
- **Private:** Removes bucket policy (IAM-only access)
- **Custom:** Manual policy editing with draft preservation
Policies hot-reload when the JSON file changes.
## Server-Side Encryption
MyFSIO supports two encryption modes:
- **SSE-S3:** Server-managed keys with automatic key rotation
- **SSE-KMS:** Customer-managed keys via built-in KMS
Enable encryption with:
```bash
ENCRYPTION_ENABLED=true python run.py
```
## Cross-Bucket Replication
Replicate objects to remote S3-compatible endpoints:
1. Configure remote connections in the UI
2. Create replication rules specifying source/destination
3. Objects are automatically replicated on upload
## Docker
```bash
docker build -t myfsio .
docker run -p 5000:5000 -p 5100:5100 -v ./data:/app/data myfsio
``` ```
## Testing ## Testing
```bash ```bash
# Run all tests pytest -q
pytest tests/ -v
# Run specific test file
pytest tests/test_api.py -v
# Run with coverage
pytest tests/ --cov=app --cov-report=html
``` ```
## References ## References
- [Amazon S3 Documentation](https://docs.aws.amazon.com/s3/) - [Amazon Simple Storage Service Documentation](https://docs.aws.amazon.com/s3/)
- [AWS Signature Version 4](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html) - [Signature Version 4 Signing Process](https://docs.aws.amazon.com/general/latest/gr/signature-version-4.html)
- [S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html) - [Amazon S3 Bucket Policy Examples](https://docs.aws.amazon.com/AmazonS3/latest/userguide/example-bucket-policies.html)

View File

@@ -1,8 +1,7 @@
"""Application factory for the mini S3-compatible object store."""
from __future__ import annotations from __future__ import annotations
import html as html_module
import logging import logging
import mimetypes
import shutil import shutil
import sys import sys
import time import time
@@ -12,15 +11,11 @@ from pathlib import Path
from datetime import timedelta from datetime import timedelta
from typing import Any, Dict, List, Optional from typing import Any, Dict, List, Optional
from flask import Flask, Response, g, has_request_context, redirect, render_template, request, url_for from flask import Flask, g, has_request_context, redirect, render_template, request, url_for
from flask_cors import CORS from flask_cors import CORS
from flask_wtf.csrf import CSRFError from flask_wtf.csrf import CSRFError
from werkzeug.middleware.proxy_fix import ProxyFix from werkzeug.middleware.proxy_fix import ProxyFix
from .access_logging import AccessLoggingService
from .operation_metrics import OperationMetricsCollector, classify_endpoint
from .compression import GzipMiddleware
from .acl import AclService
from .bucket_policies import BucketPolicyStore from .bucket_policies import BucketPolicyStore
from .config import AppConfig from .config import AppConfig
from .connections import ConnectionStore from .connections import ConnectionStore
@@ -28,15 +23,10 @@ from .encryption import EncryptionManager
from .extensions import limiter, csrf from .extensions import limiter, csrf
from .iam import IamService from .iam import IamService
from .kms import KMSManager from .kms import KMSManager
from .lifecycle import LifecycleManager
from .notifications import NotificationService
from .object_lock import ObjectLockService
from .replication import ReplicationManager from .replication import ReplicationManager
from .secret_store import EphemeralSecretStore from .secret_store import EphemeralSecretStore
from .site_registry import SiteRegistry, SiteInfo from .storage import ObjectStorage
from .storage import ObjectStorage, StorageError
from .version import get_version from .version import get_version
from .website_domains import WebsiteDomainStore
def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path: def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
@@ -55,6 +45,7 @@ def _migrate_config_file(active_path: Path, legacy_paths: List[Path]) -> Path:
try: try:
shutil.move(str(legacy_path), str(active_path)) shutil.move(str(legacy_path), str(active_path))
except OSError: except OSError:
# Fall back to copy + delete if move fails (e.g., cross-device)
shutil.copy2(legacy_path, active_path) shutil.copy2(legacy_path, active_path)
try: try:
legacy_path.unlink(missing_ok=True) legacy_path.unlink(missing_ok=True)
@@ -95,27 +86,13 @@ def create_app(
# Trust X-Forwarded-* headers from proxies # Trust X-Forwarded-* headers from proxies
app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1) app.wsgi_app = ProxyFix(app.wsgi_app, x_for=1, x_proto=1, x_host=1, x_prefix=1)
# Enable gzip compression for responses (10-20x smaller JSON payloads)
if app.config.get("ENABLE_GZIP", True):
app.wsgi_app = GzipMiddleware(app.wsgi_app, compression_level=6)
_configure_cors(app) _configure_cors(app)
_configure_logging(app) _configure_logging(app)
limiter.init_app(app) limiter.init_app(app)
csrf.init_app(app) csrf.init_app(app)
storage = ObjectStorage( storage = ObjectStorage(Path(app.config["STORAGE_ROOT"]))
Path(app.config["STORAGE_ROOT"]),
cache_ttl=app.config.get("OBJECT_CACHE_TTL", 5),
object_cache_max_size=app.config.get("OBJECT_CACHE_MAX_SIZE", 100),
bucket_config_cache_ttl=app.config.get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0),
object_key_max_length_bytes=app.config.get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024),
)
if app.config.get("WARM_CACHE_ON_STARTUP", True) and not app.config.get("TESTING"):
storage.warm_cache_async()
iam = IamService( iam = IamService(
Path(app.config["IAM_CONFIG"]), Path(app.config["IAM_CONFIG"]),
auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5), auth_max_attempts=app.config.get("AUTH_MAX_ATTEMPTS", 5),
@@ -124,53 +101,36 @@ def create_app(
bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"])) bucket_policies = BucketPolicyStore(Path(app.config["BUCKET_POLICY_PATH"]))
secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300)) secret_store = EphemeralSecretStore(default_ttl=app.config.get("SECRET_TTL_SECONDS", 300))
# Initialize Replication components
# Store config files in the system config directory for consistency
storage_root = Path(app.config["STORAGE_ROOT"]) storage_root = Path(app.config["STORAGE_ROOT"])
config_dir = storage_root / ".myfsio.sys" / "config" config_dir = storage_root / ".myfsio.sys" / "config"
config_dir.mkdir(parents=True, exist_ok=True) config_dir.mkdir(parents=True, exist_ok=True)
# Define paths with migration from legacy locations
connections_path = _migrate_config_file( connections_path = _migrate_config_file(
active_path=config_dir / "connections.json", active_path=config_dir / "connections.json",
legacy_paths=[ legacy_paths=[
storage_root / ".myfsio.sys" / "connections.json", storage_root / ".myfsio.sys" / "connections.json", # Previous location
storage_root / ".connections.json", storage_root / ".connections.json", # Original legacy location
], ],
) )
replication_rules_path = _migrate_config_file( replication_rules_path = _migrate_config_file(
active_path=config_dir / "replication_rules.json", active_path=config_dir / "replication_rules.json",
legacy_paths=[ legacy_paths=[
storage_root / ".myfsio.sys" / "replication_rules.json", storage_root / ".myfsio.sys" / "replication_rules.json", # Previous location
storage_root / ".replication_rules.json", storage_root / ".replication_rules.json", # Original legacy location
], ],
) )
connections = ConnectionStore(connections_path) connections = ConnectionStore(connections_path)
replication = ReplicationManager( replication = ReplicationManager(storage, connections, replication_rules_path)
storage,
connections, # Initialize encryption and KMS
replication_rules_path,
storage_root,
connect_timeout=app.config.get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5),
read_timeout=app.config.get("REPLICATION_READ_TIMEOUT_SECONDS", 30),
max_retries=app.config.get("REPLICATION_MAX_RETRIES", 2),
streaming_threshold_bytes=app.config.get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024),
max_failures_per_bucket=app.config.get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50),
)
site_registry_path = config_dir / "site_registry.json"
site_registry = SiteRegistry(site_registry_path)
if app.config.get("SITE_ID") and not site_registry.get_local_site():
site_registry.set_local_site(SiteInfo(
site_id=app.config["SITE_ID"],
endpoint=app.config.get("SITE_ENDPOINT") or "",
region=app.config.get("SITE_REGION", "us-east-1"),
priority=app.config.get("SITE_PRIORITY", 100),
))
encryption_config = { encryption_config = {
"encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False), "encryption_enabled": app.config.get("ENCRYPTION_ENABLED", False),
"encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"), "encryption_master_key_path": app.config.get("ENCRYPTION_MASTER_KEY_PATH"),
"default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"), "default_encryption_algorithm": app.config.get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"),
"encryption_chunk_size_bytes": app.config.get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024),
} }
encryption_manager = EncryptionManager(encryption_config) encryption_manager = EncryptionManager(encryption_config)
@@ -178,38 +138,14 @@ def create_app(
if app.config.get("KMS_ENABLED", False): if app.config.get("KMS_ENABLED", False):
kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", "")) kms_keys_path = Path(app.config.get("KMS_KEYS_PATH", ""))
kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", "")) kms_master_key_path = Path(app.config.get("ENCRYPTION_MASTER_KEY_PATH", ""))
kms_manager = KMSManager( kms_manager = KMSManager(kms_keys_path, kms_master_key_path)
kms_keys_path,
kms_master_key_path,
generate_data_key_min_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1),
generate_data_key_max_bytes=app.config.get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024),
)
encryption_manager.set_kms_provider(kms_manager) encryption_manager.set_kms_provider(kms_manager)
# Wrap storage with encryption layer if encryption is enabled
if app.config.get("ENCRYPTION_ENABLED", False): if app.config.get("ENCRYPTION_ENABLED", False):
from .encrypted_storage import EncryptedObjectStorage from .encrypted_storage import EncryptedObjectStorage
storage = EncryptedObjectStorage(storage, encryption_manager) storage = EncryptedObjectStorage(storage, encryption_manager)
acl_service = AclService(storage_root)
object_lock_service = ObjectLockService(storage_root)
notification_service = NotificationService(
storage_root,
allow_internal_endpoints=app.config.get("ALLOW_INTERNAL_ENDPOINTS", False),
)
access_logging_service = AccessLoggingService(storage_root)
access_logging_service.set_storage(storage)
lifecycle_manager = None
if app.config.get("LIFECYCLE_ENABLED", False):
base_storage = storage.storage if hasattr(storage, 'storage') else storage
lifecycle_manager = LifecycleManager(
base_storage,
interval_seconds=app.config.get("LIFECYCLE_INTERVAL_SECONDS", 3600),
storage_root=storage_root,
max_history_per_bucket=app.config.get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50),
)
lifecycle_manager.start()
app.extensions["object_storage"] = storage app.extensions["object_storage"] = storage
app.extensions["iam"] = iam app.extensions["iam"] = iam
app.extensions["bucket_policies"] = bucket_policies app.extensions["bucket_policies"] = bucket_policies
@@ -219,97 +155,14 @@ def create_app(
app.extensions["replication"] = replication app.extensions["replication"] = replication
app.extensions["encryption"] = encryption_manager app.extensions["encryption"] = encryption_manager
app.extensions["kms"] = kms_manager app.extensions["kms"] = kms_manager
app.extensions["acl"] = acl_service
app.extensions["lifecycle"] = lifecycle_manager
app.extensions["object_lock"] = object_lock_service
app.extensions["notifications"] = notification_service
app.extensions["access_logging"] = access_logging_service
app.extensions["site_registry"] = site_registry
website_domains_store = None
if app.config.get("WEBSITE_HOSTING_ENABLED", False):
website_domains_path = config_dir / "website_domains.json"
website_domains_store = WebsiteDomainStore(website_domains_path)
app.extensions["website_domains"] = website_domains_store
from .s3_client import S3ProxyClient
api_base = app.config.get("API_BASE_URL") or "http://127.0.0.1:5000"
app.extensions["s3_proxy"] = S3ProxyClient(
api_base_url=api_base,
region=app.config.get("AWS_REGION", "us-east-1"),
)
operation_metrics_collector = None
if app.config.get("OPERATION_METRICS_ENABLED", False):
operation_metrics_collector = OperationMetricsCollector(
storage_root,
interval_minutes=app.config.get("OPERATION_METRICS_INTERVAL_MINUTES", 5),
retention_hours=app.config.get("OPERATION_METRICS_RETENTION_HOURS", 24),
)
app.extensions["operation_metrics"] = operation_metrics_collector
system_metrics_collector = None
if app.config.get("METRICS_HISTORY_ENABLED", False):
from .system_metrics import SystemMetricsCollector
system_metrics_collector = SystemMetricsCollector(
storage_root,
interval_minutes=app.config.get("METRICS_HISTORY_INTERVAL_MINUTES", 5),
retention_hours=app.config.get("METRICS_HISTORY_RETENTION_HOURS", 24),
)
system_metrics_collector.set_storage(storage)
app.extensions["system_metrics"] = system_metrics_collector
site_sync_worker = None
if app.config.get("SITE_SYNC_ENABLED", False):
from .site_sync import SiteSyncWorker
site_sync_worker = SiteSyncWorker(
storage=storage,
connections=connections,
replication_manager=replication,
storage_root=storage_root,
interval_seconds=app.config.get("SITE_SYNC_INTERVAL_SECONDS", 60),
batch_size=app.config.get("SITE_SYNC_BATCH_SIZE", 100),
connect_timeout=app.config.get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10),
read_timeout=app.config.get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120),
max_retries=app.config.get("SITE_SYNC_MAX_RETRIES", 2),
clock_skew_tolerance_seconds=app.config.get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0),
)
site_sync_worker.start()
app.extensions["site_sync"] = site_sync_worker
@app.errorhandler(500) @app.errorhandler(500)
def internal_error(error): def internal_error(error):
wants_html = request.accept_mimetypes.accept_html return render_template('500.html'), 500
path = request.path or ""
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
return render_template('500.html'), 500
error_xml = (
'<?xml version="1.0" encoding="UTF-8"?>'
'<Error>'
'<Code>InternalError</Code>'
'<Message>An internal server error occurred</Message>'
f'<Resource>{path}</Resource>'
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
'</Error>'
)
return error_xml, 500, {'Content-Type': 'application/xml'}
@app.errorhandler(CSRFError) @app.errorhandler(CSRFError)
def handle_csrf_error(e): def handle_csrf_error(e):
wants_html = request.accept_mimetypes.accept_html return render_template('csrf_error.html', reason=e.description), 400
path = request.path or ""
if include_ui and wants_html and (path.startswith("/ui") or path == "/"):
return render_template('csrf_error.html', reason=e.description), 400
error_xml = (
'<?xml version="1.0" encoding="UTF-8"?>'
'<Error>'
'<Code>CSRFError</Code>'
f'<Message>{e.description}</Message>'
f'<Resource>{path}</Resource>'
f'<RequestId>{getattr(g, "request_id", "-")}</RequestId>'
'</Error>'
)
return error_xml, 400, {'Content-Type': 'application/xml'}
@app.template_filter("filesizeformat") @app.template_filter("filesizeformat")
def filesizeformat(value: int) -> str: def filesizeformat(value: int) -> str:
@@ -324,60 +177,24 @@ def create_app(
@app.template_filter("timestamp_to_datetime") @app.template_filter("timestamp_to_datetime")
def timestamp_to_datetime(value: float) -> str: def timestamp_to_datetime(value: float) -> str:
"""Format Unix timestamp as human-readable datetime in configured timezone.""" """Format Unix timestamp as human-readable datetime."""
from datetime import datetime, timezone as dt_timezone from datetime import datetime
from zoneinfo import ZoneInfo
if not value: if not value:
return "Never" return "Never"
try: try:
dt_utc = datetime.fromtimestamp(value, dt_timezone.utc) dt = datetime.fromtimestamp(value)
display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC") return dt.strftime("%Y-%m-%d %H:%M:%S")
if display_tz and display_tz != "UTC":
try:
tz = ZoneInfo(display_tz)
dt_local = dt_utc.astimezone(tz)
return dt_local.strftime("%Y-%m-%d %H:%M:%S")
except (KeyError, ValueError):
pass
return dt_utc.strftime("%Y-%m-%d %H:%M:%S UTC")
except (ValueError, OSError): except (ValueError, OSError):
return "Unknown" return "Unknown"
@app.template_filter("format_datetime")
def format_datetime_filter(dt, include_tz: bool = True) -> str:
"""Format datetime object as human-readable string in configured timezone."""
from datetime import datetime, timezone as dt_timezone
from zoneinfo import ZoneInfo
if not dt:
return ""
try:
display_tz = app.config.get("DISPLAY_TIMEZONE", "UTC")
if display_tz and display_tz != "UTC":
try:
tz = ZoneInfo(display_tz)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=dt_timezone.utc)
dt = dt.astimezone(tz)
except (KeyError, ValueError):
pass
tz_abbr = dt.strftime("%Z") or "UTC"
if include_tz:
return f"{dt.strftime('%b %d, %Y %H:%M')} ({tz_abbr})"
return dt.strftime("%b %d, %Y %H:%M")
except (ValueError, AttributeError):
return str(dt)
if include_api: if include_api:
from .s3_api import s3_api_bp from .s3_api import s3_api_bp
from .kms_api import kms_api_bp from .kms_api import kms_api_bp
from .admin_api import admin_api_bp
app.register_blueprint(s3_api_bp) app.register_blueprint(s3_api_bp)
app.register_blueprint(kms_api_bp) app.register_blueprint(kms_api_bp)
app.register_blueprint(admin_api_bp)
csrf.exempt(s3_api_bp) csrf.exempt(s3_api_bp)
csrf.exempt(kms_api_bp) csrf.exempt(kms_api_bp)
csrf.exempt(admin_api_bp)
if include_ui: if include_ui:
from .ui import ui_bp from .ui import ui_bp
@@ -397,9 +214,9 @@ def create_app(
return render_template("404.html"), 404 return render_template("404.html"), 404
return error return error
@app.get("/myfsio/health") @app.get("/healthz")
def healthcheck() -> Dict[str, str]: def healthcheck() -> Dict[str, str]:
return {"status": "ok"} return {"status": "ok", "version": app.config.get("APP_VERSION", "unknown")}
return app return app
@@ -427,7 +244,7 @@ def _configure_cors(app: Flask) -> None:
class _RequestContextFilter(logging.Filter): class _RequestContextFilter(logging.Filter):
"""Inject request-specific attributes into log records.""" """Inject request-specific attributes into log records."""
def filter(self, record: logging.LogRecord) -> bool: def filter(self, record: logging.LogRecord) -> bool: # pragma: no cover - simple boilerplate
if has_request_context(): if has_request_context():
record.request_id = getattr(g, "request_id", "-") record.request_id = getattr(g, "request_id", "-")
record.path = request.path record.path = request.path
@@ -445,17 +262,17 @@ def _configure_logging(app: Flask) -> None:
formatter = logging.Formatter( formatter = logging.Formatter(
"%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s" "%(asctime)s | %(levelname)s | %(request_id)s | %(method)s %(path)s | %(message)s"
) )
# Stream Handler (stdout) - Primary for Docker
stream_handler = logging.StreamHandler(sys.stdout) stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setFormatter(formatter) stream_handler.setFormatter(formatter)
stream_handler.addFilter(_RequestContextFilter()) stream_handler.addFilter(_RequestContextFilter())
logger = app.logger logger = app.logger
for handler in logger.handlers[:]:
handler.close()
logger.handlers.clear() logger.handlers.clear()
logger.addHandler(stream_handler) logger.addHandler(stream_handler)
# File Handler (optional, if configured)
if app.config.get("LOG_TO_FILE"): if app.config.get("LOG_TO_FILE"):
log_file = Path(app.config["LOG_FILE"]) log_file = Path(app.config["LOG_FILE"])
log_file.parent.mkdir(parents=True, exist_ok=True) log_file.parent.mkdir(parents=True, exist_ok=True)
@@ -475,134 +292,11 @@ def _configure_logging(app: Flask) -> None:
def _log_request_start() -> None: def _log_request_start() -> None:
g.request_id = uuid.uuid4().hex g.request_id = uuid.uuid4().hex
g.request_started_at = time.perf_counter() g.request_started_at = time.perf_counter()
g.request_bytes_in = request.content_length or 0
app.logger.info( app.logger.info(
"Request started", "Request started",
extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr}, extra={"path": request.path, "method": request.method, "remote_addr": request.remote_addr},
) )
@app.before_request
def _maybe_serve_website():
if not app.config.get("WEBSITE_HOSTING_ENABLED"):
return None
if request.method not in {"GET", "HEAD"}:
return None
host = request.host
if ":" in host:
host = host.rsplit(":", 1)[0]
host = host.lower()
store = app.extensions.get("website_domains")
if not store:
return None
bucket = store.get_bucket(host)
if not bucket:
return None
storage = app.extensions["object_storage"]
if not storage.bucket_exists(bucket):
return _website_error_response(404, "Not Found")
website_config = storage.get_bucket_website(bucket)
if not website_config:
return _website_error_response(404, "Not Found")
index_doc = website_config.get("index_document", "index.html")
error_doc = website_config.get("error_document")
req_path = request.path.lstrip("/")
if not req_path or req_path.endswith("/"):
object_key = req_path + index_doc
else:
object_key = req_path
try:
obj_path = storage.get_object_path(bucket, object_key)
except (StorageError, OSError):
if object_key == req_path:
try:
obj_path = storage.get_object_path(bucket, req_path + "/" + index_doc)
object_key = req_path + "/" + index_doc
except (StorageError, OSError):
return _serve_website_error(storage, bucket, error_doc, 404)
else:
return _serve_website_error(storage, bucket, error_doc, 404)
content_type = mimetypes.guess_type(object_key)[0] or "application/octet-stream"
is_encrypted = False
try:
metadata = storage.get_object_metadata(bucket, object_key)
is_encrypted = "x-amz-server-side-encryption" in metadata
except (StorageError, OSError):
pass
if request.method == "HEAD":
response = Response(status=200)
if is_encrypted and hasattr(storage, "get_object_data"):
try:
data, _ = storage.get_object_data(bucket, object_key)
response.headers["Content-Length"] = len(data)
except (StorageError, OSError):
return _website_error_response(500, "Internal Server Error")
else:
try:
stat = obj_path.stat()
response.headers["Content-Length"] = stat.st_size
except OSError:
return _website_error_response(500, "Internal Server Error")
response.headers["Content-Type"] = content_type
return response
if is_encrypted and hasattr(storage, "get_object_data"):
try:
data, _ = storage.get_object_data(bucket, object_key)
response = Response(data, mimetype=content_type)
response.headers["Content-Length"] = len(data)
return response
except (StorageError, OSError):
return _website_error_response(500, "Internal Server Error")
def _stream(file_path):
with file_path.open("rb") as f:
while True:
chunk = f.read(65536)
if not chunk:
break
yield chunk
try:
stat = obj_path.stat()
response = Response(_stream(obj_path), mimetype=content_type, direct_passthrough=True)
response.headers["Content-Length"] = stat.st_size
return response
except OSError:
return _website_error_response(500, "Internal Server Error")
def _serve_website_error(storage, bucket, error_doc_key, status_code):
if not error_doc_key:
return _website_error_response(status_code, "Not Found" if status_code == 404 else "Error")
try:
obj_path = storage.get_object_path(bucket, error_doc_key)
except (StorageError, OSError):
return _website_error_response(status_code, "Not Found")
content_type = mimetypes.guess_type(error_doc_key)[0] or "text/html"
is_encrypted = False
try:
metadata = storage.get_object_metadata(bucket, error_doc_key)
is_encrypted = "x-amz-server-side-encryption" in metadata
except (StorageError, OSError):
pass
if is_encrypted and hasattr(storage, "get_object_data"):
try:
data, _ = storage.get_object_data(bucket, error_doc_key)
response = Response(data, status=status_code, mimetype=content_type)
response.headers["Content-Length"] = len(data)
return response
except (StorageError, OSError):
return _website_error_response(status_code, "Not Found")
try:
data = obj_path.read_bytes()
response = Response(data, status=status_code, mimetype=content_type)
response.headers["Content-Length"] = len(data)
return response
except OSError:
return _website_error_response(status_code, "Not Found")
def _website_error_response(status_code, message):
safe_msg = html_module.escape(str(message))
safe_code = html_module.escape(str(status_code))
body = f"<html><head><title>{safe_code} {safe_msg}</title></head><body><h1>{safe_code} {safe_msg}</h1></body></html>"
return Response(body, status=status_code, mimetype="text/html")
@app.after_request @app.after_request
def _log_request_end(response): def _log_request_end(response):
duration_ms = 0.0 duration_ms = 0.0
@@ -619,21 +313,4 @@ def _configure_logging(app: Flask) -> None:
}, },
) )
response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}" response.headers["X-Request-Duration-ms"] = f"{duration_ms:.2f}"
operation_metrics = app.extensions.get("operation_metrics")
if operation_metrics:
bytes_in = getattr(g, "request_bytes_in", 0)
bytes_out = response.content_length or 0
error_code = getattr(g, "s3_error_code", None)
endpoint_type = classify_endpoint(request.path)
operation_metrics.record_request(
method=request.method,
endpoint_type=endpoint_type,
status_code=response.status_code,
latency_ms=duration_ms,
bytes_in=bytes_in,
bytes_out=bytes_out,
error_code=error_code,
)
return response return response

View File

@@ -1,265 +0,0 @@
from __future__ import annotations
import io
import json
import logging
import queue
import threading
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
logger = logging.getLogger(__name__)
@dataclass
class AccessLogEntry:
bucket_owner: str = "-"
bucket: str = "-"
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
remote_ip: str = "-"
requester: str = "-"
request_id: str = field(default_factory=lambda: uuid.uuid4().hex[:16].upper())
operation: str = "-"
key: str = "-"
request_uri: str = "-"
http_status: int = 200
error_code: str = "-"
bytes_sent: int = 0
object_size: int = 0
total_time_ms: int = 0
turn_around_time_ms: int = 0
referrer: str = "-"
user_agent: str = "-"
version_id: str = "-"
host_id: str = "-"
signature_version: str = "SigV4"
cipher_suite: str = "-"
authentication_type: str = "AuthHeader"
host_header: str = "-"
tls_version: str = "-"
def to_log_line(self) -> str:
time_str = self.timestamp.strftime("[%d/%b/%Y:%H:%M:%S %z]")
return (
f'{self.bucket_owner} {self.bucket} {time_str} {self.remote_ip} '
f'{self.requester} {self.request_id} {self.operation} {self.key} '
f'"{self.request_uri}" {self.http_status} {self.error_code or "-"} '
f'{self.bytes_sent or "-"} {self.object_size or "-"} {self.total_time_ms or "-"} '
f'{self.turn_around_time_ms or "-"} "{self.referrer}" "{self.user_agent}" {self.version_id}'
)
def to_dict(self) -> Dict[str, Any]:
return {
"bucket_owner": self.bucket_owner,
"bucket": self.bucket,
"timestamp": self.timestamp.isoformat(),
"remote_ip": self.remote_ip,
"requester": self.requester,
"request_id": self.request_id,
"operation": self.operation,
"key": self.key,
"request_uri": self.request_uri,
"http_status": self.http_status,
"error_code": self.error_code,
"bytes_sent": self.bytes_sent,
"object_size": self.object_size,
"total_time_ms": self.total_time_ms,
"referrer": self.referrer,
"user_agent": self.user_agent,
"version_id": self.version_id,
}
@dataclass
class LoggingConfiguration:
target_bucket: str
target_prefix: str = ""
enabled: bool = True
def to_dict(self) -> Dict[str, Any]:
return {
"LoggingEnabled": {
"TargetBucket": self.target_bucket,
"TargetPrefix": self.target_prefix,
}
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> Optional["LoggingConfiguration"]:
logging_enabled = data.get("LoggingEnabled")
if not logging_enabled:
return None
return cls(
target_bucket=logging_enabled.get("TargetBucket", ""),
target_prefix=logging_enabled.get("TargetPrefix", ""),
enabled=True,
)
class AccessLoggingService:
def __init__(self, storage_root: Path, flush_interval: int = 60, max_buffer_size: int = 1000):
self.storage_root = storage_root
self.flush_interval = flush_interval
self.max_buffer_size = max_buffer_size
self._configs: Dict[str, LoggingConfiguration] = {}
self._buffer: Dict[str, List[AccessLogEntry]] = {}
self._buffer_lock = threading.Lock()
self._shutdown = threading.Event()
self._storage = None
self._flush_thread = threading.Thread(target=self._flush_loop, name="access-log-flush", daemon=True)
self._flush_thread.start()
def set_storage(self, storage: Any) -> None:
self._storage = storage
def _config_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "logging.json"
def get_bucket_logging(self, bucket_name: str) -> Optional[LoggingConfiguration]:
if bucket_name in self._configs:
return self._configs[bucket_name]
config_path = self._config_path(bucket_name)
if not config_path.exists():
return None
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
config = LoggingConfiguration.from_dict(data)
if config:
self._configs[bucket_name] = config
return config
except (json.JSONDecodeError, OSError) as e:
logger.warning(f"Failed to load logging config for {bucket_name}: {e}")
return None
def set_bucket_logging(self, bucket_name: str, config: LoggingConfiguration) -> None:
config_path = self._config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(config.to_dict(), indent=2), encoding="utf-8")
self._configs[bucket_name] = config
def delete_bucket_logging(self, bucket_name: str) -> None:
config_path = self._config_path(bucket_name)
try:
if config_path.exists():
config_path.unlink()
except OSError:
pass
self._configs.pop(bucket_name, None)
def log_request(
self,
bucket_name: str,
*,
operation: str,
key: str = "-",
remote_ip: str = "-",
requester: str = "-",
request_uri: str = "-",
http_status: int = 200,
error_code: str = "",
bytes_sent: int = 0,
object_size: int = 0,
total_time_ms: int = 0,
referrer: str = "-",
user_agent: str = "-",
version_id: str = "-",
request_id: str = "",
) -> None:
config = self.get_bucket_logging(bucket_name)
if not config or not config.enabled:
return
entry = AccessLogEntry(
bucket_owner="local-owner",
bucket=bucket_name,
remote_ip=remote_ip,
requester=requester,
request_id=request_id or uuid.uuid4().hex[:16].upper(),
operation=operation,
key=key,
request_uri=request_uri,
http_status=http_status,
error_code=error_code,
bytes_sent=bytes_sent,
object_size=object_size,
total_time_ms=total_time_ms,
referrer=referrer,
user_agent=user_agent,
version_id=version_id,
)
target_key = f"{config.target_bucket}:{config.target_prefix}"
should_flush = False
with self._buffer_lock:
if target_key not in self._buffer:
self._buffer[target_key] = []
self._buffer[target_key].append(entry)
should_flush = len(self._buffer[target_key]) >= self.max_buffer_size
if should_flush:
self._flush_buffer(target_key)
def _flush_loop(self) -> None:
while not self._shutdown.is_set():
self._shutdown.wait(timeout=self.flush_interval)
if not self._shutdown.is_set():
self._flush_all()
def _flush_all(self) -> None:
with self._buffer_lock:
targets = list(self._buffer.keys())
for target_key in targets:
self._flush_buffer(target_key)
def _flush_buffer(self, target_key: str) -> None:
with self._buffer_lock:
entries = self._buffer.pop(target_key, [])
if not entries or not self._storage:
return
try:
bucket_name, prefix = target_key.split(":", 1)
except ValueError:
logger.error(f"Invalid target key: {target_key}")
return
now = datetime.now(timezone.utc)
log_key = f"{prefix}{now.strftime('%Y-%m-%d-%H-%M-%S')}-{uuid.uuid4().hex[:8]}"
log_content = "\n".join(entry.to_log_line() for entry in entries) + "\n"
try:
stream = io.BytesIO(log_content.encode("utf-8"))
self._storage.put_object(bucket_name, log_key, stream, enforce_quota=False)
logger.info(f"Flushed {len(entries)} access log entries to {bucket_name}/{log_key}")
except Exception as e:
logger.error(f"Failed to write access log to {bucket_name}/{log_key}: {e}")
with self._buffer_lock:
if target_key not in self._buffer:
self._buffer[target_key] = []
self._buffer[target_key] = entries + self._buffer[target_key]
def flush(self) -> None:
self._flush_all()
def shutdown(self) -> None:
self._shutdown.set()
self._flush_all()
self._flush_thread.join(timeout=5.0)
def get_stats(self) -> Dict[str, Any]:
with self._buffer_lock:
buffered = sum(len(entries) for entries in self._buffer.values())
return {
"buffered_entries": buffered,
"target_buckets": len(self._buffer),
}

View File

@@ -1,204 +0,0 @@
from __future__ import annotations
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
ACL_PERMISSION_FULL_CONTROL = "FULL_CONTROL"
ACL_PERMISSION_WRITE = "WRITE"
ACL_PERMISSION_WRITE_ACP = "WRITE_ACP"
ACL_PERMISSION_READ = "READ"
ACL_PERMISSION_READ_ACP = "READ_ACP"
ALL_PERMISSIONS = {
ACL_PERMISSION_FULL_CONTROL,
ACL_PERMISSION_WRITE,
ACL_PERMISSION_WRITE_ACP,
ACL_PERMISSION_READ,
ACL_PERMISSION_READ_ACP,
}
PERMISSION_TO_ACTIONS = {
ACL_PERMISSION_FULL_CONTROL: {"read", "write", "delete", "list", "share"},
ACL_PERMISSION_WRITE: {"write", "delete"},
ACL_PERMISSION_WRITE_ACP: {"share"},
ACL_PERMISSION_READ: {"read", "list"},
ACL_PERMISSION_READ_ACP: {"share"},
}
GRANTEE_ALL_USERS = "*"
GRANTEE_AUTHENTICATED_USERS = "authenticated"
@dataclass
class AclGrant:
grantee: str
permission: str
def to_dict(self) -> Dict[str, str]:
return {"grantee": self.grantee, "permission": self.permission}
@classmethod
def from_dict(cls, data: Dict[str, str]) -> "AclGrant":
return cls(grantee=data["grantee"], permission=data["permission"])
@dataclass
class Acl:
owner: str
grants: List[AclGrant] = field(default_factory=list)
def to_dict(self) -> Dict[str, Any]:
return {
"owner": self.owner,
"grants": [g.to_dict() for g in self.grants],
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "Acl":
return cls(
owner=data.get("owner", ""),
grants=[AclGrant.from_dict(g) for g in data.get("grants", [])],
)
def get_allowed_actions(self, principal_id: Optional[str], is_authenticated: bool = True) -> Set[str]:
actions: Set[str] = set()
if principal_id and principal_id == self.owner:
actions.update(PERMISSION_TO_ACTIONS[ACL_PERMISSION_FULL_CONTROL])
for grant in self.grants:
if grant.grantee == GRANTEE_ALL_USERS:
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
elif grant.grantee == GRANTEE_AUTHENTICATED_USERS and is_authenticated:
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
elif principal_id and grant.grantee == principal_id:
actions.update(PERMISSION_TO_ACTIONS.get(grant.permission, set()))
return actions
CANNED_ACLS = {
"private": lambda owner: Acl(
owner=owner,
grants=[AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL)],
),
"public-read": lambda owner: Acl(
owner=owner,
grants=[
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
],
),
"public-read-write": lambda owner: Acl(
owner=owner,
grants=[
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_READ),
AclGrant(grantee=GRANTEE_ALL_USERS, permission=ACL_PERMISSION_WRITE),
],
),
"authenticated-read": lambda owner: Acl(
owner=owner,
grants=[
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
AclGrant(grantee=GRANTEE_AUTHENTICATED_USERS, permission=ACL_PERMISSION_READ),
],
),
"bucket-owner-read": lambda owner: Acl(
owner=owner,
grants=[
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
],
),
"bucket-owner-full-control": lambda owner: Acl(
owner=owner,
grants=[
AclGrant(grantee=owner, permission=ACL_PERMISSION_FULL_CONTROL),
],
),
}
def create_canned_acl(canned_acl: str, owner: str) -> Acl:
factory = CANNED_ACLS.get(canned_acl)
if not factory:
return CANNED_ACLS["private"](owner)
return factory(owner)
class AclService:
def __init__(self, storage_root: Path):
self.storage_root = storage_root
self._bucket_acl_cache: Dict[str, Acl] = {}
def _bucket_acl_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / ".acl.json"
def get_bucket_acl(self, bucket_name: str) -> Optional[Acl]:
if bucket_name in self._bucket_acl_cache:
return self._bucket_acl_cache[bucket_name]
acl_path = self._bucket_acl_path(bucket_name)
if not acl_path.exists():
return None
try:
data = json.loads(acl_path.read_text(encoding="utf-8"))
acl = Acl.from_dict(data)
self._bucket_acl_cache[bucket_name] = acl
return acl
except (OSError, json.JSONDecodeError):
return None
def set_bucket_acl(self, bucket_name: str, acl: Acl) -> None:
acl_path = self._bucket_acl_path(bucket_name)
acl_path.parent.mkdir(parents=True, exist_ok=True)
acl_path.write_text(json.dumps(acl.to_dict(), indent=2), encoding="utf-8")
self._bucket_acl_cache[bucket_name] = acl
def set_bucket_canned_acl(self, bucket_name: str, canned_acl: str, owner: str) -> Acl:
acl = create_canned_acl(canned_acl, owner)
self.set_bucket_acl(bucket_name, acl)
return acl
def delete_bucket_acl(self, bucket_name: str) -> None:
acl_path = self._bucket_acl_path(bucket_name)
if acl_path.exists():
acl_path.unlink()
self._bucket_acl_cache.pop(bucket_name, None)
def evaluate_bucket_acl(
self,
bucket_name: str,
principal_id: Optional[str],
action: str,
is_authenticated: bool = True,
) -> bool:
acl = self.get_bucket_acl(bucket_name)
if not acl:
return False
allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
return action in allowed_actions
def get_object_acl(self, bucket_name: str, object_key: str, object_metadata: Dict[str, Any]) -> Optional[Acl]:
acl_data = object_metadata.get("__acl__")
if not acl_data:
return None
try:
return Acl.from_dict(acl_data)
except (TypeError, KeyError):
return None
def create_object_acl_metadata(self, acl: Acl) -> Dict[str, Any]:
return {"__acl__": acl.to_dict()}
def evaluate_object_acl(
self,
object_metadata: Dict[str, Any],
principal_id: Optional[str],
action: str,
is_authenticated: bool = True,
) -> bool:
acl = self.get_object_acl("", "", object_metadata)
if not acl:
return False
allowed_actions = acl.get_allowed_actions(principal_id, is_authenticated)
return action in allowed_actions

View File

@@ -1,778 +0,0 @@
from __future__ import annotations
import ipaddress
import json
import logging
import re
import socket
import time
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlparse
import requests
from flask import Blueprint, Response, current_app, jsonify, request
from .connections import ConnectionStore
from .extensions import limiter
from .iam import IamError, Principal
from .replication import ReplicationManager
from .site_registry import PeerSite, SiteInfo, SiteRegistry
from .website_domains import WebsiteDomainStore, normalize_domain, is_valid_domain
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
"""Check if a URL is safe to make requests to (not internal/private).
Args:
url: The URL to check.
allow_internal: If True, allows internal/private IP addresses.
Use for self-hosted deployments on internal networks.
"""
try:
parsed = urlparse(url)
hostname = parsed.hostname
if not hostname:
return False
cloud_metadata_hosts = {
"metadata.google.internal",
"169.254.169.254",
}
if hostname.lower() in cloud_metadata_hosts:
return False
if allow_internal:
return True
blocked_hosts = {
"localhost",
"127.0.0.1",
"0.0.0.0",
"::1",
"[::1]",
}
if hostname.lower() in blocked_hosts:
return False
try:
resolved_ip = socket.gethostbyname(hostname)
ip = ipaddress.ip_address(resolved_ip)
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
return False
except (socket.gaierror, ValueError):
return False
return True
except Exception:
return False
def _validate_endpoint(endpoint: str) -> Optional[str]:
"""Validate endpoint URL format. Returns error message or None."""
try:
parsed = urlparse(endpoint)
if not parsed.scheme or parsed.scheme not in ("http", "https"):
return "Endpoint must be http or https URL"
if not parsed.netloc:
return "Endpoint must have a host"
return None
except Exception:
return "Invalid endpoint URL"
def _validate_priority(priority: Any) -> Optional[str]:
"""Validate priority value. Returns error message or None."""
try:
p = int(priority)
if p < 0 or p > 1000:
return "Priority must be between 0 and 1000"
return None
except (TypeError, ValueError):
return "Priority must be an integer"
def _validate_region(region: str) -> Optional[str]:
"""Validate region format. Returns error message or None."""
if not re.match(r"^[a-z]{2,}-[a-z]+-\d+$", region):
return "Region must match format like us-east-1"
return None
def _validate_site_id(site_id: str) -> Optional[str]:
"""Validate site_id format. Returns error message or None."""
if not site_id or len(site_id) > 63:
return "site_id must be 1-63 characters"
if not re.match(r'^[a-zA-Z0-9][a-zA-Z0-9_-]*$', site_id):
return "site_id must start with alphanumeric and contain only alphanumeric, hyphens, underscores"
return None
logger = logging.getLogger(__name__)
admin_api_bp = Blueprint("admin_api", __name__, url_prefix="/admin")
def _require_principal() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
from .s3_api import _require_principal as s3_require_principal
return s3_require_principal()
def _require_admin() -> Tuple[Optional[Principal], Optional[Tuple[Dict[str, Any], int]]]:
principal, error = _require_principal()
if error:
return None, error
try:
_iam().authorize(principal, None, "iam:*")
return principal, None
except IamError:
return None, _json_error("AccessDenied", "Admin access required", 403)
def _site_registry() -> SiteRegistry:
return current_app.extensions["site_registry"]
def _connections() -> ConnectionStore:
return current_app.extensions["connections"]
def _replication() -> ReplicationManager:
return current_app.extensions["replication"]
def _iam():
return current_app.extensions["iam"]
def _json_error(code: str, message: str, status: int) -> Tuple[Dict[str, Any], int]:
return {"error": {"code": code, "message": message}}, status
def _get_admin_rate_limit() -> str:
return current_app.config.get("RATE_LIMIT_ADMIN", "60 per minute")
@admin_api_bp.route("/site", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_local_site():
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
local_site = registry.get_local_site()
if local_site:
return jsonify(local_site.to_dict())
config_site_id = current_app.config.get("SITE_ID")
config_endpoint = current_app.config.get("SITE_ENDPOINT")
if config_site_id:
return jsonify({
"site_id": config_site_id,
"endpoint": config_endpoint or "",
"region": current_app.config.get("SITE_REGION", "us-east-1"),
"priority": current_app.config.get("SITE_PRIORITY", 100),
"display_name": config_site_id,
"source": "environment",
})
return _json_error("NotFound", "Local site not configured", 404)
@admin_api_bp.route("/site", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_local_site():
principal, error = _require_admin()
if error:
return error
payload = request.get_json(silent=True) or {}
site_id = payload.get("site_id")
endpoint = payload.get("endpoint")
if not site_id:
return _json_error("ValidationError", "site_id is required", 400)
site_id_error = _validate_site_id(site_id)
if site_id_error:
return _json_error("ValidationError", site_id_error, 400)
if endpoint:
endpoint_error = _validate_endpoint(endpoint)
if endpoint_error:
return _json_error("ValidationError", endpoint_error, 400)
if "priority" in payload:
priority_error = _validate_priority(payload["priority"])
if priority_error:
return _json_error("ValidationError", priority_error, 400)
if "region" in payload:
region_error = _validate_region(payload["region"])
if region_error:
return _json_error("ValidationError", region_error, 400)
registry = _site_registry()
existing = registry.get_local_site()
site = SiteInfo(
site_id=site_id,
endpoint=endpoint or "",
region=payload.get("region", "us-east-1"),
priority=payload.get("priority", 100),
display_name=payload.get("display_name", site_id),
created_at=existing.created_at if existing else None,
)
registry.set_local_site(site)
logger.info("Local site updated", extra={"site_id": site_id, "principal": principal.access_key})
return jsonify(site.to_dict())
@admin_api_bp.route("/sites", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_all_sites():
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
local = registry.get_local_site()
peers = registry.list_peers()
result = {
"local": local.to_dict() if local else None,
"peers": [peer.to_dict() for peer in peers],
"total_peers": len(peers),
}
return jsonify(result)
@admin_api_bp.route("/sites", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def register_peer_site():
principal, error = _require_admin()
if error:
return error
payload = request.get_json(silent=True) or {}
site_id = payload.get("site_id")
endpoint = payload.get("endpoint")
if not site_id:
return _json_error("ValidationError", "site_id is required", 400)
site_id_error = _validate_site_id(site_id)
if site_id_error:
return _json_error("ValidationError", site_id_error, 400)
if not endpoint:
return _json_error("ValidationError", "endpoint is required", 400)
endpoint_error = _validate_endpoint(endpoint)
if endpoint_error:
return _json_error("ValidationError", endpoint_error, 400)
region = payload.get("region", "us-east-1")
region_error = _validate_region(region)
if region_error:
return _json_error("ValidationError", region_error, 400)
priority = payload.get("priority", 100)
priority_error = _validate_priority(priority)
if priority_error:
return _json_error("ValidationError", priority_error, 400)
registry = _site_registry()
if registry.get_peer(site_id):
return _json_error("AlreadyExists", f"Peer site '{site_id}' already exists", 409)
connection_id = payload.get("connection_id")
if connection_id:
if not _connections().get(connection_id):
return _json_error("ValidationError", f"Connection '{connection_id}' not found", 400)
peer = PeerSite(
site_id=site_id,
endpoint=endpoint,
region=region,
priority=int(priority),
display_name=payload.get("display_name", site_id),
connection_id=connection_id,
)
registry.add_peer(peer)
logger.info("Peer site registered", extra={"site_id": site_id, "principal": principal.access_key})
return jsonify(peer.to_dict()), 201
@admin_api_bp.route("/sites/<site_id>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_peer_site(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
peer = registry.get_peer(site_id)
if not peer:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
return jsonify(peer.to_dict())
@admin_api_bp.route("/sites/<site_id>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_peer_site(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
existing = registry.get_peer(site_id)
if not existing:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
payload = request.get_json(silent=True) or {}
if "endpoint" in payload:
endpoint_error = _validate_endpoint(payload["endpoint"])
if endpoint_error:
return _json_error("ValidationError", endpoint_error, 400)
if "priority" in payload:
priority_error = _validate_priority(payload["priority"])
if priority_error:
return _json_error("ValidationError", priority_error, 400)
if "region" in payload:
region_error = _validate_region(payload["region"])
if region_error:
return _json_error("ValidationError", region_error, 400)
if "connection_id" in payload:
if payload["connection_id"] and not _connections().get(payload["connection_id"]):
return _json_error("ValidationError", f"Connection '{payload['connection_id']}' not found", 400)
peer = PeerSite(
site_id=site_id,
endpoint=payload.get("endpoint", existing.endpoint),
region=payload.get("region", existing.region),
priority=payload.get("priority", existing.priority),
display_name=payload.get("display_name", existing.display_name),
connection_id=payload.get("connection_id", existing.connection_id),
created_at=existing.created_at,
is_healthy=existing.is_healthy,
last_health_check=existing.last_health_check,
)
registry.update_peer(peer)
logger.info("Peer site updated", extra={"site_id": site_id, "principal": principal.access_key})
return jsonify(peer.to_dict())
@admin_api_bp.route("/sites/<site_id>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_peer_site(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
if not registry.delete_peer(site_id):
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
logger.info("Peer site deleted", extra={"site_id": site_id, "principal": principal.access_key})
return Response(status=204)
@admin_api_bp.route("/sites/<site_id>/health", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_peer_health(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
peer = registry.get_peer(site_id)
if not peer:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
is_healthy = False
error_message = None
if peer.connection_id:
connection = _connections().get(peer.connection_id)
if connection:
is_healthy = _replication().check_endpoint_health(connection)
else:
error_message = f"Connection '{peer.connection_id}' not found"
else:
error_message = "No connection configured for this peer"
registry.update_health(site_id, is_healthy)
result = {
"site_id": site_id,
"is_healthy": is_healthy,
"checked_at": time.time(),
}
if error_message:
result["error"] = error_message
return jsonify(result)
@admin_api_bp.route("/topology", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_topology():
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
local = registry.get_local_site()
peers = registry.list_peers()
sites = []
if local:
sites.append({
**local.to_dict(),
"is_local": True,
"is_healthy": True,
})
for peer in peers:
sites.append({
**peer.to_dict(),
"is_local": False,
})
sites.sort(key=lambda s: s.get("priority", 100))
return jsonify({
"sites": sites,
"total": len(sites),
"healthy_count": sum(1 for s in sites if s.get("is_healthy")),
})
@admin_api_bp.route("/sites/<site_id>/bidirectional-status", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def check_bidirectional_status(site_id: str):
principal, error = _require_admin()
if error:
return error
registry = _site_registry()
peer = registry.get_peer(site_id)
if not peer:
return _json_error("NotFound", f"Peer site '{site_id}' not found", 404)
local_site = registry.get_local_site()
replication = _replication()
local_rules = replication.list_rules()
local_bidir_rules = []
for rule in local_rules:
if rule.target_connection_id == peer.connection_id and rule.mode == "bidirectional":
local_bidir_rules.append({
"bucket_name": rule.bucket_name,
"target_bucket": rule.target_bucket,
"enabled": rule.enabled,
})
result = {
"site_id": site_id,
"local_site_id": local_site.site_id if local_site else None,
"local_endpoint": local_site.endpoint if local_site else None,
"local_bidirectional_rules": local_bidir_rules,
"local_site_sync_enabled": current_app.config.get("SITE_SYNC_ENABLED", False),
"remote_status": None,
"issues": [],
"is_fully_configured": False,
}
if not local_site or not local_site.site_id:
result["issues"].append({
"code": "NO_LOCAL_SITE_ID",
"message": "Local site identity not configured",
"severity": "error",
})
if not local_site or not local_site.endpoint:
result["issues"].append({
"code": "NO_LOCAL_ENDPOINT",
"message": "Local site endpoint not configured (remote site cannot reach back)",
"severity": "error",
})
if not peer.connection_id:
result["issues"].append({
"code": "NO_CONNECTION",
"message": "No connection configured for this peer",
"severity": "error",
})
return jsonify(result)
connection = _connections().get(peer.connection_id)
if not connection:
result["issues"].append({
"code": "CONNECTION_NOT_FOUND",
"message": f"Connection '{peer.connection_id}' not found",
"severity": "error",
})
return jsonify(result)
if not local_bidir_rules:
result["issues"].append({
"code": "NO_LOCAL_BIDIRECTIONAL_RULES",
"message": "No bidirectional replication rules configured on this site",
"severity": "warning",
})
if not result["local_site_sync_enabled"]:
result["issues"].append({
"code": "SITE_SYNC_DISABLED",
"message": "Site sync worker is disabled (SITE_SYNC_ENABLED=false). Pull operations will not work.",
"severity": "warning",
})
if not replication.check_endpoint_health(connection):
result["issues"].append({
"code": "REMOTE_UNREACHABLE",
"message": "Remote endpoint is not reachable",
"severity": "error",
})
return jsonify(result)
allow_internal = current_app.config.get("ALLOW_INTERNAL_ENDPOINTS", False)
if not _is_safe_url(peer.endpoint, allow_internal=allow_internal):
result["issues"].append({
"code": "ENDPOINT_NOT_ALLOWED",
"message": "Peer endpoint points to cloud metadata service (SSRF protection)",
"severity": "error",
})
return jsonify(result)
try:
admin_url = peer.endpoint.rstrip("/") + "/admin/sites"
resp = requests.get(
admin_url,
timeout=10,
headers={
"Accept": "application/json",
"X-Access-Key": connection.access_key,
"X-Secret-Key": connection.secret_key,
},
)
if resp.status_code == 200:
try:
remote_data = resp.json()
if not isinstance(remote_data, dict):
raise ValueError("Expected JSON object")
remote_local = remote_data.get("local")
if remote_local is not None and not isinstance(remote_local, dict):
raise ValueError("Expected 'local' to be an object")
remote_peers = remote_data.get("peers", [])
if not isinstance(remote_peers, list):
raise ValueError("Expected 'peers' to be a list")
except (ValueError, json.JSONDecodeError) as e:
logger.warning("Invalid JSON from remote admin API: %s", e)
result["remote_status"] = {"reachable": True, "invalid_response": True}
result["issues"].append({
"code": "REMOTE_INVALID_RESPONSE",
"message": "Remote admin API returned invalid JSON",
"severity": "warning",
})
return jsonify(result)
result["remote_status"] = {
"reachable": True,
"local_site": remote_local,
"site_sync_enabled": None,
"has_peer_for_us": False,
"peer_connection_configured": False,
"has_bidirectional_rules_for_us": False,
}
for rp in remote_peers:
if not isinstance(rp, dict):
continue
if local_site and (
rp.get("site_id") == local_site.site_id or
rp.get("endpoint") == local_site.endpoint
):
result["remote_status"]["has_peer_for_us"] = True
result["remote_status"]["peer_connection_configured"] = bool(rp.get("connection_id"))
break
if not result["remote_status"]["has_peer_for_us"]:
result["issues"].append({
"code": "REMOTE_NO_PEER_FOR_US",
"message": "Remote site does not have this site registered as a peer",
"severity": "error",
})
elif not result["remote_status"]["peer_connection_configured"]:
result["issues"].append({
"code": "REMOTE_NO_CONNECTION_FOR_US",
"message": "Remote site has us as peer but no connection configured (cannot push back)",
"severity": "error",
})
elif resp.status_code == 401 or resp.status_code == 403:
result["remote_status"] = {
"reachable": True,
"admin_access_denied": True,
}
result["issues"].append({
"code": "REMOTE_ADMIN_ACCESS_DENIED",
"message": "Cannot verify remote configuration (admin access denied)",
"severity": "warning",
})
else:
result["remote_status"] = {
"reachable": True,
"admin_api_error": resp.status_code,
}
result["issues"].append({
"code": "REMOTE_ADMIN_API_ERROR",
"message": f"Remote admin API returned status {resp.status_code}",
"severity": "warning",
})
except requests.RequestException as e:
logger.warning("Remote admin API unreachable: %s", e)
result["remote_status"] = {
"reachable": False,
"error": "Connection failed",
}
result["issues"].append({
"code": "REMOTE_ADMIN_UNREACHABLE",
"message": "Could not reach remote admin API",
"severity": "warning",
})
except Exception as e:
logger.warning("Error checking remote bidirectional status: %s", e, exc_info=True)
result["issues"].append({
"code": "VERIFICATION_ERROR",
"message": "Internal error during verification",
"severity": "warning",
})
error_issues = [i for i in result["issues"] if i["severity"] == "error"]
result["is_fully_configured"] = len(error_issues) == 0 and len(local_bidir_rules) > 0
return jsonify(result)
def _website_domains() -> WebsiteDomainStore:
return current_app.extensions["website_domains"]
def _storage():
return current_app.extensions["object_storage"]
@admin_api_bp.route("/website-domains", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def list_website_domains():
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
return jsonify(_website_domains().list_all())
@admin_api_bp.route("/website-domains", methods=["POST"])
@limiter.limit(lambda: _get_admin_rate_limit())
def create_website_domain():
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
payload = request.get_json(silent=True) or {}
domain = normalize_domain(payload.get("domain") or "")
bucket = (payload.get("bucket") or "").strip()
if not domain:
return _json_error("ValidationError", "domain is required", 400)
if not is_valid_domain(domain):
return _json_error("ValidationError", f"Invalid domain: '{domain}'", 400)
if not bucket:
return _json_error("ValidationError", "bucket is required", 400)
storage = _storage()
if not storage.bucket_exists(bucket):
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
store = _website_domains()
existing = store.get_bucket(domain)
if existing:
return _json_error("Conflict", f"Domain '{domain}' is already mapped to bucket '{existing}'", 409)
store.set_mapping(domain, bucket)
logger.info("Website domain mapping created: %s -> %s", domain, bucket)
return jsonify({"domain": domain, "bucket": bucket}), 201
@admin_api_bp.route("/website-domains/<domain>", methods=["GET"])
@limiter.limit(lambda: _get_admin_rate_limit())
def get_website_domain(domain: str):
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
domain = normalize_domain(domain)
bucket = _website_domains().get_bucket(domain)
if not bucket:
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
return jsonify({"domain": domain, "bucket": bucket})
@admin_api_bp.route("/website-domains/<domain>", methods=["PUT"])
@limiter.limit(lambda: _get_admin_rate_limit())
def update_website_domain(domain: str):
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
domain = normalize_domain(domain)
payload = request.get_json(silent=True) or {}
bucket = (payload.get("bucket") or "").strip()
if not bucket:
return _json_error("ValidationError", "bucket is required", 400)
storage = _storage()
if not storage.bucket_exists(bucket):
return _json_error("NoSuchBucket", f"Bucket '{bucket}' does not exist", 404)
store = _website_domains()
if not store.get_bucket(domain):
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
store.set_mapping(domain, bucket)
logger.info("Website domain mapping updated: %s -> %s", domain, bucket)
return jsonify({"domain": domain, "bucket": bucket})
@admin_api_bp.route("/website-domains/<domain>", methods=["DELETE"])
@limiter.limit(lambda: _get_admin_rate_limit())
def delete_website_domain(domain: str):
principal, error = _require_admin()
if error:
return error
if not current_app.config.get("WEBSITE_HOSTING_ENABLED", False):
return _json_error("InvalidRequest", "Website hosting is not enabled", 400)
domain = normalize_domain(domain)
if not _website_domains().delete_mapping(domain):
return _json_error("NotFound", f"No mapping found for domain '{domain}'", 404)
logger.info("Website domain mapping deleted: %s", domain)
return Response(status=204)

View File

@@ -1,88 +1,23 @@
"""Bucket policy loader/enforcer with a subset of AWS semantics."""
from __future__ import annotations from __future__ import annotations
import ipaddress
import json import json
import re from dataclasses import dataclass
import time from fnmatch import fnmatch
from dataclasses import dataclass, field
from fnmatch import fnmatch, translate
from functools import lru_cache
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Pattern, Sequence, Tuple from typing import Any, Dict, Iterable, List, Optional, Sequence
RESOURCE_PREFIX = "arn:aws:s3:::" RESOURCE_PREFIX = "arn:aws:s3:::"
@lru_cache(maxsize=256)
def _compile_pattern(pattern: str) -> Pattern[str]:
return re.compile(translate(pattern), re.IGNORECASE)
def _match_string_like(value: str, pattern: str) -> bool:
compiled = _compile_pattern(pattern)
return bool(compiled.match(value))
def _ip_in_cidr(ip_str: str, cidr: str) -> bool:
try:
ip = ipaddress.ip_address(ip_str)
network = ipaddress.ip_network(cidr, strict=False)
return ip in network
except ValueError:
return False
def _evaluate_condition_operator(
operator: str,
condition_key: str,
condition_values: List[str],
context: Dict[str, Any],
) -> bool:
context_value = context.get(condition_key)
op_lower = operator.lower()
if_exists = op_lower.endswith("ifexists")
if if_exists:
op_lower = op_lower[:-8]
if context_value is None:
return if_exists
context_value_str = str(context_value)
context_value_lower = context_value_str.lower()
if op_lower == "stringequals":
return context_value_str in condition_values
elif op_lower == "stringnotequals":
return context_value_str not in condition_values
elif op_lower == "stringequalsignorecase":
return context_value_lower in [v.lower() for v in condition_values]
elif op_lower == "stringnotequalsignorecase":
return context_value_lower not in [v.lower() for v in condition_values]
elif op_lower == "stringlike":
return any(_match_string_like(context_value_str, p) for p in condition_values)
elif op_lower == "stringnotlike":
return not any(_match_string_like(context_value_str, p) for p in condition_values)
elif op_lower == "ipaddress":
return any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
elif op_lower == "notipaddress":
return not any(_ip_in_cidr(context_value_str, cidr) for cidr in condition_values)
elif op_lower == "bool":
bool_val = context_value_lower in ("true", "1", "yes")
return str(bool_val).lower() in [v.lower() for v in condition_values]
elif op_lower == "null":
is_null = context_value is None or context_value == ""
expected_null = condition_values[0].lower() in ("true", "1", "yes") if condition_values else True
return is_null == expected_null
return True
ACTION_ALIASES = { ACTION_ALIASES = {
# List actions
"s3:listbucket": "list", "s3:listbucket": "list",
"s3:listallmybuckets": "list", "s3:listallmybuckets": "list",
"s3:listbucketversions": "list", "s3:listbucketversions": "list",
"s3:listmultipartuploads": "list", "s3:listmultipartuploads": "list",
"s3:listparts": "list", "s3:listparts": "list",
# Read actions
"s3:getobject": "read", "s3:getobject": "read",
"s3:getobjectversion": "read", "s3:getobjectversion": "read",
"s3:getobjecttagging": "read", "s3:getobjecttagging": "read",
@@ -91,6 +26,7 @@ ACTION_ALIASES = {
"s3:getbucketversioning": "read", "s3:getbucketversioning": "read",
"s3:headobject": "read", "s3:headobject": "read",
"s3:headbucket": "read", "s3:headbucket": "read",
# Write actions
"s3:putobject": "write", "s3:putobject": "write",
"s3:createbucket": "write", "s3:createbucket": "write",
"s3:putobjecttagging": "write", "s3:putobjecttagging": "write",
@@ -100,30 +36,26 @@ ACTION_ALIASES = {
"s3:completemultipartupload": "write", "s3:completemultipartupload": "write",
"s3:abortmultipartupload": "write", "s3:abortmultipartupload": "write",
"s3:copyobject": "write", "s3:copyobject": "write",
# Delete actions
"s3:deleteobject": "delete", "s3:deleteobject": "delete",
"s3:deleteobjectversion": "delete", "s3:deleteobjectversion": "delete",
"s3:deletebucket": "delete", "s3:deletebucket": "delete",
"s3:deleteobjecttagging": "delete", "s3:deleteobjecttagging": "delete",
# Share actions (ACL)
"s3:putobjectacl": "share", "s3:putobjectacl": "share",
"s3:putbucketacl": "share", "s3:putbucketacl": "share",
"s3:getbucketacl": "share", "s3:getbucketacl": "share",
# Policy actions
"s3:putbucketpolicy": "policy", "s3:putbucketpolicy": "policy",
"s3:getbucketpolicy": "policy", "s3:getbucketpolicy": "policy",
"s3:deletebucketpolicy": "policy", "s3:deletebucketpolicy": "policy",
# Replication actions
"s3:getreplicationconfiguration": "replication", "s3:getreplicationconfiguration": "replication",
"s3:putreplicationconfiguration": "replication", "s3:putreplicationconfiguration": "replication",
"s3:deletereplicationconfiguration": "replication", "s3:deletereplicationconfiguration": "replication",
"s3:replicateobject": "replication", "s3:replicateobject": "replication",
"s3:replicatetags": "replication", "s3:replicatetags": "replication",
"s3:replicatedelete": "replication", "s3:replicatedelete": "replication",
"s3:getlifecycleconfiguration": "lifecycle",
"s3:putlifecycleconfiguration": "lifecycle",
"s3:deletelifecycleconfiguration": "lifecycle",
"s3:getbucketlifecycle": "lifecycle",
"s3:putbucketlifecycle": "lifecycle",
"s3:getbucketcors": "cors",
"s3:putbucketcors": "cors",
"s3:deletebucketcors": "cors",
} }
@@ -201,20 +133,7 @@ class BucketPolicyStatement:
effect: str effect: str
principals: List[str] | str principals: List[str] | str
actions: List[str] actions: List[str]
resources: List[Tuple[str | None, str | None]] resources: List[tuple[str | None, str | None]]
conditions: Dict[str, Dict[str, List[str]]] = field(default_factory=dict)
_compiled_patterns: List[Tuple[str | None, Optional[Pattern[str]]]] | None = None
def _get_compiled_patterns(self) -> List[Tuple[str | None, Optional[Pattern[str]]]]:
if self._compiled_patterns is None:
self._compiled_patterns = []
for resource_bucket, key_pattern in self.resources:
if key_pattern is None:
self._compiled_patterns.append((resource_bucket, None))
else:
regex_pattern = translate(key_pattern)
self._compiled_patterns.append((resource_bucket, re.compile(regex_pattern)))
return self._compiled_patterns
def matches_principal(self, access_key: Optional[str]) -> bool: def matches_principal(self, access_key: Optional[str]) -> bool:
if self.principals == "*": if self.principals == "*":
@@ -230,29 +149,18 @@ class BucketPolicyStatement:
def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool: def matches_resource(self, bucket: Optional[str], object_key: Optional[str]) -> bool:
bucket = (bucket or "*").lower() bucket = (bucket or "*").lower()
key = object_key or "" key = object_key or ""
for resource_bucket, compiled_pattern in self._get_compiled_patterns(): for resource_bucket, key_pattern in self.resources:
resource_bucket = (resource_bucket or "*").lower() resource_bucket = (resource_bucket or "*").lower()
if resource_bucket not in {"*", bucket}: if resource_bucket not in {"*", bucket}:
continue continue
if compiled_pattern is None: if key_pattern is None:
if not key: if not key:
return True return True
continue continue
if compiled_pattern.match(key): if fnmatch(key, key_pattern):
return True return True
return False return False
def matches_condition(self, context: Optional[Dict[str, Any]]) -> bool:
if not self.conditions:
return True
if context is None:
context = {}
for operator, key_values in self.conditions.items():
for condition_key, condition_values in key_values.items():
if not _evaluate_condition_operator(operator, condition_key, condition_values, context):
return False
return True
class BucketPolicyStore: class BucketPolicyStore:
"""Loads bucket policies from disk and evaluates statements.""" """Loads bucket policies from disk and evaluates statements."""
@@ -266,16 +174,8 @@ class BucketPolicyStore:
self._policies: Dict[str, List[BucketPolicyStatement]] = {} self._policies: Dict[str, List[BucketPolicyStatement]] = {}
self._load() self._load()
self._last_mtime = self._current_mtime() self._last_mtime = self._current_mtime()
# Performance: Avoid stat() on every request
self._last_stat_check = 0.0
self._stat_check_interval = 1.0 # Only check mtime every 1 second
def maybe_reload(self) -> None: def maybe_reload(self) -> None:
# Performance: Skip stat check if we checked recently
now = time.time()
if now - self._last_stat_check < self._stat_check_interval:
return
self._last_stat_check = now
current = self._current_mtime() current = self._current_mtime()
if current is None or current == self._last_mtime: if current is None or current == self._last_mtime:
return return
@@ -288,13 +188,13 @@ class BucketPolicyStore:
except FileNotFoundError: except FileNotFoundError:
return None return None
# ------------------------------------------------------------------
def evaluate( def evaluate(
self, self,
access_key: Optional[str], access_key: Optional[str],
bucket: Optional[str], bucket: Optional[str],
object_key: Optional[str], object_key: Optional[str],
action: str, action: str,
context: Optional[Dict[str, Any]] = None,
) -> str | None: ) -> str | None:
bucket = (bucket or "").lower() bucket = (bucket or "").lower()
statements = self._policies.get(bucket) or [] statements = self._policies.get(bucket) or []
@@ -306,8 +206,6 @@ class BucketPolicyStore:
continue continue
if not statement.matches_resource(bucket, object_key): if not statement.matches_resource(bucket, object_key):
continue continue
if not statement.matches_condition(context):
continue
if statement.effect == "deny": if statement.effect == "deny":
return "deny" return "deny"
decision = "allow" decision = "allow"
@@ -331,6 +229,7 @@ class BucketPolicyStore:
self._policies.pop(bucket, None) self._policies.pop(bucket, None)
self._persist() self._persist()
# ------------------------------------------------------------------
def _load(self) -> None: def _load(self) -> None:
try: try:
content = self.policy_path.read_text(encoding='utf-8') content = self.policy_path.read_text(encoding='utf-8')
@@ -372,7 +271,6 @@ class BucketPolicyStore:
if not resources: if not resources:
continue continue
effect = statement.get("Effect", "Allow").lower() effect = statement.get("Effect", "Allow").lower()
conditions = self._normalize_conditions(statement.get("Condition", {}))
statements.append( statements.append(
BucketPolicyStatement( BucketPolicyStatement(
sid=statement.get("Sid"), sid=statement.get("Sid"),
@@ -380,24 +278,6 @@ class BucketPolicyStore:
principals=principals, principals=principals,
actions=actions or ["*"], actions=actions or ["*"],
resources=resources, resources=resources,
conditions=conditions,
) )
) )
return statements return statements
def _normalize_conditions(self, condition_block: Dict[str, Any]) -> Dict[str, Dict[str, List[str]]]:
if not condition_block or not isinstance(condition_block, dict):
return {}
normalized: Dict[str, Dict[str, List[str]]] = {}
for operator, key_values in condition_block.items():
if not isinstance(key_values, dict):
continue
normalized[operator] = {}
for cond_key, cond_values in key_values.items():
if isinstance(cond_values, str):
normalized[operator][cond_key] = [cond_values]
elif isinstance(cond_values, list):
normalized[operator][cond_key] = [str(v) for v in cond_values]
else:
normalized[operator][cond_key] = [str(cond_values)]
return normalized

View File

@@ -1,109 +0,0 @@
from __future__ import annotations
import gzip
import io
from typing import Callable, Iterable, List, Tuple
COMPRESSIBLE_MIMES = frozenset([
'application/json',
'application/javascript',
'application/xml',
'text/html',
'text/css',
'text/plain',
'text/xml',
'text/javascript',
'application/x-ndjson',
])
MIN_SIZE_FOR_COMPRESSION = 500
class GzipMiddleware:
def __init__(self, app: Callable, compression_level: int = 6, min_size: int = MIN_SIZE_FOR_COMPRESSION):
self.app = app
self.compression_level = compression_level
self.min_size = min_size
def __call__(self, environ: dict, start_response: Callable) -> Iterable[bytes]:
accept_encoding = environ.get('HTTP_ACCEPT_ENCODING', '')
if 'gzip' not in accept_encoding.lower():
return self.app(environ, start_response)
response_started = False
status_code = None
response_headers: List[Tuple[str, str]] = []
content_type = None
content_length = None
should_compress = False
passthrough = False
exc_info_holder = [None]
def custom_start_response(status: str, headers: List[Tuple[str, str]], exc_info=None):
nonlocal response_started, status_code, response_headers, content_type, content_length, should_compress, passthrough
response_started = True
status_code = int(status.split(' ', 1)[0])
response_headers = list(headers)
exc_info_holder[0] = exc_info
for name, value in headers:
name_lower = name.lower()
if name_lower == 'content-type':
content_type = value.split(';')[0].strip().lower()
elif name_lower == 'content-length':
try:
content_length = int(value)
except (ValueError, TypeError):
pass
elif name_lower == 'content-encoding':
passthrough = True
return start_response(status, headers, exc_info)
elif name_lower == 'x-stream-response':
passthrough = True
return start_response(status, headers, exc_info)
if content_type and content_type in COMPRESSIBLE_MIMES:
if content_length is None or content_length >= self.min_size:
should_compress = True
else:
passthrough = True
return start_response(status, headers, exc_info)
return None
app_iter = self.app(environ, custom_start_response)
if passthrough:
return app_iter
response_body = b''.join(app_iter)
if not response_started:
return [response_body]
if should_compress and len(response_body) >= self.min_size:
buf = io.BytesIO()
with gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=self.compression_level) as gz:
gz.write(response_body)
compressed = buf.getvalue()
if len(compressed) < len(response_body):
response_body = compressed
new_headers = []
for name, value in response_headers:
if name.lower() not in ('content-length', 'content-encoding'):
new_headers.append((name, value))
new_headers.append(('Content-Encoding', 'gzip'))
new_headers.append(('Content-Length', str(len(response_body))))
new_headers.append(('Vary', 'Accept-Encoding'))
response_headers = new_headers
status_str = f"{status_code} " + {
200: "OK", 201: "Created", 204: "No Content", 206: "Partial Content",
301: "Moved Permanently", 302: "Found", 304: "Not Modified",
400: "Bad Request", 401: "Unauthorized", 403: "Forbidden", 404: "Not Found",
405: "Method Not Allowed", 409: "Conflict", 500: "Internal Server Error",
}.get(status_code, "Unknown")
start_response(status_str, response_headers, exc_info_holder[0])
return [response_body]

View File

@@ -1,7 +1,7 @@
"""Configuration helpers for the S3 clone application."""
from __future__ import annotations from __future__ import annotations
import os import os
import re
import secrets import secrets
import shutil import shutil
import sys import sys
@@ -10,30 +10,6 @@ from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Optional from typing import Any, Dict, Optional
import psutil
def _calculate_auto_threads() -> int:
cpu_count = psutil.cpu_count(logical=True) or 4
return max(1, min(cpu_count * 2, 64))
def _calculate_auto_connection_limit() -> int:
available_mb = psutil.virtual_memory().available / (1024 * 1024)
calculated = int(available_mb / 5)
return max(20, min(calculated, 1000))
def _calculate_auto_backlog(connection_limit: int) -> int:
return max(64, min(connection_limit * 2, 4096))
def _validate_rate_limit(value: str) -> str:
pattern = r"^\d+\s+per\s+(second|minute|hour|day)$"
if not re.match(pattern, value):
raise ValueError(f"Invalid rate limit format: {value}. Expected format: '200 per minute'")
return value
if getattr(sys, "frozen", False): if getattr(sys, "frozen", False):
# Running in a PyInstaller bundle # Running in a PyInstaller bundle
PROJECT_ROOT = Path(sys._MEIPASS) PROJECT_ROOT = Path(sys._MEIPASS)
@@ -80,10 +56,6 @@ class AppConfig:
log_backup_count: int log_backup_count: int
ratelimit_default: str ratelimit_default: str
ratelimit_storage_uri: str ratelimit_storage_uri: str
ratelimit_list_buckets: str
ratelimit_bucket_ops: str
ratelimit_object_ops: str
ratelimit_head_ops: str
cors_origins: list[str] cors_origins: list[str]
cors_methods: list[str] cors_methods: list[str]
cors_allow_headers: list[str] cors_allow_headers: list[str]
@@ -96,60 +68,11 @@ class AppConfig:
stream_chunk_size: int stream_chunk_size: int
multipart_min_part_size: int multipart_min_part_size: int
bucket_stats_cache_ttl: int bucket_stats_cache_ttl: int
object_cache_ttl: int
encryption_enabled: bool encryption_enabled: bool
encryption_master_key_path: Path encryption_master_key_path: Path
kms_enabled: bool kms_enabled: bool
kms_keys_path: Path kms_keys_path: Path
default_encryption_algorithm: str default_encryption_algorithm: str
display_timezone: str
lifecycle_enabled: bool
lifecycle_interval_seconds: int
metrics_history_enabled: bool
metrics_history_retention_hours: int
metrics_history_interval_minutes: int
operation_metrics_enabled: bool
operation_metrics_interval_minutes: int
operation_metrics_retention_hours: int
server_threads: int
server_connection_limit: int
server_backlog: int
server_channel_timeout: int
server_threads_auto: bool
server_connection_limit_auto: bool
server_backlog_auto: bool
site_sync_enabled: bool
site_sync_interval_seconds: int
site_sync_batch_size: int
sigv4_timestamp_tolerance_seconds: int
presigned_url_min_expiry_seconds: int
presigned_url_max_expiry_seconds: int
replication_connect_timeout_seconds: int
replication_read_timeout_seconds: int
replication_max_retries: int
replication_streaming_threshold_bytes: int
replication_max_failures_per_bucket: int
site_sync_connect_timeout_seconds: int
site_sync_read_timeout_seconds: int
site_sync_max_retries: int
site_sync_clock_skew_tolerance_seconds: float
object_key_max_length_bytes: int
object_cache_max_size: int
bucket_config_cache_ttl_seconds: float
object_tag_limit: int
encryption_chunk_size_bytes: int
kms_generate_data_key_min_bytes: int
kms_generate_data_key_max_bytes: int
lifecycle_max_history_per_bucket: int
site_id: Optional[str]
site_endpoint: Optional[str]
site_region: str
site_priority: int
ratelimit_admin: str
num_trusted_proxies: int
allowed_redirect_hosts: list[str]
allow_internal_endpoints: bool
website_hosting_enabled: bool
@classmethod @classmethod
def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig": def from_env(cls, overrides: Optional[Dict[str, Any]] = None) -> "AppConfig":
@@ -159,7 +82,7 @@ class AppConfig:
return overrides.get(name, os.getenv(name, default)) return overrides.get(name, os.getenv(name, default))
storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve() storage_root = Path(_get("STORAGE_ROOT", PROJECT_ROOT / "data")).resolve()
max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) max_upload_size = int(_get("MAX_UPLOAD_SIZE", 1024 * 1024 * 1024)) # 1 GiB default
ui_page_size = int(_get("UI_PAGE_SIZE", 100)) ui_page_size = int(_get("UI_PAGE_SIZE", 100))
auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5)) auth_max_attempts = int(_get("AUTH_MAX_ATTEMPTS", 5))
auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15)) auth_lockout_minutes = int(_get("AUTH_LOCKOUT_MINUTES", 15))
@@ -167,8 +90,6 @@ class AppConfig:
secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300)) secret_ttl_seconds = int(_get("SECRET_TTL_SECONDS", 300))
stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024)) stream_chunk_size = int(_get("STREAM_CHUNK_SIZE", 64 * 1024))
multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024)) multipart_min_part_size = int(_get("MULTIPART_MIN_PART_SIZE", 5 * 1024 * 1024))
lifecycle_enabled = _get("LIFECYCLE_ENABLED", "false").lower() in ("true", "1", "yes")
lifecycle_interval_seconds = int(_get("LIFECYCLE_INTERVAL_SECONDS", 3600))
default_secret = "dev-secret-key" default_secret = "dev-secret-key"
secret_key = str(_get("SECRET_KEY", default_secret)) secret_key = str(_get("SECRET_KEY", default_secret))
@@ -183,10 +104,6 @@ class AppConfig:
try: try:
secret_file.parent.mkdir(parents=True, exist_ok=True) secret_file.parent.mkdir(parents=True, exist_ok=True)
secret_file.write_text(generated) secret_file.write_text(generated)
try:
os.chmod(secret_file, 0o600)
except OSError:
pass
secret_key = generated secret_key = generated
except OSError: except OSError:
secret_key = generated secret_key = generated
@@ -222,12 +139,8 @@ class AppConfig:
log_path = log_dir / str(_get("LOG_FILE", "app.log")) log_path = log_dir / str(_get("LOG_FILE", "app.log"))
log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024)) log_max_bytes = int(_get("LOG_MAX_BYTES", 5 * 1024 * 1024))
log_backup_count = int(_get("LOG_BACKUP_COUNT", 3)) log_backup_count = int(_get("LOG_BACKUP_COUNT", 3))
ratelimit_default = _validate_rate_limit(str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))) ratelimit_default = str(_get("RATE_LIMIT_DEFAULT", "200 per minute"))
ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://")) ratelimit_storage_uri = str(_get("RATE_LIMIT_STORAGE_URI", "memory://"))
ratelimit_list_buckets = _validate_rate_limit(str(_get("RATE_LIMIT_LIST_BUCKETS", "60 per minute")))
ratelimit_bucket_ops = _validate_rate_limit(str(_get("RATE_LIMIT_BUCKET_OPS", "120 per minute")))
ratelimit_object_ops = _validate_rate_limit(str(_get("RATE_LIMIT_OBJECT_OPS", "240 per minute")))
ratelimit_head_ops = _validate_rate_limit(str(_get("RATE_LIMIT_HEAD_OPS", "100 per minute")))
def _csv(value: str, default: list[str]) -> list[str]: def _csv(value: str, default: list[str]) -> list[str]:
if not value: if not value:
@@ -240,85 +153,15 @@ class AppConfig:
cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"]) cors_allow_headers = _csv(str(_get("CORS_ALLOW_HEADERS", "*")), ["*"])
cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"]) cors_expose_headers = _csv(str(_get("CORS_EXPOSE_HEADERS", "*")), ["*"])
session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30)) session_lifetime_days = int(_get("SESSION_LIFETIME_DAYS", 30))
bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60)) bucket_stats_cache_ttl = int(_get("BUCKET_STATS_CACHE_TTL", 60)) # Default 60 seconds
object_cache_ttl = int(_get("OBJECT_CACHE_TTL", 5))
# Encryption settings
encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} encryption_enabled = str(_get("ENCRYPTION_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
encryption_keys_dir = storage_root / ".myfsio.sys" / "keys" encryption_keys_dir = storage_root / ".myfsio.sys" / "keys"
encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve() encryption_master_key_path = Path(_get("ENCRYPTION_MASTER_KEY_PATH", encryption_keys_dir / "master.key")).resolve()
kms_enabled = str(_get("KMS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"} kms_enabled = str(_get("KMS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve() kms_keys_path = Path(_get("KMS_KEYS_PATH", encryption_keys_dir / "kms_keys.json")).resolve()
default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256")) default_encryption_algorithm = str(_get("DEFAULT_ENCRYPTION_ALGORITHM", "AES256"))
display_timezone = str(_get("DISPLAY_TIMEZONE", "UTC"))
metrics_history_enabled = str(_get("METRICS_HISTORY_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
metrics_history_retention_hours = int(_get("METRICS_HISTORY_RETENTION_HOURS", 24))
metrics_history_interval_minutes = int(_get("METRICS_HISTORY_INTERVAL_MINUTES", 5))
operation_metrics_enabled = str(_get("OPERATION_METRICS_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
operation_metrics_interval_minutes = int(_get("OPERATION_METRICS_INTERVAL_MINUTES", 5))
operation_metrics_retention_hours = int(_get("OPERATION_METRICS_RETENTION_HOURS", 24))
_raw_threads = int(_get("SERVER_THREADS", 0))
if _raw_threads == 0:
server_threads = _calculate_auto_threads()
server_threads_auto = True
else:
server_threads = _raw_threads
server_threads_auto = False
_raw_conn_limit = int(_get("SERVER_CONNECTION_LIMIT", 0))
if _raw_conn_limit == 0:
server_connection_limit = _calculate_auto_connection_limit()
server_connection_limit_auto = True
else:
server_connection_limit = _raw_conn_limit
server_connection_limit_auto = False
_raw_backlog = int(_get("SERVER_BACKLOG", 0))
if _raw_backlog == 0:
server_backlog = _calculate_auto_backlog(server_connection_limit)
server_backlog_auto = True
else:
server_backlog = _raw_backlog
server_backlog_auto = False
server_channel_timeout = int(_get("SERVER_CHANNEL_TIMEOUT", 120))
site_sync_enabled = str(_get("SITE_SYNC_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
site_sync_interval_seconds = int(_get("SITE_SYNC_INTERVAL_SECONDS", 60))
site_sync_batch_size = int(_get("SITE_SYNC_BATCH_SIZE", 100))
sigv4_timestamp_tolerance_seconds = int(_get("SIGV4_TIMESTAMP_TOLERANCE_SECONDS", 900))
presigned_url_min_expiry_seconds = int(_get("PRESIGNED_URL_MIN_EXPIRY_SECONDS", 1))
presigned_url_max_expiry_seconds = int(_get("PRESIGNED_URL_MAX_EXPIRY_SECONDS", 604800))
replication_connect_timeout_seconds = int(_get("REPLICATION_CONNECT_TIMEOUT_SECONDS", 5))
replication_read_timeout_seconds = int(_get("REPLICATION_READ_TIMEOUT_SECONDS", 30))
replication_max_retries = int(_get("REPLICATION_MAX_RETRIES", 2))
replication_streaming_threshold_bytes = int(_get("REPLICATION_STREAMING_THRESHOLD_BYTES", 10 * 1024 * 1024))
replication_max_failures_per_bucket = int(_get("REPLICATION_MAX_FAILURES_PER_BUCKET", 50))
site_sync_connect_timeout_seconds = int(_get("SITE_SYNC_CONNECT_TIMEOUT_SECONDS", 10))
site_sync_read_timeout_seconds = int(_get("SITE_SYNC_READ_TIMEOUT_SECONDS", 120))
site_sync_max_retries = int(_get("SITE_SYNC_MAX_RETRIES", 2))
site_sync_clock_skew_tolerance_seconds = float(_get("SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS", 1.0))
object_key_max_length_bytes = int(_get("OBJECT_KEY_MAX_LENGTH_BYTES", 1024))
object_cache_max_size = int(_get("OBJECT_CACHE_MAX_SIZE", 100))
bucket_config_cache_ttl_seconds = float(_get("BUCKET_CONFIG_CACHE_TTL_SECONDS", 30.0))
object_tag_limit = int(_get("OBJECT_TAG_LIMIT", 50))
encryption_chunk_size_bytes = int(_get("ENCRYPTION_CHUNK_SIZE_BYTES", 64 * 1024))
kms_generate_data_key_min_bytes = int(_get("KMS_GENERATE_DATA_KEY_MIN_BYTES", 1))
kms_generate_data_key_max_bytes = int(_get("KMS_GENERATE_DATA_KEY_MAX_BYTES", 1024))
lifecycle_max_history_per_bucket = int(_get("LIFECYCLE_MAX_HISTORY_PER_BUCKET", 50))
site_id_raw = _get("SITE_ID", None)
site_id = str(site_id_raw).strip() if site_id_raw else None
site_endpoint_raw = _get("SITE_ENDPOINT", None)
site_endpoint = str(site_endpoint_raw).strip() if site_endpoint_raw else None
site_region = str(_get("SITE_REGION", "us-east-1"))
site_priority = int(_get("SITE_PRIORITY", 100))
ratelimit_admin = _validate_rate_limit(str(_get("RATE_LIMIT_ADMIN", "60 per minute")))
num_trusted_proxies = int(_get("NUM_TRUSTED_PROXIES", 0))
allowed_redirect_hosts_raw = _get("ALLOWED_REDIRECT_HOSTS", "")
allowed_redirect_hosts = [h.strip() for h in str(allowed_redirect_hosts_raw).split(",") if h.strip()]
allow_internal_endpoints = str(_get("ALLOW_INTERNAL_ENDPOINTS", "0")).lower() in {"1", "true", "yes", "on"}
website_hosting_enabled = str(_get("WEBSITE_HOSTING_ENABLED", "0")).lower() in {"1", "true", "yes", "on"}
return cls(storage_root=storage_root, return cls(storage_root=storage_root,
max_upload_size=max_upload_size, max_upload_size=max_upload_size,
@@ -337,10 +180,6 @@ class AppConfig:
log_backup_count=log_backup_count, log_backup_count=log_backup_count,
ratelimit_default=ratelimit_default, ratelimit_default=ratelimit_default,
ratelimit_storage_uri=ratelimit_storage_uri, ratelimit_storage_uri=ratelimit_storage_uri,
ratelimit_list_buckets=ratelimit_list_buckets,
ratelimit_bucket_ops=ratelimit_bucket_ops,
ratelimit_object_ops=ratelimit_object_ops,
ratelimit_head_ops=ratelimit_head_ops,
cors_origins=cors_origins, cors_origins=cors_origins,
cors_methods=cors_methods, cors_methods=cors_methods,
cors_allow_headers=cors_allow_headers, cors_allow_headers=cors_allow_headers,
@@ -353,60 +192,11 @@ class AppConfig:
stream_chunk_size=stream_chunk_size, stream_chunk_size=stream_chunk_size,
multipart_min_part_size=multipart_min_part_size, multipart_min_part_size=multipart_min_part_size,
bucket_stats_cache_ttl=bucket_stats_cache_ttl, bucket_stats_cache_ttl=bucket_stats_cache_ttl,
object_cache_ttl=object_cache_ttl,
encryption_enabled=encryption_enabled, encryption_enabled=encryption_enabled,
encryption_master_key_path=encryption_master_key_path, encryption_master_key_path=encryption_master_key_path,
kms_enabled=kms_enabled, kms_enabled=kms_enabled,
kms_keys_path=kms_keys_path, kms_keys_path=kms_keys_path,
default_encryption_algorithm=default_encryption_algorithm, default_encryption_algorithm=default_encryption_algorithm)
display_timezone=display_timezone,
lifecycle_enabled=lifecycle_enabled,
lifecycle_interval_seconds=lifecycle_interval_seconds,
metrics_history_enabled=metrics_history_enabled,
metrics_history_retention_hours=metrics_history_retention_hours,
metrics_history_interval_minutes=metrics_history_interval_minutes,
operation_metrics_enabled=operation_metrics_enabled,
operation_metrics_interval_minutes=operation_metrics_interval_minutes,
operation_metrics_retention_hours=operation_metrics_retention_hours,
server_threads=server_threads,
server_connection_limit=server_connection_limit,
server_backlog=server_backlog,
server_channel_timeout=server_channel_timeout,
server_threads_auto=server_threads_auto,
server_connection_limit_auto=server_connection_limit_auto,
server_backlog_auto=server_backlog_auto,
site_sync_enabled=site_sync_enabled,
site_sync_interval_seconds=site_sync_interval_seconds,
site_sync_batch_size=site_sync_batch_size,
sigv4_timestamp_tolerance_seconds=sigv4_timestamp_tolerance_seconds,
presigned_url_min_expiry_seconds=presigned_url_min_expiry_seconds,
presigned_url_max_expiry_seconds=presigned_url_max_expiry_seconds,
replication_connect_timeout_seconds=replication_connect_timeout_seconds,
replication_read_timeout_seconds=replication_read_timeout_seconds,
replication_max_retries=replication_max_retries,
replication_streaming_threshold_bytes=replication_streaming_threshold_bytes,
replication_max_failures_per_bucket=replication_max_failures_per_bucket,
site_sync_connect_timeout_seconds=site_sync_connect_timeout_seconds,
site_sync_read_timeout_seconds=site_sync_read_timeout_seconds,
site_sync_max_retries=site_sync_max_retries,
site_sync_clock_skew_tolerance_seconds=site_sync_clock_skew_tolerance_seconds,
object_key_max_length_bytes=object_key_max_length_bytes,
object_cache_max_size=object_cache_max_size,
bucket_config_cache_ttl_seconds=bucket_config_cache_ttl_seconds,
object_tag_limit=object_tag_limit,
encryption_chunk_size_bytes=encryption_chunk_size_bytes,
kms_generate_data_key_min_bytes=kms_generate_data_key_min_bytes,
kms_generate_data_key_max_bytes=kms_generate_data_key_max_bytes,
lifecycle_max_history_per_bucket=lifecycle_max_history_per_bucket,
site_id=site_id,
site_endpoint=site_endpoint,
site_region=site_region,
site_priority=site_priority,
ratelimit_admin=ratelimit_admin,
num_trusted_proxies=num_trusted_proxies,
allowed_redirect_hosts=allowed_redirect_hosts,
allow_internal_endpoints=allow_internal_endpoints,
website_hosting_enabled=website_hosting_enabled)
def validate_and_report(self) -> list[str]: def validate_and_report(self) -> list[str]:
"""Validate configuration and return a list of warnings/issues. """Validate configuration and return a list of warnings/issues.
@@ -416,6 +206,7 @@ class AppConfig:
""" """
issues = [] issues = []
# Check if storage_root is writable
try: try:
test_file = self.storage_root / ".write_test" test_file = self.storage_root / ".write_test"
test_file.touch() test_file.touch()
@@ -423,20 +214,24 @@ class AppConfig:
except (OSError, PermissionError) as e: except (OSError, PermissionError) as e:
issues.append(f"CRITICAL: STORAGE_ROOT '{self.storage_root}' is not writable: {e}") issues.append(f"CRITICAL: STORAGE_ROOT '{self.storage_root}' is not writable: {e}")
# Check if storage_root looks like a temp directory
storage_str = str(self.storage_root).lower() storage_str = str(self.storage_root).lower()
if "/tmp" in storage_str or "\\temp" in storage_str or "appdata\\local\\temp" in storage_str: if "/tmp" in storage_str or "\\temp" in storage_str or "appdata\\local\\temp" in storage_str:
issues.append(f"WARNING: STORAGE_ROOT '{self.storage_root}' appears to be a temporary directory. Data may be lost on reboot!") issues.append(f"WARNING: STORAGE_ROOT '{self.storage_root}' appears to be a temporary directory. Data may be lost on reboot!")
# Check if IAM config path is under storage_root
try: try:
self.iam_config_path.relative_to(self.storage_root) self.iam_config_path.relative_to(self.storage_root)
except ValueError: except ValueError:
issues.append(f"WARNING: IAM_CONFIG '{self.iam_config_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting IAM_CONFIG explicitly or ensuring paths are aligned.") issues.append(f"WARNING: IAM_CONFIG '{self.iam_config_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting IAM_CONFIG explicitly or ensuring paths are aligned.")
# Check if bucket policy path is under storage_root
try: try:
self.bucket_policy_path.relative_to(self.storage_root) self.bucket_policy_path.relative_to(self.storage_root)
except ValueError: except ValueError:
issues.append(f"WARNING: BUCKET_POLICY_PATH '{self.bucket_policy_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting BUCKET_POLICY_PATH explicitly.") issues.append(f"WARNING: BUCKET_POLICY_PATH '{self.bucket_policy_path}' is outside STORAGE_ROOT '{self.storage_root}'. Consider setting BUCKET_POLICY_PATH explicitly.")
# Check if log path is writable
try: try:
self.log_path.parent.mkdir(parents=True, exist_ok=True) self.log_path.parent.mkdir(parents=True, exist_ok=True)
test_log = self.log_path.parent / ".write_test" test_log = self.log_path.parent / ".write_test"
@@ -445,56 +240,32 @@ class AppConfig:
except (OSError, PermissionError) as e: except (OSError, PermissionError) as e:
issues.append(f"WARNING: Log directory '{self.log_path.parent}' is not writable: {e}") issues.append(f"WARNING: Log directory '{self.log_path.parent}' is not writable: {e}")
# Check log path location
log_str = str(self.log_path).lower() log_str = str(self.log_path).lower()
if "/tmp" in log_str or "\\temp" in log_str or "appdata\\local\\temp" in log_str: if "/tmp" in log_str or "\\temp" in log_str or "appdata\\local\\temp" in log_str:
issues.append(f"WARNING: LOG_DIR '{self.log_path.parent}' appears to be a temporary directory. Logs may be lost on reboot!") issues.append(f"WARNING: LOG_DIR '{self.log_path.parent}' appears to be a temporary directory. Logs may be lost on reboot!")
# Check if encryption keys path is under storage_root (when encryption is enabled)
if self.encryption_enabled: if self.encryption_enabled:
try: try:
self.encryption_master_key_path.relative_to(self.storage_root) self.encryption_master_key_path.relative_to(self.storage_root)
except ValueError: except ValueError:
issues.append(f"WARNING: ENCRYPTION_MASTER_KEY_PATH '{self.encryption_master_key_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.") issues.append(f"WARNING: ENCRYPTION_MASTER_KEY_PATH '{self.encryption_master_key_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")
# Check if KMS keys path is under storage_root (when KMS is enabled)
if self.kms_enabled: if self.kms_enabled:
try: try:
self.kms_keys_path.relative_to(self.storage_root) self.kms_keys_path.relative_to(self.storage_root)
except ValueError: except ValueError:
issues.append(f"WARNING: KMS_KEYS_PATH '{self.kms_keys_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.") issues.append(f"WARNING: KMS_KEYS_PATH '{self.kms_keys_path}' is outside STORAGE_ROOT. Ensure proper backup procedures.")
# Warn about production settings
if self.secret_key == "dev-secret-key": if self.secret_key == "dev-secret-key":
issues.append("WARNING: Using default SECRET_KEY. Set SECRET_KEY environment variable for production.") issues.append("WARNING: Using default SECRET_KEY. Set SECRET_KEY environment variable for production.")
if "*" in self.cors_origins: if "*" in self.cors_origins:
issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.") issues.append("INFO: CORS_ORIGINS is set to '*'. Consider restricting to specific domains in production.")
if not (1 <= self.server_threads <= 64):
issues.append(f"CRITICAL: SERVER_THREADS={self.server_threads} is outside valid range (1-64). Server cannot start.")
if not (10 <= self.server_connection_limit <= 1000):
issues.append(f"CRITICAL: SERVER_CONNECTION_LIMIT={self.server_connection_limit} is outside valid range (10-1000). Server cannot start.")
if not (64 <= self.server_backlog <= 4096):
issues.append(f"CRITICAL: SERVER_BACKLOG={self.server_backlog} is outside valid range (64-4096). Server cannot start.")
if not (10 <= self.server_channel_timeout <= 300):
issues.append(f"CRITICAL: SERVER_CHANNEL_TIMEOUT={self.server_channel_timeout} is outside valid range (10-300). Server cannot start.")
if sys.platform != "win32":
try:
import resource
soft_limit, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
threshold = int(soft_limit * 0.8)
if self.server_connection_limit > threshold:
issues.append(f"WARNING: SERVER_CONNECTION_LIMIT={self.server_connection_limit} exceeds 80% of system file descriptor limit (soft={soft_limit}). Consider running 'ulimit -n {self.server_connection_limit + 100}'.")
except (ImportError, OSError):
pass
try:
import psutil
available_mb = psutil.virtual_memory().available / (1024 * 1024)
estimated_mb = self.server_threads * 50
if estimated_mb > available_mb * 0.5:
issues.append(f"WARNING: SERVER_THREADS={self.server_threads} may require ~{estimated_mb}MB memory, exceeding 50% of available RAM ({int(available_mb)}MB).")
except ImportError:
pass
return issues return issues
def print_startup_summary(self) -> None: def print_startup_summary(self) -> None:
@@ -512,14 +283,6 @@ class AppConfig:
print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})") print(f" ENCRYPTION: Enabled (Master key: {self.encryption_master_key_path})")
if self.kms_enabled: if self.kms_enabled:
print(f" KMS: Enabled (Keys: {self.kms_keys_path})") print(f" KMS: Enabled (Keys: {self.kms_keys_path})")
if self.website_hosting_enabled:
print(f" WEBSITE_HOSTING: Enabled")
def _auto(flag: bool) -> str:
return " (auto)" if flag else ""
print(f" SERVER_THREADS: {self.server_threads}{_auto(self.server_threads_auto)}")
print(f" CONNECTION_LIMIT: {self.server_connection_limit}{_auto(self.server_connection_limit_auto)}")
print(f" BACKLOG: {self.server_backlog}{_auto(self.server_backlog_auto)}")
print(f" CHANNEL_TIMEOUT: {self.server_channel_timeout}s")
print("=" * 60) print("=" * 60)
issues = self.validate_and_report() issues = self.validate_and_report()
@@ -550,7 +313,6 @@ class AppConfig:
"STREAM_CHUNK_SIZE": self.stream_chunk_size, "STREAM_CHUNK_SIZE": self.stream_chunk_size,
"MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size, "MULTIPART_MIN_PART_SIZE": self.multipart_min_part_size,
"BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl, "BUCKET_STATS_CACHE_TTL": self.bucket_stats_cache_ttl,
"OBJECT_CACHE_TTL": self.object_cache_ttl,
"LOG_LEVEL": self.log_level, "LOG_LEVEL": self.log_level,
"LOG_TO_FILE": self.log_to_file, "LOG_TO_FILE": self.log_to_file,
"LOG_FILE": str(self.log_path), "LOG_FILE": str(self.log_path),
@@ -558,10 +320,6 @@ class AppConfig:
"LOG_BACKUP_COUNT": self.log_backup_count, "LOG_BACKUP_COUNT": self.log_backup_count,
"RATELIMIT_DEFAULT": self.ratelimit_default, "RATELIMIT_DEFAULT": self.ratelimit_default,
"RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri, "RATELIMIT_STORAGE_URI": self.ratelimit_storage_uri,
"RATELIMIT_LIST_BUCKETS": self.ratelimit_list_buckets,
"RATELIMIT_BUCKET_OPS": self.ratelimit_bucket_ops,
"RATELIMIT_OBJECT_OPS": self.ratelimit_object_ops,
"RATELIMIT_HEAD_OPS": self.ratelimit_head_ops,
"CORS_ORIGINS": self.cors_origins, "CORS_ORIGINS": self.cors_origins,
"CORS_METHODS": self.cors_methods, "CORS_METHODS": self.cors_methods,
"CORS_ALLOW_HEADERS": self.cors_allow_headers, "CORS_ALLOW_HEADERS": self.cors_allow_headers,
@@ -572,49 +330,4 @@ class AppConfig:
"KMS_ENABLED": self.kms_enabled, "KMS_ENABLED": self.kms_enabled,
"KMS_KEYS_PATH": str(self.kms_keys_path), "KMS_KEYS_PATH": str(self.kms_keys_path),
"DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm, "DEFAULT_ENCRYPTION_ALGORITHM": self.default_encryption_algorithm,
"DISPLAY_TIMEZONE": self.display_timezone,
"LIFECYCLE_ENABLED": self.lifecycle_enabled,
"LIFECYCLE_INTERVAL_SECONDS": self.lifecycle_interval_seconds,
"METRICS_HISTORY_ENABLED": self.metrics_history_enabled,
"METRICS_HISTORY_RETENTION_HOURS": self.metrics_history_retention_hours,
"METRICS_HISTORY_INTERVAL_MINUTES": self.metrics_history_interval_minutes,
"OPERATION_METRICS_ENABLED": self.operation_metrics_enabled,
"OPERATION_METRICS_INTERVAL_MINUTES": self.operation_metrics_interval_minutes,
"OPERATION_METRICS_RETENTION_HOURS": self.operation_metrics_retention_hours,
"SERVER_THREADS": self.server_threads,
"SERVER_CONNECTION_LIMIT": self.server_connection_limit,
"SERVER_BACKLOG": self.server_backlog,
"SERVER_CHANNEL_TIMEOUT": self.server_channel_timeout,
"SITE_SYNC_ENABLED": self.site_sync_enabled,
"SITE_SYNC_INTERVAL_SECONDS": self.site_sync_interval_seconds,
"SITE_SYNC_BATCH_SIZE": self.site_sync_batch_size,
"SIGV4_TIMESTAMP_TOLERANCE_SECONDS": self.sigv4_timestamp_tolerance_seconds,
"PRESIGNED_URL_MIN_EXPIRY_SECONDS": self.presigned_url_min_expiry_seconds,
"PRESIGNED_URL_MAX_EXPIRY_SECONDS": self.presigned_url_max_expiry_seconds,
"REPLICATION_CONNECT_TIMEOUT_SECONDS": self.replication_connect_timeout_seconds,
"REPLICATION_READ_TIMEOUT_SECONDS": self.replication_read_timeout_seconds,
"REPLICATION_MAX_RETRIES": self.replication_max_retries,
"REPLICATION_STREAMING_THRESHOLD_BYTES": self.replication_streaming_threshold_bytes,
"REPLICATION_MAX_FAILURES_PER_BUCKET": self.replication_max_failures_per_bucket,
"SITE_SYNC_CONNECT_TIMEOUT_SECONDS": self.site_sync_connect_timeout_seconds,
"SITE_SYNC_READ_TIMEOUT_SECONDS": self.site_sync_read_timeout_seconds,
"SITE_SYNC_MAX_RETRIES": self.site_sync_max_retries,
"SITE_SYNC_CLOCK_SKEW_TOLERANCE_SECONDS": self.site_sync_clock_skew_tolerance_seconds,
"OBJECT_KEY_MAX_LENGTH_BYTES": self.object_key_max_length_bytes,
"OBJECT_CACHE_MAX_SIZE": self.object_cache_max_size,
"BUCKET_CONFIG_CACHE_TTL_SECONDS": self.bucket_config_cache_ttl_seconds,
"OBJECT_TAG_LIMIT": self.object_tag_limit,
"ENCRYPTION_CHUNK_SIZE_BYTES": self.encryption_chunk_size_bytes,
"KMS_GENERATE_DATA_KEY_MIN_BYTES": self.kms_generate_data_key_min_bytes,
"KMS_GENERATE_DATA_KEY_MAX_BYTES": self.kms_generate_data_key_max_bytes,
"LIFECYCLE_MAX_HISTORY_PER_BUCKET": self.lifecycle_max_history_per_bucket,
"SITE_ID": self.site_id,
"SITE_ENDPOINT": self.site_endpoint,
"SITE_REGION": self.site_region,
"SITE_PRIORITY": self.site_priority,
"RATE_LIMIT_ADMIN": self.ratelimit_admin,
"NUM_TRUSTED_PROXIES": self.num_trusted_proxies,
"ALLOWED_REDIRECT_HOSTS": self.allowed_redirect_hosts,
"ALLOW_INTERNAL_ENDPOINTS": self.allow_internal_endpoints,
"WEBSITE_HOSTING_ENABLED": self.website_hosting_enabled,
} }

View File

@@ -1,3 +1,4 @@
"""Manage remote S3 connections."""
from __future__ import annotations from __future__ import annotations
import json import json

View File

@@ -1,3 +1,4 @@
"""Encrypted storage layer that wraps ObjectStorage with encryption support."""
from __future__ import annotations from __future__ import annotations
import io import io
@@ -78,7 +79,7 @@ class EncryptedObjectStorage:
kms_key_id: Optional[str] = None, kms_key_id: Optional[str] = None,
) -> ObjectMeta: ) -> ObjectMeta:
"""Store an object, optionally with encryption. """Store an object, optionally with encryption.
Args: Args:
bucket_name: Name of the bucket bucket_name: Name of the bucket
object_key: Key for the object object_key: Key for the object
@@ -86,41 +87,42 @@ class EncryptedObjectStorage:
metadata: Optional user metadata metadata: Optional user metadata
server_side_encryption: Encryption algorithm ("AES256" or "aws:kms") server_side_encryption: Encryption algorithm ("AES256" or "aws:kms")
kms_key_id: KMS key ID (for aws:kms encryption) kms_key_id: KMS key ID (for aws:kms encryption)
Returns: Returns:
ObjectMeta with object information ObjectMeta with object information
Performance: Uses streaming encryption for large files to reduce memory usage.
""" """
should_encrypt, algorithm, detected_kms_key = self._should_encrypt( should_encrypt, algorithm, detected_kms_key = self._should_encrypt(
bucket_name, server_side_encryption bucket_name, server_side_encryption
) )
if kms_key_id is None: if kms_key_id is None:
kms_key_id = detected_kms_key kms_key_id = detected_kms_key
if should_encrypt: if should_encrypt:
data = stream.read()
try: try:
# Performance: Use streaming encryption to avoid loading entire file into memory ciphertext, enc_metadata = self.encryption.encrypt_object(
encrypted_stream, enc_metadata = self.encryption.encrypt_stream( data,
stream,
algorithm=algorithm, algorithm=algorithm,
kms_key_id=kms_key_id,
context={"bucket": bucket_name, "key": object_key}, context={"bucket": bucket_name, "key": object_key},
) )
combined_metadata = metadata.copy() if metadata else {} combined_metadata = metadata.copy() if metadata else {}
combined_metadata.update(enc_metadata.to_dict()) combined_metadata.update(enc_metadata.to_dict())
encrypted_stream = io.BytesIO(ciphertext)
result = self.storage.put_object( result = self.storage.put_object(
bucket_name, bucket_name,
object_key, object_key,
encrypted_stream, encrypted_stream,
metadata=combined_metadata, metadata=combined_metadata,
) )
result.metadata = combined_metadata result.metadata = combined_metadata
return result return result
except EncryptionError as exc: except EncryptionError as exc:
raise StorageError(f"Encryption failed: {exc}") from exc raise StorageError(f"Encryption failed: {exc}") from exc
else: else:
@@ -133,34 +135,33 @@ class EncryptedObjectStorage:
def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]: def get_object_data(self, bucket_name: str, object_key: str) -> tuple[bytes, Dict[str, str]]:
"""Get object data, decrypting if necessary. """Get object data, decrypting if necessary.
Returns: Returns:
Tuple of (data, metadata) Tuple of (data, metadata)
Performance: Uses streaming decryption to reduce memory usage.
""" """
path = self.storage.get_object_path(bucket_name, object_key) path = self.storage.get_object_path(bucket_name, object_key)
metadata = self.storage.get_object_metadata(bucket_name, object_key) metadata = self.storage.get_object_metadata(bucket_name, object_key)
with path.open("rb") as f:
data = f.read()
enc_metadata = EncryptionMetadata.from_dict(metadata) enc_metadata = EncryptionMetadata.from_dict(metadata)
if enc_metadata: if enc_metadata:
try: try:
# Performance: Use streaming decryption to avoid loading entire file into memory data = self.encryption.decrypt_object(
with path.open("rb") as f: data,
decrypted_stream = self.encryption.decrypt_stream(f, enc_metadata) enc_metadata,
data = decrypted_stream.read() context={"bucket": bucket_name, "key": object_key},
)
except EncryptionError as exc: except EncryptionError as exc:
raise StorageError(f"Decryption failed: {exc}") from exc raise StorageError(f"Decryption failed: {exc}") from exc
else:
with path.open("rb") as f:
data = f.read()
clean_metadata = { clean_metadata = {
k: v for k, v in metadata.items() k: v for k, v in metadata.items()
if not k.startswith("x-amz-encryption") if not k.startswith("x-amz-encryption")
and k != "x-amz-encrypted-data-key" and k != "x-amz-encrypted-data-key"
} }
return data, clean_metadata return data, clean_metadata
def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]: def get_object_stream(self, bucket_name: str, object_key: str) -> tuple[BinaryIO, Dict[str, str], int]:
@@ -270,15 +271,9 @@ class EncryptedObjectStorage:
def get_bucket_quota(self, bucket_name: str): def get_bucket_quota(self, bucket_name: str):
return self.storage.get_bucket_quota(bucket_name) return self.storage.get_bucket_quota(bucket_name)
def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None): def set_bucket_quota(self, bucket_name: str, *, max_bytes=None, max_objects=None):
return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects) return self.storage.set_bucket_quota(bucket_name, max_bytes=max_bytes, max_objects=max_objects)
def get_bucket_website(self, bucket_name: str):
return self.storage.get_bucket_website(bucket_name)
def set_bucket_website(self, bucket_name: str, website_config):
return self.storage.set_bucket_website(bucket_name, website_config)
def _compute_etag(self, path: Path) -> str: def _compute_etag(self, path: Path) -> str:
return self.storage._compute_etag(path) return self.storage._compute_etag(path)

View File

@@ -1,44 +1,15 @@
"""Encryption providers for server-side and client-side encryption."""
from __future__ import annotations from __future__ import annotations
import base64 import base64
import io import io
import json import json
import logging
import os
import secrets import secrets
import subprocess
import sys
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Any, BinaryIO, Dict, Generator, Optional from typing import Any, BinaryIO, Dict, Generator, Optional
from cryptography.hazmat.primitives.ciphers.aead import AESGCM from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from cryptography.hazmat.primitives.kdf.hkdf import HKDF
from cryptography.hazmat.primitives import hashes
if sys.platform != "win32":
import fcntl
logger = logging.getLogger(__name__)
def _set_secure_file_permissions(file_path: Path) -> None:
"""Set restrictive file permissions (owner read/write only)."""
if sys.platform == "win32":
try:
username = os.environ.get("USERNAME", "")
if username:
subprocess.run(
["icacls", str(file_path), "/inheritance:r",
"/grant:r", f"{username}:F"],
check=True, capture_output=True
)
else:
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
except (subprocess.SubprocessError, OSError) as exc:
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
else:
os.chmod(file_path, 0o600)
class EncryptionError(Exception): class EncryptionError(Exception):
@@ -88,34 +59,22 @@ class EncryptionMetadata:
class EncryptionProvider: class EncryptionProvider:
"""Base class for encryption providers.""" """Base class for encryption providers."""
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
raise NotImplementedError raise NotImplementedError
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes, def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
key_id: str, context: Dict[str, str] | None = None) -> bytes: key_id: str, context: Dict[str, str] | None = None) -> bytes:
raise NotImplementedError raise NotImplementedError
def generate_data_key(self) -> tuple[bytes, bytes]: def generate_data_key(self) -> tuple[bytes, bytes]:
"""Generate a data key and its encrypted form. """Generate a data key and its encrypted form.
Returns: Returns:
Tuple of (plaintext_key, encrypted_key) Tuple of (plaintext_key, encrypted_key)
""" """
raise NotImplementedError raise NotImplementedError
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
"""Decrypt an encrypted data key.
Args:
encrypted_data_key: The encrypted data key bytes
key_id: Optional key identifier (used by KMS providers)
Returns:
The decrypted data key
"""
raise NotImplementedError
class LocalKeyEncryption(EncryptionProvider): class LocalKeyEncryption(EncryptionProvider):
"""SSE-S3 style encryption using a local master key. """SSE-S3 style encryption using a local master key.
@@ -140,48 +99,28 @@ class LocalKeyEncryption(EncryptionProvider):
return self._master_key return self._master_key
def _load_or_create_master_key(self) -> bytes: def _load_or_create_master_key(self) -> bytes:
"""Load master key from file or generate a new one (with file locking).""" """Load master key from file or generate a new one."""
lock_path = self.master_key_path.with_suffix(".lock") if self.master_key_path.exists():
lock_path.parent.mkdir(parents=True, exist_ok=True) try:
return base64.b64decode(self.master_key_path.read_text().strip())
except Exception as exc:
raise EncryptionError(f"Failed to load master key: {exc}") from exc
key = secrets.token_bytes(32)
try: try:
with open(lock_path, "w") as lock_file: self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
if sys.platform == "win32": self.master_key_path.write_text(base64.b64encode(key).decode())
import msvcrt
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1)
else:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX)
try:
if self.master_key_path.exists():
try:
return base64.b64decode(self.master_key_path.read_text().strip())
except Exception as exc:
raise EncryptionError(f"Failed to load master key: {exc}") from exc
key = secrets.token_bytes(32)
try:
self.master_key_path.write_text(base64.b64encode(key).decode())
_set_secure_file_permissions(self.master_key_path)
except OSError as exc:
raise EncryptionError(f"Failed to save master key: {exc}") from exc
return key
finally:
if sys.platform == "win32":
import msvcrt
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
else:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
except OSError as exc: except OSError as exc:
raise EncryptionError(f"Failed to acquire lock for master key: {exc}") from exc raise EncryptionError(f"Failed to save master key: {exc}") from exc
return key
DATA_KEY_AAD = b'{"purpose":"data_key","version":1}'
def _encrypt_data_key(self, data_key: bytes) -> bytes: def _encrypt_data_key(self, data_key: bytes) -> bytes:
"""Encrypt the data key with the master key.""" """Encrypt the data key with the master key."""
aesgcm = AESGCM(self.master_key) aesgcm = AESGCM(self.master_key)
nonce = secrets.token_bytes(12) nonce = secrets.token_bytes(12)
encrypted = aesgcm.encrypt(nonce, data_key, self.DATA_KEY_AAD) encrypted = aesgcm.encrypt(nonce, data_key, None)
return nonce + encrypted return nonce + encrypted
def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes: def _decrypt_data_key(self, encrypted_data_key: bytes) -> bytes:
"""Decrypt the data key using the master key.""" """Decrypt the data key using the master key."""
if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag if len(encrypted_data_key) < 12 + 32 + 16: # nonce + key + tag
@@ -190,17 +129,10 @@ class LocalKeyEncryption(EncryptionProvider):
nonce = encrypted_data_key[:12] nonce = encrypted_data_key[:12]
ciphertext = encrypted_data_key[12:] ciphertext = encrypted_data_key[12:]
try: try:
return aesgcm.decrypt(nonce, ciphertext, self.DATA_KEY_AAD) return aesgcm.decrypt(nonce, ciphertext, None)
except Exception: except Exception as exc:
try: raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
return aesgcm.decrypt(nonce, ciphertext, None)
except Exception as exc:
raise EncryptionError(f"Failed to decrypt data key: {exc}") from exc
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
"""Decrypt an encrypted data key (key_id ignored for local encryption)."""
return self._decrypt_data_key(encrypted_data_key)
def generate_data_key(self) -> tuple[bytes, bytes]: def generate_data_key(self) -> tuple[bytes, bytes]:
"""Generate a data key and its encrypted form.""" """Generate a data key and its encrypted form."""
plaintext_key = secrets.token_bytes(32) plaintext_key = secrets.token_bytes(32)
@@ -210,12 +142,11 @@ class LocalKeyEncryption(EncryptionProvider):
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
"""Encrypt data using envelope encryption.""" """Encrypt data using envelope encryption."""
data_key, encrypted_data_key = self.generate_data_key() data_key, encrypted_data_key = self.generate_data_key()
aesgcm = AESGCM(data_key) aesgcm = AESGCM(data_key)
nonce = secrets.token_bytes(12) nonce = secrets.token_bytes(12)
aad = json.dumps(context, sort_keys=True).encode() if context else None ciphertext = aesgcm.encrypt(nonce, plaintext, None)
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
return EncryptionResult( return EncryptionResult(
ciphertext=ciphertext, ciphertext=ciphertext,
nonce=nonce, nonce=nonce,
@@ -226,13 +157,15 @@ class LocalKeyEncryption(EncryptionProvider):
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes, def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
key_id: str, context: Dict[str, str] | None = None) -> bytes: key_id: str, context: Dict[str, str] | None = None) -> bytes:
"""Decrypt data using envelope encryption.""" """Decrypt data using envelope encryption."""
# Decrypt the data key
data_key = self._decrypt_data_key(encrypted_data_key) data_key = self._decrypt_data_key(encrypted_data_key)
# Decrypt the data
aesgcm = AESGCM(data_key) aesgcm = AESGCM(data_key)
aad = json.dumps(context, sort_keys=True).encode() if context else None
try: try:
return aesgcm.decrypt(nonce, ciphertext, aad) return aesgcm.decrypt(nonce, ciphertext, None)
except Exception as exc: except Exception as exc:
raise EncryptionError("Failed to decrypt data") from exc raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
class StreamingEncryptor: class StreamingEncryptor:
@@ -250,93 +183,81 @@ class StreamingEncryptor:
self.chunk_size = chunk_size self.chunk_size = chunk_size
def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes: def _derive_chunk_nonce(self, base_nonce: bytes, chunk_index: int) -> bytes:
"""Derive a unique nonce for each chunk using HKDF.""" """Derive a unique nonce for each chunk."""
hkdf = HKDF( # XOR the base nonce with the chunk index
algorithm=hashes.SHA256(), nonce_int = int.from_bytes(base_nonce, "big")
length=12, derived = nonce_int ^ chunk_index
salt=base_nonce, return derived.to_bytes(12, "big")
info=chunk_index.to_bytes(4, "big"),
) def encrypt_stream(self, stream: BinaryIO,
return hkdf.derive(b"chunk_nonce")
def encrypt_stream(self, stream: BinaryIO,
context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]: context: Dict[str, str] | None = None) -> tuple[BinaryIO, EncryptionMetadata]:
"""Encrypt a stream and return encrypted stream + metadata. """Encrypt a stream and return encrypted stream + metadata."""
Performance: Writes chunks directly to output buffer instead of accumulating in list.
"""
data_key, encrypted_data_key = self.provider.generate_data_key() data_key, encrypted_data_key = self.provider.generate_data_key()
base_nonce = secrets.token_bytes(12) base_nonce = secrets.token_bytes(12)
aesgcm = AESGCM(data_key) aesgcm = AESGCM(data_key)
# Performance: Write directly to BytesIO instead of accumulating chunks encrypted_chunks = []
output = io.BytesIO()
output.write(b"\x00\x00\x00\x00") # Placeholder for chunk count
chunk_index = 0 chunk_index = 0
while True: while True:
chunk = stream.read(self.chunk_size) chunk = stream.read(self.chunk_size)
if not chunk: if not chunk:
break break
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index) chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None) encrypted_chunk = aesgcm.encrypt(chunk_nonce, chunk, None)
# Write size prefix + encrypted chunk directly size_prefix = len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big")
output.write(len(encrypted_chunk).to_bytes(self.HEADER_SIZE, "big")) encrypted_chunks.append(size_prefix + encrypted_chunk)
output.write(encrypted_chunk)
chunk_index += 1 chunk_index += 1
# Write actual chunk count to header header = chunk_index.to_bytes(4, "big")
output.seek(0) encrypted_data = header + b"".join(encrypted_chunks)
output.write(chunk_index.to_bytes(4, "big"))
output.seek(0)
metadata = EncryptionMetadata( metadata = EncryptionMetadata(
algorithm="AES256", algorithm="AES256",
key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local", key_id=self.provider.KEY_ID if hasattr(self.provider, "KEY_ID") else "local",
nonce=base_nonce, nonce=base_nonce,
encrypted_data_key=encrypted_data_key, encrypted_data_key=encrypted_data_key,
) )
return output, metadata return io.BytesIO(encrypted_data), metadata
def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO: def decrypt_stream(self, stream: BinaryIO, metadata: EncryptionMetadata) -> BinaryIO:
"""Decrypt a stream using the provided metadata. """Decrypt a stream using the provided metadata."""
if isinstance(self.provider, LocalKeyEncryption):
Performance: Writes chunks directly to output buffer instead of accumulating in list. data_key = self.provider._decrypt_data_key(metadata.encrypted_data_key)
""" else:
data_key = self.provider.decrypt_data_key(metadata.encrypted_data_key, metadata.key_id) raise EncryptionError("Unsupported provider for streaming decryption")
aesgcm = AESGCM(data_key) aesgcm = AESGCM(data_key)
base_nonce = metadata.nonce base_nonce = metadata.nonce
chunk_count_bytes = stream.read(4) chunk_count_bytes = stream.read(4)
if len(chunk_count_bytes) < 4: if len(chunk_count_bytes) < 4:
raise EncryptionError("Invalid encrypted stream: missing header") raise EncryptionError("Invalid encrypted stream: missing header")
chunk_count = int.from_bytes(chunk_count_bytes, "big") chunk_count = int.from_bytes(chunk_count_bytes, "big")
# Performance: Write directly to BytesIO instead of accumulating chunks decrypted_chunks = []
output = io.BytesIO()
for chunk_index in range(chunk_count): for chunk_index in range(chunk_count):
size_bytes = stream.read(self.HEADER_SIZE) size_bytes = stream.read(self.HEADER_SIZE)
if len(size_bytes) < self.HEADER_SIZE: if len(size_bytes) < self.HEADER_SIZE:
raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}") raise EncryptionError(f"Invalid encrypted stream: truncated at chunk {chunk_index}")
chunk_size = int.from_bytes(size_bytes, "big") chunk_size = int.from_bytes(size_bytes, "big")
encrypted_chunk = stream.read(chunk_size) encrypted_chunk = stream.read(chunk_size)
if len(encrypted_chunk) < chunk_size: if len(encrypted_chunk) < chunk_size:
raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}") raise EncryptionError(f"Invalid encrypted stream: incomplete chunk {chunk_index}")
chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index) chunk_nonce = self._derive_chunk_nonce(base_nonce, chunk_index)
try: try:
decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None) decrypted_chunk = aesgcm.decrypt(chunk_nonce, encrypted_chunk, None)
output.write(decrypted_chunk) # Write directly instead of appending to list decrypted_chunks.append(decrypted_chunk)
except Exception as exc: except Exception as exc:
raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc raise EncryptionError(f"Failed to decrypt chunk {chunk_index}: {exc}") from exc
output.seek(0) return io.BytesIO(b"".join(decrypted_chunks))
return output
class EncryptionManager: class EncryptionManager:
@@ -379,8 +300,7 @@ class EncryptionManager:
def get_streaming_encryptor(self) -> StreamingEncryptor: def get_streaming_encryptor(self) -> StreamingEncryptor:
if self._streaming_encryptor is None: if self._streaming_encryptor is None:
chunk_size = self.config.get("encryption_chunk_size_bytes", 64 * 1024) self._streaming_encryptor = StreamingEncryptor(self.get_local_provider())
self._streaming_encryptor = StreamingEncryptor(self.get_local_provider(), chunk_size=chunk_size)
return self._streaming_encryptor return self._streaming_encryptor
def encrypt_object(self, data: bytes, algorithm: str = "AES256", def encrypt_object(self, data: bytes, algorithm: str = "AES256",
@@ -423,115 +343,13 @@ class EncryptionManager:
return encryptor.decrypt_stream(stream, metadata) return encryptor.decrypt_stream(stream, metadata)
class SSECEncryption(EncryptionProvider):
"""SSE-C: Server-Side Encryption with Customer-Provided Keys.
The client provides the encryption key with each request.
Server encrypts/decrypts but never stores the key.
Required headers for PUT:
- x-amz-server-side-encryption-customer-algorithm: AES256
- x-amz-server-side-encryption-customer-key: Base64-encoded 256-bit key
- x-amz-server-side-encryption-customer-key-MD5: Base64-encoded MD5 of key
"""
KEY_ID = "customer-provided"
def __init__(self, customer_key: bytes):
if len(customer_key) != 32:
raise EncryptionError("Customer key must be exactly 256 bits (32 bytes)")
self.customer_key = customer_key
@classmethod
def from_headers(cls, headers: Dict[str, str]) -> "SSECEncryption":
algorithm = headers.get("x-amz-server-side-encryption-customer-algorithm", "")
if algorithm.upper() != "AES256":
raise EncryptionError(f"Unsupported SSE-C algorithm: {algorithm}. Only AES256 is supported.")
key_b64 = headers.get("x-amz-server-side-encryption-customer-key", "")
if not key_b64:
raise EncryptionError("Missing x-amz-server-side-encryption-customer-key header")
key_md5_b64 = headers.get("x-amz-server-side-encryption-customer-key-md5", "")
try:
customer_key = base64.b64decode(key_b64)
except Exception as e:
raise EncryptionError(f"Invalid base64 in customer key: {e}") from e
if len(customer_key) != 32:
raise EncryptionError(f"Customer key must be 256 bits, got {len(customer_key) * 8} bits")
if key_md5_b64:
import hashlib
expected_md5 = base64.b64encode(hashlib.md5(customer_key).digest()).decode()
if key_md5_b64 != expected_md5:
raise EncryptionError("Customer key MD5 mismatch")
return cls(customer_key)
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
aesgcm = AESGCM(self.customer_key)
nonce = secrets.token_bytes(12)
aad = json.dumps(context, sort_keys=True).encode() if context else None
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
return EncryptionResult(
ciphertext=ciphertext,
nonce=nonce,
key_id=self.KEY_ID,
encrypted_data_key=b"",
)
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
key_id: str, context: Dict[str, str] | None = None) -> bytes:
aesgcm = AESGCM(self.customer_key)
aad = json.dumps(context, sort_keys=True).encode() if context else None
try:
return aesgcm.decrypt(nonce, ciphertext, aad)
except Exception as exc:
raise EncryptionError("SSE-C decryption failed") from exc
def generate_data_key(self) -> tuple[bytes, bytes]:
return self.customer_key, b""
@dataclass
class SSECMetadata:
algorithm: str = "AES256"
nonce: bytes = b""
key_md5: str = ""
def to_dict(self) -> Dict[str, str]:
return {
"x-amz-server-side-encryption-customer-algorithm": self.algorithm,
"x-amz-encryption-nonce": base64.b64encode(self.nonce).decode(),
"x-amz-server-side-encryption-customer-key-MD5": self.key_md5,
}
@classmethod
def from_dict(cls, data: Dict[str, str]) -> Optional["SSECMetadata"]:
algorithm = data.get("x-amz-server-side-encryption-customer-algorithm")
if not algorithm:
return None
try:
nonce = base64.b64decode(data.get("x-amz-encryption-nonce", ""))
return cls(
algorithm=algorithm,
nonce=nonce,
key_md5=data.get("x-amz-server-side-encryption-customer-key-MD5", ""),
)
except Exception:
return None
class ClientEncryptionHelper: class ClientEncryptionHelper:
"""Helpers for client-side encryption. """Helpers for client-side encryption.
Client-side encryption is performed by the client, but this helper Client-side encryption is performed by the client, but this helper
provides key generation and materials for clients that need them. provides key generation and materials for clients that need them.
""" """
@staticmethod @staticmethod
def generate_client_key() -> Dict[str, str]: def generate_client_key() -> Dict[str, str]:
"""Generate a new client encryption key.""" """Generate a new client encryption key."""
@@ -544,36 +362,34 @@ class ClientEncryptionHelper:
} }
@staticmethod @staticmethod
def encrypt_with_key(plaintext: bytes, key_b64: str, context: Dict[str, str] | None = None) -> Dict[str, str]: def encrypt_with_key(plaintext: bytes, key_b64: str) -> Dict[str, str]:
"""Encrypt data with a client-provided key.""" """Encrypt data with a client-provided key."""
key = base64.b64decode(key_b64) key = base64.b64decode(key_b64)
if len(key) != 32: if len(key) != 32:
raise EncryptionError("Key must be 256 bits (32 bytes)") raise EncryptionError("Key must be 256 bits (32 bytes)")
aesgcm = AESGCM(key) aesgcm = AESGCM(key)
nonce = secrets.token_bytes(12) nonce = secrets.token_bytes(12)
aad = json.dumps(context, sort_keys=True).encode() if context else None ciphertext = aesgcm.encrypt(nonce, plaintext, None)
ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
return { return {
"ciphertext": base64.b64encode(ciphertext).decode(), "ciphertext": base64.b64encode(ciphertext).decode(),
"nonce": base64.b64encode(nonce).decode(), "nonce": base64.b64encode(nonce).decode(),
"algorithm": "AES-256-GCM", "algorithm": "AES-256-GCM",
} }
@staticmethod @staticmethod
def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str, context: Dict[str, str] | None = None) -> bytes: def decrypt_with_key(ciphertext_b64: str, nonce_b64: str, key_b64: str) -> bytes:
"""Decrypt data with a client-provided key.""" """Decrypt data with a client-provided key."""
key = base64.b64decode(key_b64) key = base64.b64decode(key_b64)
nonce = base64.b64decode(nonce_b64) nonce = base64.b64decode(nonce_b64)
ciphertext = base64.b64decode(ciphertext_b64) ciphertext = base64.b64decode(ciphertext_b64)
if len(key) != 32: if len(key) != 32:
raise EncryptionError("Key must be 256 bits (32 bytes)") raise EncryptionError("Key must be 256 bits (32 bytes)")
aesgcm = AESGCM(key) aesgcm = AESGCM(key)
aad = json.dumps(context, sort_keys=True).encode() if context else None
try: try:
return aesgcm.decrypt(nonce, ciphertext, aad) return aesgcm.decrypt(nonce, ciphertext, None)
except Exception as exc: except Exception as exc:
raise EncryptionError("Decryption failed") from exc raise EncryptionError(f"Decryption failed: {exc}") from exc

View File

@@ -1,3 +1,4 @@
"""Standardized error handling for API and UI responses."""
from __future__ import annotations from __future__ import annotations
import logging import logging
@@ -6,7 +7,6 @@ from typing import Optional, Dict, Any
from xml.etree.ElementTree import Element, SubElement, tostring from xml.etree.ElementTree import Element, SubElement, tostring
from flask import Response, jsonify, request, flash, redirect, url_for, g from flask import Response, jsonify, request, flash, redirect, url_for, g
from flask_limiter import RateLimitExceeded
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -173,22 +173,10 @@ def handle_app_error(error: AppError) -> Response:
return error.to_xml_response() return error.to_xml_response()
def handle_rate_limit_exceeded(e: RateLimitExceeded) -> Response:
g.s3_error_code = "SlowDown"
error = Element("Error")
SubElement(error, "Code").text = "SlowDown"
SubElement(error, "Message").text = "Please reduce your request rate."
SubElement(error, "Resource").text = request.path
SubElement(error, "RequestId").text = getattr(g, "request_id", "")
xml_bytes = tostring(error, encoding="utf-8")
return Response(xml_bytes, status=429, mimetype="application/xml")
def register_error_handlers(app): def register_error_handlers(app):
"""Register error handlers with a Flask app.""" """Register error handlers with a Flask app."""
app.register_error_handler(AppError, handle_app_error) app.register_error_handler(AppError, handle_app_error)
app.register_error_handler(RateLimitExceeded, handle_rate_limit_exceeded)
for error_class in [ for error_class in [
BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError, BucketNotFoundError, BucketAlreadyExistsError, BucketNotEmptyError,
ObjectNotFoundError, InvalidObjectKeyError, ObjectNotFoundError, InvalidObjectKeyError,

View File

@@ -1,3 +1,4 @@
"""Application-wide extension instances."""
from flask import g from flask import g
from flask_limiter import Limiter from flask_limiter import Limiter
from flask_limiter.util import get_remote_address from flask_limiter.util import get_remote_address

View File

@@ -1,25 +1,21 @@
"""Lightweight IAM-style user and policy management."""
from __future__ import annotations from __future__ import annotations
import hashlib
import hmac
import json import json
import math import math
import os
import secrets import secrets
import threading
import time
from collections import deque from collections import deque
from dataclasses import dataclass from dataclasses import dataclass
from datetime import datetime, timedelta, timezone from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set, Tuple from typing import Any, Deque, Dict, Iterable, List, Optional, Sequence, Set
class IamError(RuntimeError): class IamError(RuntimeError):
"""Raised when authentication or authorization fails.""" """Raised when authentication or authorization fails."""
S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy", "replication", "lifecycle", "cors"} S3_ACTIONS = {"list", "read", "write", "delete", "share", "policy", "replication"}
IAM_ACTIONS = { IAM_ACTIONS = {
"iam:list_users", "iam:list_users",
"iam:create_user", "iam:create_user",
@@ -30,12 +26,14 @@ IAM_ACTIONS = {
ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"} ALLOWED_ACTIONS = (S3_ACTIONS | IAM_ACTIONS) | {"iam:*"}
ACTION_ALIASES = { ACTION_ALIASES = {
# List actions
"list": "list", "list": "list",
"s3:listbucket": "list", "s3:listbucket": "list",
"s3:listallmybuckets": "list", "s3:listallmybuckets": "list",
"s3:listbucketversions": "list", "s3:listbucketversions": "list",
"s3:listmultipartuploads": "list", "s3:listmultipartuploads": "list",
"s3:listparts": "list", "s3:listparts": "list",
# Read actions
"read": "read", "read": "read",
"s3:getobject": "read", "s3:getobject": "read",
"s3:getobjectversion": "read", "s3:getobjectversion": "read",
@@ -45,6 +43,7 @@ ACTION_ALIASES = {
"s3:getbucketversioning": "read", "s3:getbucketversioning": "read",
"s3:headobject": "read", "s3:headobject": "read",
"s3:headbucket": "read", "s3:headbucket": "read",
# Write actions
"write": "write", "write": "write",
"s3:putobject": "write", "s3:putobject": "write",
"s3:createbucket": "write", "s3:createbucket": "write",
@@ -55,19 +54,23 @@ ACTION_ALIASES = {
"s3:completemultipartupload": "write", "s3:completemultipartupload": "write",
"s3:abortmultipartupload": "write", "s3:abortmultipartupload": "write",
"s3:copyobject": "write", "s3:copyobject": "write",
# Delete actions
"delete": "delete", "delete": "delete",
"s3:deleteobject": "delete", "s3:deleteobject": "delete",
"s3:deleteobjectversion": "delete", "s3:deleteobjectversion": "delete",
"s3:deletebucket": "delete", "s3:deletebucket": "delete",
"s3:deleteobjecttagging": "delete", "s3:deleteobjecttagging": "delete",
# Share actions (ACL)
"share": "share", "share": "share",
"s3:putobjectacl": "share", "s3:putobjectacl": "share",
"s3:putbucketacl": "share", "s3:putbucketacl": "share",
"s3:getbucketacl": "share", "s3:getbucketacl": "share",
# Policy actions
"policy": "policy", "policy": "policy",
"s3:putbucketpolicy": "policy", "s3:putbucketpolicy": "policy",
"s3:getbucketpolicy": "policy", "s3:getbucketpolicy": "policy",
"s3:deletebucketpolicy": "policy", "s3:deletebucketpolicy": "policy",
# Replication actions
"replication": "replication", "replication": "replication",
"s3:getreplicationconfiguration": "replication", "s3:getreplicationconfiguration": "replication",
"s3:putreplicationconfiguration": "replication", "s3:putreplicationconfiguration": "replication",
@@ -75,16 +78,7 @@ ACTION_ALIASES = {
"s3:replicateobject": "replication", "s3:replicateobject": "replication",
"s3:replicatetags": "replication", "s3:replicatetags": "replication",
"s3:replicatedelete": "replication", "s3:replicatedelete": "replication",
"lifecycle": "lifecycle", # IAM actions
"s3:getlifecycleconfiguration": "lifecycle",
"s3:putlifecycleconfiguration": "lifecycle",
"s3:deletelifecycleconfiguration": "lifecycle",
"s3:getbucketlifecycle": "lifecycle",
"s3:putbucketlifecycle": "lifecycle",
"cors": "cors",
"s3:getbucketcors": "cors",
"s3:putbucketcors": "cors",
"s3:deletebucketcors": "cors",
"iam:listusers": "iam:list_users", "iam:listusers": "iam:list_users",
"iam:createuser": "iam:create_user", "iam:createuser": "iam:create_user",
"iam:deleteuser": "iam:delete_user", "iam:deleteuser": "iam:delete_user",
@@ -121,30 +115,17 @@ class IamService:
self._raw_config: Dict[str, Any] = {} self._raw_config: Dict[str, Any] = {}
self._failed_attempts: Dict[str, Deque[datetime]] = {} self._failed_attempts: Dict[str, Deque[datetime]] = {}
self._last_load_time = 0.0 self._last_load_time = 0.0
self._principal_cache: Dict[str, Tuple[Principal, float]] = {}
self._secret_key_cache: Dict[str, Tuple[str, float]] = {}
self._cache_ttl = float(os.environ.get("IAM_CACHE_TTL_SECONDS", "5.0"))
self._last_stat_check = 0.0
self._stat_check_interval = 1.0
self._sessions: Dict[str, Dict[str, Any]] = {}
self._session_lock = threading.Lock()
self._load() self._load()
self._load_lockout_state()
def _maybe_reload(self) -> None: def _maybe_reload(self) -> None:
"""Reload configuration if the file has changed on disk.""" """Reload configuration if the file has changed on disk."""
now = time.time()
if now - self._last_stat_check < self._stat_check_interval:
return
self._last_stat_check = now
try: try:
if self.config_path.stat().st_mtime > self._last_load_time: if self.config_path.stat().st_mtime > self._last_load_time:
self._load() self._load()
self._principal_cache.clear()
self._secret_key_cache.clear()
except OSError: except OSError:
pass pass
# ---------------------- authz helpers ----------------------
def authenticate(self, access_key: str, secret_key: str) -> Principal: def authenticate(self, access_key: str, secret_key: str) -> Principal:
self._maybe_reload() self._maybe_reload()
access_key = (access_key or "").strip() access_key = (access_key or "").strip()
@@ -157,8 +138,7 @@ class IamService:
f"Access temporarily locked. Try again in {seconds} seconds." f"Access temporarily locked. Try again in {seconds} seconds."
) )
record = self._users.get(access_key) record = self._users.get(access_key)
stored_secret = record["secret_key"] if record else secrets.token_urlsafe(24) if not record or record["secret_key"] != secret_key:
if not record or not hmac.compare_digest(stored_secret, secret_key):
self._record_failed_attempt(access_key) self._record_failed_attempt(access_key)
raise IamError("Invalid credentials") raise IamError("Invalid credentials")
self._clear_failed_attempts(access_key) self._clear_failed_attempts(access_key)
@@ -169,50 +149,15 @@ class IamService:
return return
attempts = self._failed_attempts.setdefault(access_key, deque()) attempts = self._failed_attempts.setdefault(access_key, deque())
self._prune_attempts(attempts) self._prune_attempts(attempts)
attempts.append(datetime.now(timezone.utc)) attempts.append(datetime.now())
self._save_lockout_state()
def _clear_failed_attempts(self, access_key: str) -> None: def _clear_failed_attempts(self, access_key: str) -> None:
if not access_key: if not access_key:
return return
if self._failed_attempts.pop(access_key, None) is not None: self._failed_attempts.pop(access_key, None)
self._save_lockout_state()
def _lockout_file(self) -> Path:
return self.config_path.parent / "lockout_state.json"
def _load_lockout_state(self) -> None:
"""Load lockout state from disk."""
try:
if self._lockout_file().exists():
data = json.loads(self._lockout_file().read_text(encoding="utf-8"))
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window
for key, timestamps in data.get("failed_attempts", {}).items():
valid = []
for ts in timestamps:
try:
dt = datetime.fromisoformat(ts)
if dt > cutoff:
valid.append(dt)
except (ValueError, TypeError):
continue
if valid:
self._failed_attempts[key] = deque(valid)
except (OSError, json.JSONDecodeError):
pass
def _save_lockout_state(self) -> None:
"""Persist lockout state to disk."""
data: Dict[str, Any] = {"failed_attempts": {}}
for key, attempts in self._failed_attempts.items():
data["failed_attempts"][key] = [ts.isoformat() for ts in attempts]
try:
self._lockout_file().write_text(json.dumps(data), encoding="utf-8")
except OSError:
pass
def _prune_attempts(self, attempts: Deque[datetime]) -> None: def _prune_attempts(self, attempts: Deque[datetime]) -> None:
cutoff = datetime.now(timezone.utc) - self.auth_lockout_window cutoff = datetime.now() - self.auth_lockout_window
while attempts and attempts[0] < cutoff: while attempts and attempts[0] < cutoff:
attempts.popleft() attempts.popleft()
@@ -233,65 +178,15 @@ class IamService:
if len(attempts) < self.auth_max_attempts: if len(attempts) < self.auth_max_attempts:
return 0 return 0
oldest = attempts[0] oldest = attempts[0]
elapsed = (datetime.now(timezone.utc) - oldest).total_seconds() elapsed = (datetime.now() - oldest).total_seconds()
return int(max(0, self.auth_lockout_window.total_seconds() - elapsed)) return int(max(0, self.auth_lockout_window.total_seconds() - elapsed))
def create_session_token(self, access_key: str, duration_seconds: int = 3600) -> str:
"""Create a temporary session token for an access key."""
self._maybe_reload()
record = self._users.get(access_key)
if not record:
raise IamError("Unknown access key")
self._cleanup_expired_sessions()
token = secrets.token_urlsafe(32)
expires_at = time.time() + duration_seconds
self._sessions[token] = {
"access_key": access_key,
"expires_at": expires_at,
}
return token
def validate_session_token(self, access_key: str, session_token: str) -> bool:
"""Validate a session token for an access key (thread-safe, constant-time)."""
dummy_key = secrets.token_urlsafe(16)
dummy_token = secrets.token_urlsafe(32)
with self._session_lock:
session = self._sessions.get(session_token)
if not session:
hmac.compare_digest(access_key, dummy_key)
hmac.compare_digest(session_token, dummy_token)
return False
key_match = hmac.compare_digest(session["access_key"], access_key)
if not key_match:
hmac.compare_digest(session_token, dummy_token)
return False
if time.time() > session["expires_at"]:
self._sessions.pop(session_token, None)
return False
return True
def _cleanup_expired_sessions(self) -> None:
"""Remove expired session tokens."""
now = time.time()
expired = [token for token, data in self._sessions.items() if now > data["expires_at"]]
for token in expired:
del self._sessions[token]
def principal_for_key(self, access_key: str) -> Principal: def principal_for_key(self, access_key: str) -> Principal:
now = time.time()
cached = self._principal_cache.get(access_key)
if cached:
principal, cached_time = cached
if now - cached_time < self._cache_ttl:
return principal
self._maybe_reload() self._maybe_reload()
record = self._users.get(access_key) record = self._users.get(access_key)
if not record: if not record:
raise IamError("Unknown access key") raise IamError("Unknown access key")
principal = self._build_principal(access_key, record) return self._build_principal(access_key, record)
self._principal_cache[access_key] = (principal, now)
return principal
def secret_for_key(self, access_key: str) -> str: def secret_for_key(self, access_key: str) -> str:
self._maybe_reload() self._maybe_reload()
@@ -309,18 +204,6 @@ class IamService:
if not self._is_allowed(principal, normalized, action): if not self._is_allowed(principal, normalized, action):
raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'") raise IamError(f"Access denied for action '{action}' on bucket '{bucket_name}'")
def check_permissions(self, principal: Principal, bucket_name: str | None, actions: Iterable[str]) -> Dict[str, bool]:
self._maybe_reload()
bucket_name = (bucket_name or "*").lower() if bucket_name != "*" else (bucket_name or "*")
normalized_actions = {a: self._normalize_action(a) for a in actions}
results: Dict[str, bool] = {}
for original, canonical in normalized_actions.items():
if canonical not in ALLOWED_ACTIONS:
results[original] = False
else:
results[original] = self._is_allowed(principal, bucket_name, canonical)
return results
def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]: def buckets_for_principal(self, principal: Principal, buckets: Iterable[str]) -> List[str]:
return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")] return [bucket for bucket in buckets if self._is_allowed(principal, bucket, "list")]
@@ -335,6 +218,7 @@ class IamService:
return True return True
return False return False
# ---------------------- management helpers ----------------------
def list_users(self) -> List[Dict[str, Any]]: def list_users(self) -> List[Dict[str, Any]]:
listing: List[Dict[str, Any]] = [] listing: List[Dict[str, Any]] = []
for access_key, record in self._users.items(): for access_key, record in self._users.items():
@@ -381,10 +265,6 @@ class IamService:
new_secret = self._generate_secret_key() new_secret = self._generate_secret_key()
user["secret_key"] = new_secret user["secret_key"] = new_secret
self._save() self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
from .s3_api import clear_signing_key_cache
clear_signing_key_cache()
self._load() self._load()
return new_secret return new_secret
@@ -403,10 +283,6 @@ class IamService:
raise IamError("User not found") raise IamError("User not found")
self._raw_config["users"] = remaining self._raw_config["users"] = remaining
self._save() self._save()
self._principal_cache.pop(access_key, None)
self._secret_key_cache.pop(access_key, None)
from .s3_api import clear_signing_key_cache
clear_signing_key_cache()
self._load() self._load()
def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None: def update_user_policies(self, access_key: str, policies: Sequence[Dict[str, Any]]) -> None:
@@ -415,6 +291,7 @@ class IamService:
self._save() self._save()
self._load() self._load()
# ---------------------- config helpers ----------------------
def _load(self) -> None: def _load(self) -> None:
try: try:
self._last_load_time = self.config_path.stat().st_mtime self._last_load_time = self.config_path.stat().st_mtime
@@ -460,6 +337,7 @@ class IamService:
except (OSError, PermissionError) as e: except (OSError, PermissionError) as e:
raise IamError(f"Cannot save IAM config: {e}") raise IamError(f"Cannot save IAM config: {e}")
# ---------------------- insight helpers ----------------------
def config_summary(self) -> Dict[str, Any]: def config_summary(self) -> Dict[str, Any]:
return { return {
"path": str(self.config_path), "path": str(self.config_path),
@@ -541,13 +419,11 @@ class IamService:
return candidate if candidate in ALLOWED_ACTIONS else "" return candidate if candidate in ALLOWED_ACTIONS else ""
def _write_default(self) -> None: def _write_default(self) -> None:
access_key = secrets.token_hex(12)
secret_key = secrets.token_urlsafe(32)
default = { default = {
"users": [ "users": [
{ {
"access_key": access_key, "access_key": "localadmin",
"secret_key": secret_key, "secret_key": "localadmin",
"display_name": "Local Admin", "display_name": "Local Admin",
"policies": [ "policies": [
{"bucket": "*", "actions": list(ALLOWED_ACTIONS)} {"bucket": "*", "actions": list(ALLOWED_ACTIONS)}
@@ -556,14 +432,6 @@ class IamService:
] ]
} }
self.config_path.write_text(json.dumps(default, indent=2)) self.config_path.write_text(json.dumps(default, indent=2))
print(f"\n{'='*60}")
print("MYFSIO FIRST RUN - ADMIN CREDENTIALS GENERATED")
print(f"{'='*60}")
print(f"Access Key: {access_key}")
print(f"Secret Key: {secret_key}")
print(f"{'='*60}")
print(f"Missed this? Check: {self.config_path}")
print(f"{'='*60}\n")
def _generate_access_key(self) -> str: def _generate_access_key(self) -> str:
return secrets.token_hex(8) return secrets.token_hex(8)
@@ -578,33 +446,11 @@ class IamService:
raise IamError("User not found") raise IamError("User not found")
def get_secret_key(self, access_key: str) -> str | None: def get_secret_key(self, access_key: str) -> str | None:
now = time.time()
cached = self._secret_key_cache.get(access_key)
if cached:
secret_key, cached_time = cached
if now - cached_time < self._cache_ttl:
return secret_key
self._maybe_reload() self._maybe_reload()
record = self._users.get(access_key) record = self._users.get(access_key)
if record: return record["secret_key"] if record else None
secret_key = record["secret_key"]
self._secret_key_cache[access_key] = (secret_key, now)
return secret_key
return None
def get_principal(self, access_key: str) -> Principal | None: def get_principal(self, access_key: str) -> Principal | None:
now = time.time()
cached = self._principal_cache.get(access_key)
if cached:
principal, cached_time = cached
if now - cached_time < self._cache_ttl:
return principal
self._maybe_reload() self._maybe_reload()
record = self._users.get(access_key) record = self._users.get(access_key)
if record: return self._build_principal(access_key, record) if record else None
principal = self._build_principal(access_key, record)
self._principal_cache[access_key] = (principal, now)
return principal
return None

View File

@@ -1,12 +1,9 @@
"""Key Management Service (KMS) for encryption key management."""
from __future__ import annotations from __future__ import annotations
import base64 import base64
import json import json
import logging
import os
import secrets import secrets
import subprocess
import sys
import uuid import uuid
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -17,30 +14,6 @@ from cryptography.hazmat.primitives.ciphers.aead import AESGCM
from .encryption import EncryptionError, EncryptionProvider, EncryptionResult from .encryption import EncryptionError, EncryptionProvider, EncryptionResult
if sys.platform != "win32":
import fcntl
logger = logging.getLogger(__name__)
def _set_secure_file_permissions(file_path: Path) -> None:
"""Set restrictive file permissions (owner read/write only)."""
if sys.platform == "win32":
try:
username = os.environ.get("USERNAME", "")
if username:
subprocess.run(
["icacls", str(file_path), "/inheritance:r",
"/grant:r", f"{username}:F"],
check=True, capture_output=True
)
else:
logger.warning("Could not set secure permissions on %s: USERNAME not set", file_path)
except (subprocess.SubprocessError, OSError) as exc:
logger.warning("Failed to set secure permissions on %s: %s", file_path, exc)
else:
os.chmod(file_path, 0o600)
@dataclass @dataclass
class KMSKey: class KMSKey:
@@ -102,11 +75,11 @@ class KMSEncryptionProvider(EncryptionProvider):
def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult: def encrypt(self, plaintext: bytes, context: Dict[str, str] | None = None) -> EncryptionResult:
"""Encrypt data using envelope encryption with KMS.""" """Encrypt data using envelope encryption with KMS."""
data_key, encrypted_data_key = self.generate_data_key() data_key, encrypted_data_key = self.generate_data_key()
aesgcm = AESGCM(data_key) aesgcm = AESGCM(data_key)
nonce = secrets.token_bytes(12) nonce = secrets.token_bytes(12)
ciphertext = aesgcm.encrypt(nonce, plaintext, ciphertext = aesgcm.encrypt(nonce, plaintext,
json.dumps(context, sort_keys=True).encode() if context else None) json.dumps(context).encode() if context else None)
return EncryptionResult( return EncryptionResult(
ciphertext=ciphertext, ciphertext=ciphertext,
@@ -118,26 +91,15 @@ class KMSEncryptionProvider(EncryptionProvider):
def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes, def decrypt(self, ciphertext: bytes, nonce: bytes, encrypted_data_key: bytes,
key_id: str, context: Dict[str, str] | None = None) -> bytes: key_id: str, context: Dict[str, str] | None = None) -> bytes:
"""Decrypt data using envelope encryption with KMS.""" """Decrypt data using envelope encryption with KMS."""
# Note: Data key is encrypted without context (AAD), so we decrypt without context
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None) data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
if len(data_key) != 32:
raise EncryptionError("Invalid data key size")
aesgcm = AESGCM(data_key) aesgcm = AESGCM(data_key)
try: try:
return aesgcm.decrypt(nonce, ciphertext, return aesgcm.decrypt(nonce, ciphertext,
json.dumps(context, sort_keys=True).encode() if context else None) json.dumps(context).encode() if context else None)
except Exception as exc: except Exception as exc:
logger.debug("KMS decryption failed: %s", exc) raise EncryptionError(f"Failed to decrypt data: {exc}") from exc
raise EncryptionError("Failed to decrypt data") from exc
def decrypt_data_key(self, encrypted_data_key: bytes, key_id: str | None = None) -> bytes:
"""Decrypt an encrypted data key using KMS."""
if key_id is None:
key_id = self.key_id
data_key = self.kms.decrypt_data_key(key_id, encrypted_data_key, context=None)
if len(data_key) != 32:
raise EncryptionError("Invalid data key size")
return data_key
class KMSManager: class KMSManager:
@@ -147,52 +109,27 @@ class KMSManager:
Keys are stored encrypted on disk. Keys are stored encrypted on disk.
""" """
def __init__( def __init__(self, keys_path: Path, master_key_path: Path):
self,
keys_path: Path,
master_key_path: Path,
generate_data_key_min_bytes: int = 1,
generate_data_key_max_bytes: int = 1024,
):
self.keys_path = keys_path self.keys_path = keys_path
self.master_key_path = master_key_path self.master_key_path = master_key_path
self.generate_data_key_min_bytes = generate_data_key_min_bytes
self.generate_data_key_max_bytes = generate_data_key_max_bytes
self._keys: Dict[str, KMSKey] = {} self._keys: Dict[str, KMSKey] = {}
self._master_key: bytes | None = None self._master_key: bytes | None = None
self._master_aesgcm: AESGCM | None = None
self._loaded = False self._loaded = False
@property @property
def master_key(self) -> bytes: def master_key(self) -> bytes:
"""Load or create the master key for encrypting KMS keys (with file locking).""" """Load or create the master key for encrypting KMS keys."""
if self._master_key is None: if self._master_key is None:
lock_path = self.master_key_path.with_suffix(".lock") if self.master_key_path.exists():
lock_path.parent.mkdir(parents=True, exist_ok=True) self._master_key = base64.b64decode(
with open(lock_path, "w") as lock_file: self.master_key_path.read_text().strip()
if sys.platform == "win32": )
import msvcrt else:
msvcrt.locking(lock_file.fileno(), msvcrt.LK_LOCK, 1) self._master_key = secrets.token_bytes(32)
else: self.master_key_path.parent.mkdir(parents=True, exist_ok=True)
fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX) self.master_key_path.write_text(
try: base64.b64encode(self._master_key).decode()
if self.master_key_path.exists(): )
self._master_key = base64.b64decode(
self.master_key_path.read_text().strip()
)
else:
self._master_key = secrets.token_bytes(32)
self.master_key_path.write_text(
base64.b64encode(self._master_key).decode()
)
_set_secure_file_permissions(self.master_key_path)
finally:
if sys.platform == "win32":
import msvcrt
msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
else:
fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
self._master_aesgcm = AESGCM(self._master_key)
return self._master_key return self._master_key
def _load_keys(self) -> None: def _load_keys(self) -> None:
@@ -209,10 +146,8 @@ class KMSManager:
encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"]) encrypted = base64.b64decode(key_data["EncryptedKeyMaterial"])
key.key_material = self._decrypt_key_material(encrypted) key.key_material = self._decrypt_key_material(encrypted)
self._keys[key.key_id] = key self._keys[key.key_id] = key
except json.JSONDecodeError as exc: except Exception:
logger.error("Failed to parse KMS keys file: %s", exc) pass
except (ValueError, KeyError) as exc:
logger.error("Invalid KMS key data: %s", exc)
self._loaded = True self._loaded = True
@@ -224,25 +159,26 @@ class KMSManager:
encrypted = self._encrypt_key_material(key.key_material) encrypted = self._encrypt_key_material(key.key_material)
data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode() data["EncryptedKeyMaterial"] = base64.b64encode(encrypted).decode()
keys_data.append(data) keys_data.append(data)
self.keys_path.parent.mkdir(parents=True, exist_ok=True) self.keys_path.parent.mkdir(parents=True, exist_ok=True)
self.keys_path.write_text( self.keys_path.write_text(
json.dumps({"keys": keys_data}, indent=2), json.dumps({"keys": keys_data}, indent=2),
encoding="utf-8" encoding="utf-8"
) )
_set_secure_file_permissions(self.keys_path)
def _encrypt_key_material(self, key_material: bytes) -> bytes: def _encrypt_key_material(self, key_material: bytes) -> bytes:
_ = self.master_key """Encrypt key material with the master key."""
aesgcm = AESGCM(self.master_key)
nonce = secrets.token_bytes(12) nonce = secrets.token_bytes(12)
ciphertext = self._master_aesgcm.encrypt(nonce, key_material, None) ciphertext = aesgcm.encrypt(nonce, key_material, None)
return nonce + ciphertext return nonce + ciphertext
def _decrypt_key_material(self, encrypted: bytes) -> bytes: def _decrypt_key_material(self, encrypted: bytes) -> bytes:
_ = self.master_key """Decrypt key material with the master key."""
aesgcm = AESGCM(self.master_key)
nonce = encrypted[:12] nonce = encrypted[:12]
ciphertext = encrypted[12:] ciphertext = encrypted[12:]
return self._master_aesgcm.decrypt(nonce, ciphertext, None) return aesgcm.decrypt(nonce, ciphertext, None)
def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey: def create_key(self, description: str = "", key_id: str | None = None) -> KMSKey:
"""Create a new KMS key.""" """Create a new KMS key."""
@@ -275,27 +211,7 @@ class KMSManager:
"""List all keys.""" """List all keys."""
self._load_keys() self._load_keys()
return list(self._keys.values()) return list(self._keys.values())
def get_default_key_id(self) -> str:
"""Get the default KMS key ID, creating one if none exist."""
self._load_keys()
for key in self._keys.values():
if key.enabled:
return key.key_id
default_key = self.create_key(description="Default KMS Key")
return default_key.key_id
def get_provider(self, key_id: str | None = None) -> "KMSEncryptionProvider":
"""Get a KMS encryption provider for the specified key."""
if key_id is None:
key_id = self.get_default_key_id()
key = self.get_key(key_id)
if not key:
raise EncryptionError(f"Key not found: {key_id}")
if not key.enabled:
raise EncryptionError(f"Key is disabled: {key_id}")
return KMSEncryptionProvider(self, key_id)
def enable_key(self, key_id: str) -> None: def enable_key(self, key_id: str) -> None:
"""Enable a key.""" """Enable a key."""
self._load_keys() self._load_keys()
@@ -334,7 +250,7 @@ class KMSManager:
aesgcm = AESGCM(key.key_material) aesgcm = AESGCM(key.key_material)
nonce = secrets.token_bytes(12) nonce = secrets.token_bytes(12)
aad = json.dumps(context, sort_keys=True).encode() if context else None aad = json.dumps(context).encode() if context else None
ciphertext = aesgcm.encrypt(nonce, plaintext, aad) ciphertext = aesgcm.encrypt(nonce, plaintext, aad)
key_id_bytes = key_id.encode("utf-8") key_id_bytes = key_id.encode("utf-8")
@@ -363,24 +279,17 @@ class KMSManager:
encrypted = rest[12:] encrypted = rest[12:]
aesgcm = AESGCM(key.key_material) aesgcm = AESGCM(key.key_material)
aad = json.dumps(context, sort_keys=True).encode() if context else None aad = json.dumps(context).encode() if context else None
try: try:
plaintext = aesgcm.decrypt(nonce, encrypted, aad) plaintext = aesgcm.decrypt(nonce, encrypted, aad)
return plaintext, key_id return plaintext, key_id
except Exception as exc: except Exception as exc:
logger.debug("KMS decrypt operation failed: %s", exc) raise EncryptionError(f"Decryption failed: {exc}") from exc
raise EncryptionError("Decryption failed") from exc
def generate_data_key(self, key_id: str, def generate_data_key(self, key_id: str,
context: Dict[str, str] | None = None, context: Dict[str, str] | None = None) -> tuple[bytes, bytes]:
key_spec: str = "AES_256") -> tuple[bytes, bytes]:
"""Generate a data key and return both plaintext and encrypted versions. """Generate a data key and return both plaintext and encrypted versions.
Args:
key_id: The KMS key ID to use for encryption
context: Optional encryption context
key_spec: Key specification - AES_128 or AES_256 (default)
Returns: Returns:
Tuple of (plaintext_key, encrypted_key) Tuple of (plaintext_key, encrypted_key)
""" """
@@ -390,12 +299,11 @@ class KMSManager:
raise EncryptionError(f"Key not found: {key_id}") raise EncryptionError(f"Key not found: {key_id}")
if not key.enabled: if not key.enabled:
raise EncryptionError(f"Key is disabled: {key_id}") raise EncryptionError(f"Key is disabled: {key_id}")
key_bytes = 32 if key_spec == "AES_256" else 16 plaintext_key = secrets.token_bytes(32)
plaintext_key = secrets.token_bytes(key_bytes)
encrypted_key = self.encrypt(key_id, plaintext_key, context) encrypted_key = self.encrypt(key_id, plaintext_key, context)
return plaintext_key, encrypted_key return plaintext_key, encrypted_key
def decrypt_data_key(self, key_id: str, encrypted_key: bytes, def decrypt_data_key(self, key_id: str, encrypted_key: bytes,
@@ -404,6 +312,22 @@ class KMSManager:
plaintext, _ = self.decrypt(encrypted_key, context) plaintext, _ = self.decrypt(encrypted_key, context)
return plaintext return plaintext
def get_provider(self, key_id: str | None = None) -> KMSEncryptionProvider:
"""Get an encryption provider for a specific key."""
self._load_keys()
if key_id is None:
if not self._keys:
key = self.create_key("Default KMS Key")
key_id = key.key_id
else:
key_id = next(iter(self._keys.keys()))
if key_id not in self._keys:
raise EncryptionError(f"Key not found: {key_id}")
return KMSEncryptionProvider(self, key_id)
def re_encrypt(self, ciphertext: bytes, destination_key_id: str, def re_encrypt(self, ciphertext: bytes, destination_key_id: str,
source_context: Dict[str, str] | None = None, source_context: Dict[str, str] | None = None,
destination_context: Dict[str, str] | None = None) -> bytes: destination_context: Dict[str, str] | None = None) -> bytes:
@@ -415,8 +339,6 @@ class KMSManager:
def generate_random(self, num_bytes: int = 32) -> bytes: def generate_random(self, num_bytes: int = 32) -> bytes:
"""Generate cryptographically secure random bytes.""" """Generate cryptographically secure random bytes."""
if num_bytes < self.generate_data_key_min_bytes or num_bytes > self.generate_data_key_max_bytes: if num_bytes < 1 or num_bytes > 1024:
raise EncryptionError( raise EncryptionError("Number of bytes must be between 1 and 1024")
f"Number of bytes must be between {self.generate_data_key_min_bytes} and {self.generate_data_key_max_bytes}"
)
return secrets.token_bytes(num_bytes) return secrets.token_bytes(num_bytes)

View File

@@ -1,3 +1,4 @@
"""KMS and encryption API endpoints."""
from __future__ import annotations from __future__ import annotations
import base64 import base64
@@ -32,6 +33,9 @@ def _encryption():
def _error_response(code: str, message: str, status: int) -> tuple[Dict[str, Any], int]: def _error_response(code: str, message: str, status: int) -> tuple[Dict[str, Any], int]:
return {"__type": code, "message": message}, status return {"__type": code, "message": message}, status
# ---------------------- Key Management ----------------------
@kms_api_bp.route("/keys", methods=["GET", "POST"]) @kms_api_bp.route("/keys", methods=["GET", "POST"])
@limiter.limit("30 per minute") @limiter.limit("30 per minute")
def list_or_create_keys(): def list_or_create_keys():
@@ -61,6 +65,7 @@ def list_or_create_keys():
except EncryptionError as exc: except EncryptionError as exc:
return _error_response("KMSInternalException", str(exc), 400) return _error_response("KMSInternalException", str(exc), 400)
# GET - List keys
keys = kms.list_keys() keys = kms.list_keys()
return jsonify({ return jsonify({
"Keys": [{"KeyId": k.key_id, "KeyArn": k.arn} for k in keys], "Keys": [{"KeyId": k.key_id, "KeyArn": k.arn} for k in keys],
@@ -91,6 +96,7 @@ def get_or_delete_key(key_id: str):
except EncryptionError as exc: except EncryptionError as exc:
return _error_response("NotFoundException", str(exc), 404) return _error_response("NotFoundException", str(exc), 404)
# GET
key = kms.get_key(key_id) key = kms.get_key(key_id)
if not key: if not key:
return _error_response("NotFoundException", f"Key not found: {key_id}", 404) return _error_response("NotFoundException", f"Key not found: {key_id}", 404)
@@ -143,6 +149,9 @@ def disable_key(key_id: str):
except EncryptionError as exc: except EncryptionError as exc:
return _error_response("NotFoundException", str(exc), 404) return _error_response("NotFoundException", str(exc), 404)
# ---------------------- Encryption Operations ----------------------
@kms_api_bp.route("/encrypt", methods=["POST"]) @kms_api_bp.route("/encrypt", methods=["POST"])
@limiter.limit("60 per minute") @limiter.limit("60 per minute")
def encrypt_data(): def encrypt_data():
@@ -242,6 +251,7 @@ def generate_data_key():
try: try:
plaintext_key, encrypted_key = kms.generate_data_key(key_id, context) plaintext_key, encrypted_key = kms.generate_data_key(key_id, context)
# Trim key if AES_128 requested
if key_spec == "AES_128": if key_spec == "AES_128":
plaintext_key = plaintext_key[:16] plaintext_key = plaintext_key[:16]
@@ -312,7 +322,10 @@ def re_encrypt():
return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400) return _error_response("ValidationException", "CiphertextBlob must be base64 encoded", 400)
try: try:
# First decrypt, get source key id
plaintext, source_key_id = kms.decrypt(ciphertext, source_context) plaintext, source_key_id = kms.decrypt(ciphertext, source_context)
# Re-encrypt with destination key
new_ciphertext = kms.encrypt(destination_key_id, plaintext, destination_context) new_ciphertext = kms.encrypt(destination_key_id, plaintext, destination_context)
return jsonify({ return jsonify({
@@ -352,6 +365,9 @@ def generate_random():
except EncryptionError as exc: except EncryptionError as exc:
return _error_response("ValidationException", str(exc), 400) return _error_response("ValidationException", str(exc), 400)
# ---------------------- Client-Side Encryption Helpers ----------------------
@kms_api_bp.route("/client/generate-key", methods=["POST"]) @kms_api_bp.route("/client/generate-key", methods=["POST"])
@limiter.limit("30 per minute") @limiter.limit("30 per minute")
def generate_client_key(): def generate_client_key():
@@ -411,6 +427,9 @@ def client_decrypt():
except Exception as exc: except Exception as exc:
return _error_response("DecryptionError", str(exc), 400) return _error_response("DecryptionError", str(exc), 400)
# ---------------------- Encryption Materials for S3 Client-Side Encryption ----------------------
@kms_api_bp.route("/materials/<key_id>", methods=["POST"]) @kms_api_bp.route("/materials/<key_id>", methods=["POST"])
@limiter.limit("60 per minute") @limiter.limit("60 per minute")
def get_encryption_materials(key_id: str): def get_encryption_materials(key_id: str):

View File

@@ -1,340 +0,0 @@
from __future__ import annotations
import json
import logging
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from .storage import ObjectStorage, StorageError
logger = logging.getLogger(__name__)
@dataclass
class LifecycleResult:
bucket_name: str
objects_deleted: int = 0
versions_deleted: int = 0
uploads_aborted: int = 0
errors: List[str] = field(default_factory=list)
execution_time_seconds: float = 0.0
@dataclass
class LifecycleExecutionRecord:
timestamp: float
bucket_name: str
objects_deleted: int
versions_deleted: int
uploads_aborted: int
errors: List[str]
execution_time_seconds: float
def to_dict(self) -> dict:
return {
"timestamp": self.timestamp,
"bucket_name": self.bucket_name,
"objects_deleted": self.objects_deleted,
"versions_deleted": self.versions_deleted,
"uploads_aborted": self.uploads_aborted,
"errors": self.errors,
"execution_time_seconds": self.execution_time_seconds,
}
@classmethod
def from_dict(cls, data: dict) -> "LifecycleExecutionRecord":
return cls(
timestamp=data["timestamp"],
bucket_name=data["bucket_name"],
objects_deleted=data["objects_deleted"],
versions_deleted=data["versions_deleted"],
uploads_aborted=data["uploads_aborted"],
errors=data.get("errors", []),
execution_time_seconds=data["execution_time_seconds"],
)
@classmethod
def from_result(cls, result: LifecycleResult) -> "LifecycleExecutionRecord":
return cls(
timestamp=time.time(),
bucket_name=result.bucket_name,
objects_deleted=result.objects_deleted,
versions_deleted=result.versions_deleted,
uploads_aborted=result.uploads_aborted,
errors=result.errors.copy(),
execution_time_seconds=result.execution_time_seconds,
)
class LifecycleHistoryStore:
def __init__(self, storage_root: Path, max_history_per_bucket: int = 50) -> None:
self.storage_root = storage_root
self.max_history_per_bucket = max_history_per_bucket
self._lock = threading.Lock()
def _get_history_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "lifecycle_history.json"
def load_history(self, bucket_name: str) -> List[LifecycleExecutionRecord]:
path = self._get_history_path(bucket_name)
if not path.exists():
return []
try:
with open(path, "r") as f:
data = json.load(f)
return [LifecycleExecutionRecord.from_dict(d) for d in data.get("executions", [])]
except (OSError, ValueError, KeyError) as e:
logger.error(f"Failed to load lifecycle history for {bucket_name}: {e}")
return []
def save_history(self, bucket_name: str, records: List[LifecycleExecutionRecord]) -> None:
path = self._get_history_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
data = {"executions": [r.to_dict() for r in records[:self.max_history_per_bucket]]}
try:
with open(path, "w") as f:
json.dump(data, f, indent=2)
except OSError as e:
logger.error(f"Failed to save lifecycle history for {bucket_name}: {e}")
def add_record(self, bucket_name: str, record: LifecycleExecutionRecord) -> None:
with self._lock:
records = self.load_history(bucket_name)
records.insert(0, record)
self.save_history(bucket_name, records)
def get_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
records = self.load_history(bucket_name)
return records[offset:offset + limit]
class LifecycleManager:
def __init__(
self,
storage: ObjectStorage,
interval_seconds: int = 3600,
storage_root: Optional[Path] = None,
max_history_per_bucket: int = 50,
):
self.storage = storage
self.interval_seconds = interval_seconds
self.storage_root = storage_root
self._timer: Optional[threading.Timer] = None
self._shutdown = False
self._lock = threading.Lock()
self.history_store = LifecycleHistoryStore(storage_root, max_history_per_bucket) if storage_root else None
def start(self) -> None:
if self._timer is not None:
return
self._shutdown = False
self._schedule_next()
logger.info(f"Lifecycle manager started with interval {self.interval_seconds}s")
def stop(self) -> None:
self._shutdown = True
if self._timer:
self._timer.cancel()
self._timer = None
logger.info("Lifecycle manager stopped")
def _schedule_next(self) -> None:
if self._shutdown:
return
self._timer = threading.Timer(self.interval_seconds, self._run_enforcement)
self._timer.daemon = True
self._timer.start()
def _run_enforcement(self) -> None:
if self._shutdown:
return
try:
self.enforce_all_buckets()
except Exception as e:
logger.error(f"Lifecycle enforcement failed: {e}")
finally:
self._schedule_next()
def enforce_all_buckets(self) -> Dict[str, LifecycleResult]:
results = {}
try:
buckets = self.storage.list_buckets()
for bucket in buckets:
result = self.enforce_rules(bucket.name)
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0:
results[bucket.name] = result
except StorageError as e:
logger.error(f"Failed to list buckets for lifecycle: {e}")
return results
def enforce_rules(self, bucket_name: str) -> LifecycleResult:
start_time = time.time()
result = LifecycleResult(bucket_name=bucket_name)
try:
lifecycle = self.storage.get_bucket_lifecycle(bucket_name)
if not lifecycle:
return result
for rule in lifecycle:
if rule.get("Status") != "Enabled":
continue
rule_id = rule.get("ID", "unknown")
prefix = rule.get("Prefix", rule.get("Filter", {}).get("Prefix", ""))
self._enforce_expiration(bucket_name, rule, prefix, result)
self._enforce_noncurrent_expiration(bucket_name, rule, prefix, result)
self._enforce_abort_multipart(bucket_name, rule, result)
except StorageError as e:
result.errors.append(str(e))
logger.error(f"Lifecycle enforcement error for {bucket_name}: {e}")
result.execution_time_seconds = time.time() - start_time
if result.objects_deleted > 0 or result.versions_deleted > 0 or result.uploads_aborted > 0 or result.errors:
logger.info(
f"Lifecycle enforcement for {bucket_name}: "
f"deleted={result.objects_deleted}, versions={result.versions_deleted}, "
f"aborted={result.uploads_aborted}, time={result.execution_time_seconds:.2f}s"
)
if self.history_store:
record = LifecycleExecutionRecord.from_result(result)
self.history_store.add_record(bucket_name, record)
return result
def _enforce_expiration(
self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
) -> None:
expiration = rule.get("Expiration", {})
if not expiration:
return
days = expiration.get("Days")
date_str = expiration.get("Date")
if days:
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
elif date_str:
try:
cutoff = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
except ValueError:
return
else:
return
try:
objects = self.storage.list_objects_all(bucket_name)
for obj in objects:
if prefix and not obj.key.startswith(prefix):
continue
if obj.last_modified < cutoff:
try:
self.storage.delete_object(bucket_name, obj.key)
result.objects_deleted += 1
except StorageError as e:
result.errors.append(f"Failed to delete {obj.key}: {e}")
except StorageError as e:
result.errors.append(f"Failed to list objects: {e}")
def _enforce_noncurrent_expiration(
self, bucket_name: str, rule: Dict[str, Any], prefix: str, result: LifecycleResult
) -> None:
noncurrent = rule.get("NoncurrentVersionExpiration", {})
noncurrent_days = noncurrent.get("NoncurrentDays")
if not noncurrent_days:
return
cutoff = datetime.now(timezone.utc) - timedelta(days=noncurrent_days)
try:
objects = self.storage.list_objects_all(bucket_name)
for obj in objects:
if prefix and not obj.key.startswith(prefix):
continue
try:
versions = self.storage.list_object_versions(bucket_name, obj.key)
for version in versions:
archived_at_str = version.get("archived_at", "")
if not archived_at_str:
continue
try:
archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
if archived_at < cutoff:
version_id = version.get("version_id")
if version_id:
self.storage.delete_object_version(bucket_name, obj.key, version_id)
result.versions_deleted += 1
except (ValueError, StorageError) as e:
result.errors.append(f"Failed to process version: {e}")
except StorageError:
pass
except StorageError as e:
result.errors.append(f"Failed to list objects: {e}")
try:
orphaned = self.storage.list_orphaned_objects(bucket_name)
for item in orphaned:
obj_key = item.get("key", "")
if prefix and not obj_key.startswith(prefix):
continue
try:
versions = self.storage.list_object_versions(bucket_name, obj_key)
for version in versions:
archived_at_str = version.get("archived_at", "")
if not archived_at_str:
continue
try:
archived_at = datetime.fromisoformat(archived_at_str.replace("Z", "+00:00"))
if archived_at < cutoff:
version_id = version.get("version_id")
if version_id:
self.storage.delete_object_version(bucket_name, obj_key, version_id)
result.versions_deleted += 1
except (ValueError, StorageError) as e:
result.errors.append(f"Failed to process orphaned version: {e}")
except StorageError:
pass
except StorageError as e:
result.errors.append(f"Failed to list orphaned objects: {e}")
def _enforce_abort_multipart(
self, bucket_name: str, rule: Dict[str, Any], result: LifecycleResult
) -> None:
abort_config = rule.get("AbortIncompleteMultipartUpload", {})
days_after = abort_config.get("DaysAfterInitiation")
if not days_after:
return
cutoff = datetime.now(timezone.utc) - timedelta(days=days_after)
try:
uploads = self.storage.list_multipart_uploads(bucket_name)
for upload in uploads:
created_at_str = upload.get("created_at", "")
if not created_at_str:
continue
try:
created_at = datetime.fromisoformat(created_at_str.replace("Z", "+00:00"))
if created_at < cutoff:
upload_id = upload.get("upload_id")
if upload_id:
self.storage.abort_multipart_upload(bucket_name, upload_id)
result.uploads_aborted += 1
except (ValueError, StorageError) as e:
result.errors.append(f"Failed to abort upload: {e}")
except StorageError as e:
result.errors.append(f"Failed to list multipart uploads: {e}")
def run_now(self, bucket_name: Optional[str] = None) -> Dict[str, LifecycleResult]:
if bucket_name:
return {bucket_name: self.enforce_rules(bucket_name)}
return self.enforce_all_buckets()
def get_execution_history(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[LifecycleExecutionRecord]:
if not self.history_store:
return []
return self.history_store.get_history(bucket_name, limit, offset)

View File

@@ -1,381 +0,0 @@
from __future__ import annotations
import ipaddress
import json
import logging
import queue
import socket
import threading
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse
import requests
def _is_safe_url(url: str, allow_internal: bool = False) -> bool:
"""Check if a URL is safe to make requests to (not internal/private).
Args:
url: The URL to check.
allow_internal: If True, allows internal/private IP addresses.
Use for self-hosted deployments on internal networks.
"""
try:
parsed = urlparse(url)
hostname = parsed.hostname
if not hostname:
return False
cloud_metadata_hosts = {
"metadata.google.internal",
"169.254.169.254",
}
if hostname.lower() in cloud_metadata_hosts:
return False
if allow_internal:
return True
blocked_hosts = {
"localhost",
"127.0.0.1",
"0.0.0.0",
"::1",
"[::1]",
}
if hostname.lower() in blocked_hosts:
return False
try:
resolved_ip = socket.gethostbyname(hostname)
ip = ipaddress.ip_address(resolved_ip)
if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved:
return False
except (socket.gaierror, ValueError):
return False
return True
except Exception:
return False
logger = logging.getLogger(__name__)
@dataclass
class NotificationEvent:
event_name: str
bucket_name: str
object_key: str
object_size: int = 0
etag: str = ""
version_id: Optional[str] = None
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
request_id: str = field(default_factory=lambda: uuid.uuid4().hex)
source_ip: str = ""
user_identity: str = ""
def to_s3_event(self) -> Dict[str, Any]:
return {
"Records": [
{
"eventVersion": "2.1",
"eventSource": "myfsio:s3",
"awsRegion": "local",
"eventTime": self.timestamp.strftime("%Y-%m-%dT%H:%M:%S.000Z"),
"eventName": self.event_name,
"userIdentity": {
"principalId": self.user_identity or "ANONYMOUS",
},
"requestParameters": {
"sourceIPAddress": self.source_ip or "127.0.0.1",
},
"responseElements": {
"x-amz-request-id": self.request_id,
"x-amz-id-2": self.request_id,
},
"s3": {
"s3SchemaVersion": "1.0",
"configurationId": "notification",
"bucket": {
"name": self.bucket_name,
"ownerIdentity": {"principalId": "local"},
"arn": f"arn:aws:s3:::{self.bucket_name}",
},
"object": {
"key": self.object_key,
"size": self.object_size,
"eTag": self.etag,
"versionId": self.version_id or "null",
"sequencer": f"{int(time.time() * 1000):016X}",
},
},
}
]
}
@dataclass
class WebhookDestination:
url: str
headers: Dict[str, str] = field(default_factory=dict)
timeout_seconds: int = 30
retry_count: int = 3
retry_delay_seconds: int = 1
def to_dict(self) -> Dict[str, Any]:
return {
"url": self.url,
"headers": self.headers,
"timeout_seconds": self.timeout_seconds,
"retry_count": self.retry_count,
"retry_delay_seconds": self.retry_delay_seconds,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "WebhookDestination":
return cls(
url=data.get("url", ""),
headers=data.get("headers", {}),
timeout_seconds=data.get("timeout_seconds", 30),
retry_count=data.get("retry_count", 3),
retry_delay_seconds=data.get("retry_delay_seconds", 1),
)
@dataclass
class NotificationConfiguration:
id: str
events: List[str]
destination: WebhookDestination
prefix_filter: str = ""
suffix_filter: str = ""
def matches_event(self, event_name: str, object_key: str) -> bool:
event_match = False
for pattern in self.events:
if pattern.endswith("*"):
base = pattern[:-1]
if event_name.startswith(base):
event_match = True
break
elif pattern == event_name:
event_match = True
break
if not event_match:
return False
if self.prefix_filter and not object_key.startswith(self.prefix_filter):
return False
if self.suffix_filter and not object_key.endswith(self.suffix_filter):
return False
return True
def to_dict(self) -> Dict[str, Any]:
return {
"Id": self.id,
"Events": self.events,
"Destination": self.destination.to_dict(),
"Filter": {
"Key": {
"FilterRules": [
{"Name": "prefix", "Value": self.prefix_filter},
{"Name": "suffix", "Value": self.suffix_filter},
]
}
},
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "NotificationConfiguration":
prefix = ""
suffix = ""
filter_data = data.get("Filter", {})
key_filter = filter_data.get("Key", {})
for rule in key_filter.get("FilterRules", []):
if rule.get("Name") == "prefix":
prefix = rule.get("Value", "")
elif rule.get("Name") == "suffix":
suffix = rule.get("Value", "")
return cls(
id=data.get("Id", uuid.uuid4().hex),
events=data.get("Events", []),
destination=WebhookDestination.from_dict(data.get("Destination", {})),
prefix_filter=prefix,
suffix_filter=suffix,
)
class NotificationService:
def __init__(self, storage_root: Path, worker_count: int = 2, allow_internal_endpoints: bool = False):
self.storage_root = storage_root
self._allow_internal_endpoints = allow_internal_endpoints
self._configs: Dict[str, List[NotificationConfiguration]] = {}
self._queue: queue.Queue[tuple[NotificationEvent, WebhookDestination]] = queue.Queue()
self._workers: List[threading.Thread] = []
self._shutdown = threading.Event()
self._stats = {
"events_queued": 0,
"events_sent": 0,
"events_failed": 0,
}
for i in range(worker_count):
worker = threading.Thread(target=self._worker_loop, name=f"notification-worker-{i}", daemon=True)
worker.start()
self._workers.append(worker)
def _config_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "notifications.json"
def get_bucket_notifications(self, bucket_name: str) -> List[NotificationConfiguration]:
if bucket_name in self._configs:
return self._configs[bucket_name]
config_path = self._config_path(bucket_name)
if not config_path.exists():
return []
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
configs = [NotificationConfiguration.from_dict(c) for c in data.get("configurations", [])]
self._configs[bucket_name] = configs
return configs
except (json.JSONDecodeError, OSError) as e:
logger.warning(f"Failed to load notification config for {bucket_name}: {e}")
return []
def set_bucket_notifications(
self, bucket_name: str, configurations: List[NotificationConfiguration]
) -> None:
config_path = self._config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True)
data = {"configurations": [c.to_dict() for c in configurations]}
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
self._configs[bucket_name] = configurations
def delete_bucket_notifications(self, bucket_name: str) -> None:
config_path = self._config_path(bucket_name)
try:
if config_path.exists():
config_path.unlink()
except OSError:
pass
self._configs.pop(bucket_name, None)
def emit_event(self, event: NotificationEvent) -> None:
configurations = self.get_bucket_notifications(event.bucket_name)
if not configurations:
return
for config in configurations:
if config.matches_event(event.event_name, event.object_key):
self._queue.put((event, config.destination))
self._stats["events_queued"] += 1
logger.debug(
f"Queued notification for {event.event_name} on {event.bucket_name}/{event.object_key}"
)
def emit_object_created(
self,
bucket_name: str,
object_key: str,
*,
size: int = 0,
etag: str = "",
version_id: Optional[str] = None,
request_id: str = "",
source_ip: str = "",
user_identity: str = "",
operation: str = "Put",
) -> None:
event = NotificationEvent(
event_name=f"s3:ObjectCreated:{operation}",
bucket_name=bucket_name,
object_key=object_key,
object_size=size,
etag=etag,
version_id=version_id,
request_id=request_id or uuid.uuid4().hex,
source_ip=source_ip,
user_identity=user_identity,
)
self.emit_event(event)
def emit_object_removed(
self,
bucket_name: str,
object_key: str,
*,
version_id: Optional[str] = None,
request_id: str = "",
source_ip: str = "",
user_identity: str = "",
operation: str = "Delete",
) -> None:
event = NotificationEvent(
event_name=f"s3:ObjectRemoved:{operation}",
bucket_name=bucket_name,
object_key=object_key,
version_id=version_id,
request_id=request_id or uuid.uuid4().hex,
source_ip=source_ip,
user_identity=user_identity,
)
self.emit_event(event)
def _worker_loop(self) -> None:
while not self._shutdown.is_set():
try:
event, destination = self._queue.get(timeout=1.0)
except queue.Empty:
continue
try:
self._send_notification(event, destination)
self._stats["events_sent"] += 1
except Exception as e:
self._stats["events_failed"] += 1
logger.error(f"Failed to send notification: {e}")
finally:
self._queue.task_done()
def _send_notification(self, event: NotificationEvent, destination: WebhookDestination) -> None:
if not _is_safe_url(destination.url, allow_internal=self._allow_internal_endpoints):
raise RuntimeError(f"Blocked request to cloud metadata service (SSRF protection): {destination.url}")
payload = event.to_s3_event()
headers = {"Content-Type": "application/json", **destination.headers}
last_error = None
for attempt in range(destination.retry_count):
try:
response = requests.post(
destination.url,
json=payload,
headers=headers,
timeout=destination.timeout_seconds,
)
if response.status_code < 400:
logger.info(
f"Notification sent: {event.event_name} -> {destination.url} (status={response.status_code})"
)
return
last_error = f"HTTP {response.status_code}: {response.text[:200]}"
except requests.RequestException as e:
last_error = str(e)
if attempt < destination.retry_count - 1:
time.sleep(destination.retry_delay_seconds * (attempt + 1))
raise RuntimeError(f"Failed after {destination.retry_count} attempts: {last_error}")
def get_stats(self) -> Dict[str, int]:
return dict(self._stats)
def shutdown(self) -> None:
self._shutdown.set()
for worker in self._workers:
worker.join(timeout=5.0)

View File

@@ -1,234 +0,0 @@
from __future__ import annotations
import json
from dataclasses import dataclass
from datetime import datetime, timezone
from enum import Enum
from pathlib import Path
from typing import Any, Dict, Optional
class RetentionMode(Enum):
GOVERNANCE = "GOVERNANCE"
COMPLIANCE = "COMPLIANCE"
class ObjectLockError(Exception):
pass
@dataclass
class ObjectLockRetention:
mode: RetentionMode
retain_until_date: datetime
def to_dict(self) -> Dict[str, str]:
return {
"Mode": self.mode.value,
"RetainUntilDate": self.retain_until_date.isoformat(),
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> Optional["ObjectLockRetention"]:
if not data:
return None
mode_str = data.get("Mode")
date_str = data.get("RetainUntilDate")
if not mode_str or not date_str:
return None
try:
mode = RetentionMode(mode_str)
retain_until = datetime.fromisoformat(date_str.replace("Z", "+00:00"))
return cls(mode=mode, retain_until_date=retain_until)
except (ValueError, KeyError):
return None
def is_expired(self) -> bool:
return datetime.now(timezone.utc) > self.retain_until_date
@dataclass
class ObjectLockConfig:
enabled: bool = False
default_retention: Optional[ObjectLockRetention] = None
def to_dict(self) -> Dict[str, Any]:
result: Dict[str, Any] = {"ObjectLockEnabled": "Enabled" if self.enabled else "Disabled"}
if self.default_retention:
result["Rule"] = {
"DefaultRetention": {
"Mode": self.default_retention.mode.value,
"Days": None,
"Years": None,
}
}
return result
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "ObjectLockConfig":
enabled = data.get("ObjectLockEnabled") == "Enabled"
default_retention = None
rule = data.get("Rule")
if rule and "DefaultRetention" in rule:
dr = rule["DefaultRetention"]
mode_str = dr.get("Mode", "GOVERNANCE")
days = dr.get("Days")
years = dr.get("Years")
if days or years:
from datetime import timedelta
now = datetime.now(timezone.utc)
if years:
delta = timedelta(days=int(years) * 365)
else:
delta = timedelta(days=int(days))
default_retention = ObjectLockRetention(
mode=RetentionMode(mode_str),
retain_until_date=now + delta,
)
return cls(enabled=enabled, default_retention=default_retention)
class ObjectLockService:
def __init__(self, storage_root: Path):
self.storage_root = storage_root
self._config_cache: Dict[str, ObjectLockConfig] = {}
def _bucket_lock_config_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "object_lock.json"
def _object_lock_meta_path(self, bucket_name: str, object_key: str) -> Path:
safe_key = object_key.replace("/", "_").replace("\\", "_")
return (
self.storage_root / ".myfsio.sys" / "buckets" / bucket_name /
"locks" / f"{safe_key}.lock.json"
)
def get_bucket_lock_config(self, bucket_name: str) -> ObjectLockConfig:
if bucket_name in self._config_cache:
return self._config_cache[bucket_name]
config_path = self._bucket_lock_config_path(bucket_name)
if not config_path.exists():
return ObjectLockConfig(enabled=False)
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
config = ObjectLockConfig.from_dict(data)
self._config_cache[bucket_name] = config
return config
except (json.JSONDecodeError, OSError):
return ObjectLockConfig(enabled=False)
def set_bucket_lock_config(self, bucket_name: str, config: ObjectLockConfig) -> None:
config_path = self._bucket_lock_config_path(bucket_name)
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(json.dumps(config.to_dict()), encoding="utf-8")
self._config_cache[bucket_name] = config
def enable_bucket_lock(self, bucket_name: str) -> None:
config = self.get_bucket_lock_config(bucket_name)
config.enabled = True
self.set_bucket_lock_config(bucket_name, config)
def is_bucket_lock_enabled(self, bucket_name: str) -> bool:
return self.get_bucket_lock_config(bucket_name).enabled
def get_object_retention(self, bucket_name: str, object_key: str) -> Optional[ObjectLockRetention]:
meta_path = self._object_lock_meta_path(bucket_name, object_key)
if not meta_path.exists():
return None
try:
data = json.loads(meta_path.read_text(encoding="utf-8"))
return ObjectLockRetention.from_dict(data.get("retention", {}))
except (json.JSONDecodeError, OSError):
return None
def set_object_retention(
self,
bucket_name: str,
object_key: str,
retention: ObjectLockRetention,
bypass_governance: bool = False,
) -> None:
existing = self.get_object_retention(bucket_name, object_key)
if existing and not existing.is_expired():
if existing.mode == RetentionMode.COMPLIANCE:
raise ObjectLockError(
"Cannot modify retention on object with COMPLIANCE mode until retention expires"
)
if existing.mode == RetentionMode.GOVERNANCE and not bypass_governance:
raise ObjectLockError(
"Cannot modify GOVERNANCE retention without bypass-governance permission"
)
meta_path = self._object_lock_meta_path(bucket_name, object_key)
meta_path.parent.mkdir(parents=True, exist_ok=True)
existing_data: Dict[str, Any] = {}
if meta_path.exists():
try:
existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
pass
existing_data["retention"] = retention.to_dict()
meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
def get_legal_hold(self, bucket_name: str, object_key: str) -> bool:
meta_path = self._object_lock_meta_path(bucket_name, object_key)
if not meta_path.exists():
return False
try:
data = json.loads(meta_path.read_text(encoding="utf-8"))
return data.get("legal_hold", False)
except (json.JSONDecodeError, OSError):
return False
def set_legal_hold(self, bucket_name: str, object_key: str, enabled: bool) -> None:
meta_path = self._object_lock_meta_path(bucket_name, object_key)
meta_path.parent.mkdir(parents=True, exist_ok=True)
existing_data: Dict[str, Any] = {}
if meta_path.exists():
try:
existing_data = json.loads(meta_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, OSError):
pass
existing_data["legal_hold"] = enabled
meta_path.write_text(json.dumps(existing_data), encoding="utf-8")
def can_delete_object(
self,
bucket_name: str,
object_key: str,
bypass_governance: bool = False,
) -> tuple[bool, str]:
if self.get_legal_hold(bucket_name, object_key):
return False, "Object is under legal hold"
retention = self.get_object_retention(bucket_name, object_key)
if retention and not retention.is_expired():
if retention.mode == RetentionMode.COMPLIANCE:
return False, f"Object is locked in COMPLIANCE mode until {retention.retain_until_date.isoformat()}"
if retention.mode == RetentionMode.GOVERNANCE:
if not bypass_governance:
return False, f"Object is locked in GOVERNANCE mode until {retention.retain_until_date.isoformat()}"
return True, ""
def can_overwrite_object(
self,
bucket_name: str,
object_key: str,
bypass_governance: bool = False,
) -> tuple[bool, str]:
return self.can_delete_object(bucket_name, object_key, bypass_governance)
def delete_object_lock_metadata(self, bucket_name: str, object_key: str) -> None:
meta_path = self._object_lock_meta_path(bucket_name, object_key)
try:
if meta_path.exists():
meta_path.unlink()
except OSError:
pass

View File

@@ -1,300 +0,0 @@
from __future__ import annotations
import json
import logging
import random
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
MAX_LATENCY_SAMPLES = 5000
logger = logging.getLogger(__name__)
@dataclass
class OperationStats:
count: int = 0
success_count: int = 0
error_count: int = 0
latency_sum_ms: float = 0.0
latency_min_ms: float = float("inf")
latency_max_ms: float = 0.0
bytes_in: int = 0
bytes_out: int = 0
latency_samples: List[float] = field(default_factory=list)
@staticmethod
def _compute_percentile(sorted_data: List[float], p: float) -> float:
if not sorted_data:
return 0.0
k = (len(sorted_data) - 1) * (p / 100.0)
f = int(k)
c = min(f + 1, len(sorted_data) - 1)
d = k - f
return sorted_data[f] + d * (sorted_data[c] - sorted_data[f])
def record(self, latency_ms: float, success: bool, bytes_in: int = 0, bytes_out: int = 0) -> None:
self.count += 1
if success:
self.success_count += 1
else:
self.error_count += 1
self.latency_sum_ms += latency_ms
if latency_ms < self.latency_min_ms:
self.latency_min_ms = latency_ms
if latency_ms > self.latency_max_ms:
self.latency_max_ms = latency_ms
self.bytes_in += bytes_in
self.bytes_out += bytes_out
if len(self.latency_samples) < MAX_LATENCY_SAMPLES:
self.latency_samples.append(latency_ms)
else:
j = random.randint(0, self.count - 1)
if j < MAX_LATENCY_SAMPLES:
self.latency_samples[j] = latency_ms
def to_dict(self) -> Dict[str, Any]:
avg_latency = self.latency_sum_ms / self.count if self.count > 0 else 0.0
min_latency = self.latency_min_ms if self.latency_min_ms != float("inf") else 0.0
sorted_latencies = sorted(self.latency_samples)
return {
"count": self.count,
"success_count": self.success_count,
"error_count": self.error_count,
"latency_avg_ms": round(avg_latency, 2),
"latency_min_ms": round(min_latency, 2),
"latency_max_ms": round(self.latency_max_ms, 2),
"latency_p50_ms": round(self._compute_percentile(sorted_latencies, 50), 2),
"latency_p95_ms": round(self._compute_percentile(sorted_latencies, 95), 2),
"latency_p99_ms": round(self._compute_percentile(sorted_latencies, 99), 2),
"bytes_in": self.bytes_in,
"bytes_out": self.bytes_out,
}
def merge(self, other: "OperationStats") -> None:
self.count += other.count
self.success_count += other.success_count
self.error_count += other.error_count
self.latency_sum_ms += other.latency_sum_ms
if other.latency_min_ms < self.latency_min_ms:
self.latency_min_ms = other.latency_min_ms
if other.latency_max_ms > self.latency_max_ms:
self.latency_max_ms = other.latency_max_ms
self.bytes_in += other.bytes_in
self.bytes_out += other.bytes_out
combined = self.latency_samples + other.latency_samples
if len(combined) > MAX_LATENCY_SAMPLES:
random.shuffle(combined)
combined = combined[:MAX_LATENCY_SAMPLES]
self.latency_samples = combined
@dataclass
class MetricsSnapshot:
timestamp: datetime
window_seconds: int
by_method: Dict[str, Dict[str, Any]]
by_endpoint: Dict[str, Dict[str, Any]]
by_status_class: Dict[str, int]
error_codes: Dict[str, int]
totals: Dict[str, Any]
def to_dict(self) -> Dict[str, Any]:
return {
"timestamp": self.timestamp.isoformat(),
"window_seconds": self.window_seconds,
"by_method": self.by_method,
"by_endpoint": self.by_endpoint,
"by_status_class": self.by_status_class,
"error_codes": self.error_codes,
"totals": self.totals,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "MetricsSnapshot":
return cls(
timestamp=datetime.fromisoformat(data["timestamp"]),
window_seconds=data.get("window_seconds", 300),
by_method=data.get("by_method", {}),
by_endpoint=data.get("by_endpoint", {}),
by_status_class=data.get("by_status_class", {}),
error_codes=data.get("error_codes", {}),
totals=data.get("totals", {}),
)
class OperationMetricsCollector:
def __init__(
self,
storage_root: Path,
interval_minutes: int = 5,
retention_hours: int = 24,
):
self.storage_root = storage_root
self.interval_seconds = interval_minutes * 60
self.retention_hours = retention_hours
self._lock = threading.Lock()
self._by_method: Dict[str, OperationStats] = {}
self._by_endpoint: Dict[str, OperationStats] = {}
self._by_status_class: Dict[str, int] = {}
self._error_codes: Dict[str, int] = {}
self._totals = OperationStats()
self._window_start = time.time()
self._shutdown = threading.Event()
self._snapshots: List[MetricsSnapshot] = []
self._load_history()
self._snapshot_thread = threading.Thread(
target=self._snapshot_loop, name="operation-metrics-snapshot", daemon=True
)
self._snapshot_thread.start()
def _config_path(self) -> Path:
return self.storage_root / ".myfsio.sys" / "config" / "operation_metrics.json"
def _load_history(self) -> None:
config_path = self._config_path()
if not config_path.exists():
return
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
snapshots_data = data.get("snapshots", [])
self._snapshots = [MetricsSnapshot.from_dict(s) for s in snapshots_data]
self._prune_old_snapshots()
except (json.JSONDecodeError, OSError, KeyError) as e:
logger.warning(f"Failed to load operation metrics history: {e}")
def _save_history(self) -> None:
config_path = self._config_path()
config_path.parent.mkdir(parents=True, exist_ok=True)
try:
data = {"snapshots": [s.to_dict() for s in self._snapshots]}
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
except OSError as e:
logger.warning(f"Failed to save operation metrics history: {e}")
def _prune_old_snapshots(self) -> None:
if not self._snapshots:
return
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
self._snapshots = [
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
]
def _snapshot_loop(self) -> None:
while not self._shutdown.is_set():
self._shutdown.wait(timeout=self.interval_seconds)
if not self._shutdown.is_set():
self._take_snapshot()
def _take_snapshot(self) -> None:
with self._lock:
now = datetime.now(timezone.utc)
window_seconds = int(time.time() - self._window_start)
snapshot = MetricsSnapshot(
timestamp=now,
window_seconds=window_seconds,
by_method={k: v.to_dict() for k, v in self._by_method.items()},
by_endpoint={k: v.to_dict() for k, v in self._by_endpoint.items()},
by_status_class=dict(self._by_status_class),
error_codes=dict(self._error_codes),
totals=self._totals.to_dict(),
)
self._snapshots.append(snapshot)
self._prune_old_snapshots()
self._save_history()
self._by_method.clear()
self._by_endpoint.clear()
self._by_status_class.clear()
self._error_codes.clear()
self._totals = OperationStats()
self._window_start = time.time()
def record_request(
self,
method: str,
endpoint_type: str,
status_code: int,
latency_ms: float,
bytes_in: int = 0,
bytes_out: int = 0,
error_code: Optional[str] = None,
) -> None:
success = 200 <= status_code < 400
status_class = f"{status_code // 100}xx"
with self._lock:
if method not in self._by_method:
self._by_method[method] = OperationStats()
self._by_method[method].record(latency_ms, success, bytes_in, bytes_out)
if endpoint_type not in self._by_endpoint:
self._by_endpoint[endpoint_type] = OperationStats()
self._by_endpoint[endpoint_type].record(latency_ms, success, bytes_in, bytes_out)
self._by_status_class[status_class] = self._by_status_class.get(status_class, 0) + 1
if error_code:
self._error_codes[error_code] = self._error_codes.get(error_code, 0) + 1
self._totals.record(latency_ms, success, bytes_in, bytes_out)
def get_current_stats(self) -> Dict[str, Any]:
with self._lock:
window_seconds = int(time.time() - self._window_start)
return {
"timestamp": datetime.now(timezone.utc).isoformat(),
"window_seconds": window_seconds,
"by_method": {k: v.to_dict() for k, v in self._by_method.items()},
"by_endpoint": {k: v.to_dict() for k, v in self._by_endpoint.items()},
"by_status_class": dict(self._by_status_class),
"error_codes": dict(self._error_codes),
"totals": self._totals.to_dict(),
}
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
with self._lock:
snapshots = list(self._snapshots)
if hours:
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
return [s.to_dict() for s in snapshots]
def shutdown(self) -> None:
self._shutdown.set()
self._take_snapshot()
self._snapshot_thread.join(timeout=5.0)
def classify_endpoint(path: str) -> str:
if not path or path == "/":
return "service"
path = path.rstrip("/")
if path.startswith("/ui"):
return "ui"
if path.startswith("/kms"):
return "kms"
if path.startswith("/myfsio"):
return "service"
parts = path.lstrip("/").split("/")
if len(parts) == 0:
return "service"
elif len(parts) == 1:
return "bucket"
else:
return "object"

View File

@@ -1,3 +1,4 @@
"""Background replication worker."""
from __future__ import annotations from __future__ import annotations
import json import json
@@ -8,7 +9,7 @@ import time
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional from typing import Dict, Optional
import boto3 import boto3
from botocore.config import Config from botocore.config import Config
@@ -24,49 +25,15 @@ REPLICATION_USER_AGENT = "S3ReplicationAgent/1.0"
REPLICATION_MODE_NEW_ONLY = "new_only" REPLICATION_MODE_NEW_ONLY = "new_only"
REPLICATION_MODE_ALL = "all" REPLICATION_MODE_ALL = "all"
REPLICATION_MODE_BIDIRECTIONAL = "bidirectional"
def _create_s3_client(
connection: RemoteConnection,
*,
health_check: bool = False,
connect_timeout: int = 5,
read_timeout: int = 30,
max_retries: int = 2,
) -> Any:
"""Create a boto3 S3 client for the given connection.
Args:
connection: Remote S3 connection configuration
health_check: If True, use minimal retries for quick health checks
"""
config = Config(
user_agent_extra=REPLICATION_USER_AGENT,
connect_timeout=connect_timeout,
read_timeout=read_timeout,
retries={'max_attempts': 1 if health_check else max_retries},
signature_version='s3v4',
s3={'addressing_style': 'path'},
request_checksum_calculation='when_required',
response_checksum_validation='when_required',
)
return boto3.client(
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region or 'us-east-1',
config=config,
)
@dataclass @dataclass
class ReplicationStats: class ReplicationStats:
"""Statistics for replication operations - computed dynamically.""" """Statistics for replication operations - computed dynamically."""
objects_synced: int = 0 objects_synced: int = 0 # Objects that exist in both source and destination
objects_pending: int = 0 objects_pending: int = 0 # Objects in source but not in destination
objects_orphaned: int = 0 objects_orphaned: int = 0 # Objects in destination but not in source (will be deleted)
bytes_synced: int = 0 bytes_synced: int = 0 # Total bytes synced to destination
last_sync_at: Optional[float] = None last_sync_at: Optional[float] = None
last_sync_key: Optional[str] = None last_sync_key: Optional[str] = None
@@ -92,53 +59,16 @@ class ReplicationStats:
) )
@dataclass
class ReplicationFailure:
object_key: str
error_message: str
timestamp: float
failure_count: int
bucket_name: str
action: str
last_error_code: Optional[str] = None
def to_dict(self) -> dict:
return {
"object_key": self.object_key,
"error_message": self.error_message,
"timestamp": self.timestamp,
"failure_count": self.failure_count,
"bucket_name": self.bucket_name,
"action": self.action,
"last_error_code": self.last_error_code,
}
@classmethod
def from_dict(cls, data: dict) -> "ReplicationFailure":
return cls(
object_key=data["object_key"],
error_message=data["error_message"],
timestamp=data["timestamp"],
failure_count=data["failure_count"],
bucket_name=data["bucket_name"],
action=data["action"],
last_error_code=data.get("last_error_code"),
)
@dataclass @dataclass
class ReplicationRule: class ReplicationRule:
bucket_name: str bucket_name: str
target_connection_id: str target_connection_id: str
target_bucket: str target_bucket: str
enabled: bool = True enabled: bool = True
mode: str = REPLICATION_MODE_NEW_ONLY mode: str = REPLICATION_MODE_NEW_ONLY
created_at: Optional[float] = None created_at: Optional[float] = None
stats: ReplicationStats = field(default_factory=ReplicationStats) stats: ReplicationStats = field(default_factory=ReplicationStats)
sync_deletions: bool = True
last_pull_at: Optional[float] = None
filter_prefix: Optional[str] = None
def to_dict(self) -> dict: def to_dict(self) -> dict:
return { return {
"bucket_name": self.bucket_name, "bucket_name": self.bucket_name,
@@ -148,159 +78,31 @@ class ReplicationRule:
"mode": self.mode, "mode": self.mode,
"created_at": self.created_at, "created_at": self.created_at,
"stats": self.stats.to_dict(), "stats": self.stats.to_dict(),
"sync_deletions": self.sync_deletions,
"last_pull_at": self.last_pull_at,
"filter_prefix": self.filter_prefix,
} }
@classmethod @classmethod
def from_dict(cls, data: dict) -> "ReplicationRule": def from_dict(cls, data: dict) -> "ReplicationRule":
stats_data = data.pop("stats", {}) stats_data = data.pop("stats", {})
# Handle old rules without mode/created_at
if "mode" not in data: if "mode" not in data:
data["mode"] = REPLICATION_MODE_NEW_ONLY data["mode"] = REPLICATION_MODE_NEW_ONLY
if "created_at" not in data: if "created_at" not in data:
data["created_at"] = None data["created_at"] = None
if "sync_deletions" not in data:
data["sync_deletions"] = True
if "last_pull_at" not in data:
data["last_pull_at"] = None
if "filter_prefix" not in data:
data["filter_prefix"] = None
rule = cls(**data) rule = cls(**data)
rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats() rule.stats = ReplicationStats.from_dict(stats_data) if stats_data else ReplicationStats()
return rule return rule
class ReplicationFailureStore:
def __init__(self, storage_root: Path, max_failures_per_bucket: int = 50) -> None:
self.storage_root = storage_root
self.max_failures_per_bucket = max_failures_per_bucket
self._lock = threading.Lock()
self._cache: Dict[str, List[ReplicationFailure]] = {}
def _get_failures_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "replication_failures.json"
def _load_from_disk(self, bucket_name: str) -> List[ReplicationFailure]:
path = self._get_failures_path(bucket_name)
if not path.exists():
return []
try:
with open(path, "r") as f:
data = json.load(f)
return [ReplicationFailure.from_dict(d) for d in data.get("failures", [])]
except (OSError, ValueError, KeyError) as e:
logger.error(f"Failed to load replication failures for {bucket_name}: {e}")
return []
def _save_to_disk(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
path = self._get_failures_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
data = {"failures": [f.to_dict() for f in failures[:self.max_failures_per_bucket]]}
try:
with open(path, "w") as f:
json.dump(data, f, indent=2)
except OSError as e:
logger.error(f"Failed to save replication failures for {bucket_name}: {e}")
def load_failures(self, bucket_name: str) -> List[ReplicationFailure]:
if bucket_name in self._cache:
return list(self._cache[bucket_name])
failures = self._load_from_disk(bucket_name)
self._cache[bucket_name] = failures
return list(failures)
def save_failures(self, bucket_name: str, failures: List[ReplicationFailure]) -> None:
trimmed = failures[:self.max_failures_per_bucket]
self._cache[bucket_name] = trimmed
self._save_to_disk(bucket_name, trimmed)
def add_failure(self, bucket_name: str, failure: ReplicationFailure) -> None:
with self._lock:
failures = self.load_failures(bucket_name)
existing = next((f for f in failures if f.object_key == failure.object_key), None)
if existing:
existing.failure_count += 1
existing.timestamp = failure.timestamp
existing.error_message = failure.error_message
existing.last_error_code = failure.last_error_code
else:
failures.insert(0, failure)
self.save_failures(bucket_name, failures)
def remove_failure(self, bucket_name: str, object_key: str) -> bool:
with self._lock:
failures = self.load_failures(bucket_name)
original_len = len(failures)
failures = [f for f in failures if f.object_key != object_key]
if len(failures) < original_len:
self.save_failures(bucket_name, failures)
return True
return False
def clear_failures(self, bucket_name: str) -> None:
with self._lock:
self._cache.pop(bucket_name, None)
path = self._get_failures_path(bucket_name)
if path.exists():
path.unlink()
def get_failure(self, bucket_name: str, object_key: str) -> Optional[ReplicationFailure]:
failures = self.load_failures(bucket_name)
return next((f for f in failures if f.object_key == object_key), None)
def get_failure_count(self, bucket_name: str) -> int:
return len(self.load_failures(bucket_name))
class ReplicationManager: class ReplicationManager:
def __init__( def __init__(self, storage: ObjectStorage, connections: ConnectionStore, rules_path: Path) -> None:
self,
storage: ObjectStorage,
connections: ConnectionStore,
rules_path: Path,
storage_root: Path,
connect_timeout: int = 5,
read_timeout: int = 30,
max_retries: int = 2,
streaming_threshold_bytes: int = 10 * 1024 * 1024,
max_failures_per_bucket: int = 50,
) -> None:
self.storage = storage self.storage = storage
self.connections = connections self.connections = connections
self.rules_path = rules_path self.rules_path = rules_path
self.storage_root = storage_root
self.connect_timeout = connect_timeout
self.read_timeout = read_timeout
self.max_retries = max_retries
self.streaming_threshold_bytes = streaming_threshold_bytes
self._rules: Dict[str, ReplicationRule] = {} self._rules: Dict[str, ReplicationRule] = {}
self._stats_lock = threading.Lock() self._stats_lock = threading.Lock()
self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker") self._executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="ReplicationWorker")
self._shutdown = False
self.failure_store = ReplicationFailureStore(storage_root, max_failures_per_bucket)
self.reload_rules() self.reload_rules()
def _create_client(self, connection: RemoteConnection, *, health_check: bool = False) -> Any:
"""Create an S3 client with the manager's configured timeouts."""
return _create_s3_client(
connection,
health_check=health_check,
connect_timeout=self.connect_timeout,
read_timeout=self.read_timeout,
max_retries=self.max_retries,
)
def shutdown(self, wait: bool = True) -> None:
"""Shutdown the replication executor gracefully.
Args:
wait: If True, wait for pending tasks to complete
"""
self._shutdown = True
self._executor.shutdown(wait=wait)
logger.info("Replication manager shut down")
def reload_rules(self) -> None: def reload_rules(self) -> None:
if not self.rules_path.exists(): if not self.rules_path.exists():
self._rules = {} self._rules = {}
@@ -319,36 +121,13 @@ class ReplicationManager:
with open(self.rules_path, "w") as f: with open(self.rules_path, "w") as f:
json.dump(data, f, indent=2) json.dump(data, f, indent=2)
def check_endpoint_health(self, connection: RemoteConnection) -> bool:
"""Check if a remote endpoint is reachable and responsive.
Returns True if endpoint is healthy, False otherwise.
Uses short timeouts to prevent blocking.
"""
try:
s3 = self._create_client(connection, health_check=True)
s3.list_buckets()
return True
except Exception as e:
logger.warning(f"Endpoint health check failed for {connection.name} ({connection.endpoint_url}): {e}")
return False
def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]: def get_rule(self, bucket_name: str) -> Optional[ReplicationRule]:
return self._rules.get(bucket_name) return self._rules.get(bucket_name)
def list_rules(self) -> List[ReplicationRule]:
return list(self._rules.values())
def set_rule(self, rule: ReplicationRule) -> None: def set_rule(self, rule: ReplicationRule) -> None:
old_rule = self._rules.get(rule.bucket_name)
was_all_mode = old_rule and old_rule.mode == REPLICATION_MODE_ALL if old_rule else False
self._rules[rule.bucket_name] = rule self._rules[rule.bucket_name] = rule
self.save_rules() self.save_rules()
if rule.mode == REPLICATION_MODE_ALL and rule.enabled and not was_all_mode:
logger.info(f"Replication mode ALL enabled for {rule.bucket_name}, triggering sync of existing objects")
self._executor.submit(self.replicate_existing_objects, rule.bucket_name)
def delete_rule(self, bucket_name: str) -> None: def delete_rule(self, bucket_name: str) -> None:
if bucket_name in self._rules: if bucket_name in self._rules:
del self._rules[bucket_name] del self._rules[bucket_name]
@@ -372,14 +151,22 @@ class ReplicationManager:
connection = self.connections.get(rule.target_connection_id) connection = self.connections.get(rule.target_connection_id)
if not connection: if not connection:
return rule.stats return rule.stats # Return cached stats if connection unavailable
try: try:
# Get source objects
source_objects = self.storage.list_objects_all(bucket_name) source_objects = self.storage.list_objects_all(bucket_name)
source_keys = {obj.key: obj.size for obj in source_objects} source_keys = {obj.key: obj.size for obj in source_objects}
s3 = self._create_client(connection) # Get destination objects
s3 = boto3.client(
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region,
)
dest_keys = set() dest_keys = set()
bytes_synced = 0 bytes_synced = 0
paginator = s3.get_paginator('list_objects_v2') paginator = s3.get_paginator('list_objects_v2')
@@ -391,18 +178,24 @@ class ReplicationManager:
bytes_synced += obj.get('Size', 0) bytes_synced += obj.get('Size', 0)
except ClientError as e: except ClientError as e:
if e.response['Error']['Code'] == 'NoSuchBucket': if e.response['Error']['Code'] == 'NoSuchBucket':
# Destination bucket doesn't exist yet
dest_keys = set() dest_keys = set()
else: else:
raise raise
synced = source_keys.keys() & dest_keys # Compute stats
orphaned = dest_keys - source_keys.keys() synced = source_keys.keys() & dest_keys # Objects in both
orphaned = dest_keys - source_keys.keys() # In dest but not source
# For "new_only" mode, we can't determine pending since we don't know
# which objects existed before replication was enabled. Only "all" mode
# should show pending (objects that should be replicated but aren't yet).
if rule.mode == REPLICATION_MODE_ALL: if rule.mode == REPLICATION_MODE_ALL:
pending = source_keys.keys() - dest_keys pending = source_keys.keys() - dest_keys # In source but not dest
else: else:
pending = set() pending = set() # New-only mode: don't show pre-existing as pending
# Update cached stats with computed values
rule.stats.objects_synced = len(synced) rule.stats.objects_synced = len(synced)
rule.stats.objects_pending = len(pending) rule.stats.objects_pending = len(pending)
rule.stats.objects_orphaned = len(orphaned) rule.stats.objects_orphaned = len(orphaned)
@@ -412,7 +205,7 @@ class ReplicationManager:
except (ClientError, StorageError) as e: except (ClientError, StorageError) as e:
logger.error(f"Failed to compute sync status for {bucket_name}: {e}") logger.error(f"Failed to compute sync status for {bucket_name}: {e}")
return rule.stats return rule.stats # Return cached stats on error
def replicate_existing_objects(self, bucket_name: str) -> None: def replicate_existing_objects(self, bucket_name: str) -> None:
"""Trigger replication for all existing objects in a bucket.""" """Trigger replication for all existing objects in a bucket."""
@@ -425,10 +218,6 @@ class ReplicationManager:
logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found") logger.warning(f"Cannot replicate existing objects: Connection {rule.target_connection_id} not found")
return return
if not self.check_endpoint_health(connection):
logger.warning(f"Cannot replicate existing objects: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
return
try: try:
objects = self.storage.list_objects_all(bucket_name) objects = self.storage.list_objects_all(bucket_name)
logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}") logger.info(f"Starting replication of {len(objects)} existing objects from {bucket_name}")
@@ -444,7 +233,13 @@ class ReplicationManager:
raise ValueError(f"Connection {connection_id} not found") raise ValueError(f"Connection {connection_id} not found")
try: try:
s3 = self._create_client(connection) s3 = boto3.client(
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region,
)
s3.create_bucket(Bucket=bucket_name) s3.create_bucket(Bucket=bucket_name)
except ClientError as e: except ClientError as e:
logger.error(f"Failed to create remote bucket {bucket_name}: {e}") logger.error(f"Failed to create remote bucket {bucket_name}: {e}")
@@ -460,53 +255,39 @@ class ReplicationManager:
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found") logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Connection {rule.target_connection_id} not found")
return return
if not self.check_endpoint_health(connection):
logger.warning(f"Replication skipped for {bucket_name}/{object_key}: Endpoint {connection.name} ({connection.endpoint_url}) is not reachable")
return
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action) self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, action)
def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None: def _replicate_task(self, bucket_name: str, object_key: str, rule: ReplicationRule, conn: RemoteConnection, action: str) -> None:
if self._shutdown:
return
current_rule = self.get_rule(bucket_name)
if not current_rule or not current_rule.enabled:
logger.debug(f"Replication skipped for {bucket_name}/{object_key}: rule disabled or removed")
return
if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"): if ".." in object_key or object_key.startswith("/") or object_key.startswith("\\"):
logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}") logger.error(f"Invalid object key in replication (path traversal attempt): {object_key}")
return return
try: try:
from .storage import ObjectStorage from .storage import ObjectStorage
ObjectStorage._sanitize_object_key(object_key) ObjectStorage._sanitize_object_key(object_key)
except StorageError as e: except StorageError as e:
logger.error(f"Object key validation failed in replication: {e}") logger.error(f"Object key validation failed in replication: {e}")
return return
file_size = 0
try: try:
s3 = self._create_client(conn) config = Config(user_agent_extra=REPLICATION_USER_AGENT)
s3 = boto3.client(
"s3",
endpoint_url=conn.endpoint_url,
aws_access_key_id=conn.access_key,
aws_secret_access_key=conn.secret_key,
region_name=conn.region,
config=config,
)
if action == "delete": if action == "delete":
try: try:
s3.delete_object(Bucket=rule.target_bucket, Key=object_key) s3.delete_object(Bucket=rule.target_bucket, Key=object_key)
logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") logger.info(f"Replicated DELETE {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
self._update_last_sync(bucket_name, object_key) self._update_last_sync(bucket_name, object_key)
self.failure_store.remove_failure(bucket_name, object_key)
except ClientError as e: except ClientError as e:
error_code = e.response.get('Error', {}).get('Code')
logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}") logger.error(f"Replication DELETE failed for {bucket_name}/{object_key}: {e}")
self.failure_store.add_failure(bucket_name, ReplicationFailure(
object_key=object_key,
error_message=str(e),
timestamp=time.time(),
failure_count=1,
bucket_name=bucket_name,
action="delete",
last_error_code=error_code,
))
return return
try: try:
@@ -515,153 +296,61 @@ class ReplicationManager:
logger.error(f"Source object not found: {bucket_name}/{object_key}") logger.error(f"Source object not found: {bucket_name}/{object_key}")
return return
metadata = self.storage.get_object_metadata(bucket_name, object_key)
extra_args = {}
if metadata:
extra_args["Metadata"] = metadata
# Guess content type to prevent corruption/wrong handling
content_type, _ = mimetypes.guess_type(path) content_type, _ = mimetypes.guess_type(path)
file_size = path.stat().st_size file_size = path.stat().st_size
logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}") logger.info(f"Replicating {bucket_name}/{object_key}: Size={file_size}, ContentType={content_type}")
def do_upload() -> None:
"""Upload object using appropriate method based on file size.
For small files (< 10 MiB): Read into memory for simpler handling
For large files: Use streaming upload to avoid memory issues
"""
extra_args = {}
if content_type:
extra_args["ContentType"] = content_type
if file_size >= self.streaming_threshold_bytes:
s3.upload_file(
str(path),
rule.target_bucket,
object_key,
ExtraArgs=extra_args if extra_args else None,
)
else:
file_content = path.read_bytes()
put_kwargs = {
"Bucket": rule.target_bucket,
"Key": object_key,
"Body": file_content,
**extra_args,
}
s3.put_object(**put_kwargs)
try: try:
do_upload() with path.open("rb") as f:
s3.put_object(
Bucket=rule.target_bucket,
Key=object_key,
Body=f,
ContentLength=file_size,
ContentType=content_type or "application/octet-stream",
Metadata=metadata or {}
)
except (ClientError, S3UploadFailedError) as e: except (ClientError, S3UploadFailedError) as e:
error_code = None is_no_bucket = False
if isinstance(e, ClientError): if isinstance(e, ClientError):
error_code = e.response['Error']['Code'] if e.response['Error']['Code'] == 'NoSuchBucket':
is_no_bucket = True
elif isinstance(e, S3UploadFailedError): elif isinstance(e, S3UploadFailedError):
if "NoSuchBucket" in str(e): if "NoSuchBucket" in str(e):
error_code = 'NoSuchBucket' is_no_bucket = True
if error_code == 'NoSuchBucket': if is_no_bucket:
logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.") logger.info(f"Target bucket {rule.target_bucket} not found. Attempting to create it.")
bucket_ready = False
try: try:
s3.create_bucket(Bucket=rule.target_bucket) s3.create_bucket(Bucket=rule.target_bucket)
bucket_ready = True # Retry upload
logger.info(f"Created target bucket {rule.target_bucket}") with path.open("rb") as f:
except ClientError as bucket_err: s3.put_object(
if bucket_err.response['Error']['Code'] in ('BucketAlreadyExists', 'BucketAlreadyOwnedByYou'): Bucket=rule.target_bucket,
logger.debug(f"Bucket {rule.target_bucket} already exists (created by another thread)") Key=object_key,
bucket_ready = True Body=f,
else: ContentLength=file_size,
logger.error(f"Failed to create target bucket {rule.target_bucket}: {bucket_err}") ContentType=content_type or "application/octet-stream",
raise e Metadata=metadata or {}
)
if bucket_ready: except Exception as create_err:
do_upload() logger.error(f"Failed to create target bucket {rule.target_bucket}: {create_err}")
raise e # Raise original error
else: else:
raise e raise e
logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})") logger.info(f"Replicated {bucket_name}/{object_key} to {conn.name} ({rule.target_bucket})")
self._update_last_sync(bucket_name, object_key) self._update_last_sync(bucket_name, object_key)
self.failure_store.remove_failure(bucket_name, object_key)
except (ClientError, OSError, ValueError) as e: except (ClientError, OSError, ValueError) as e:
error_code = None
if isinstance(e, ClientError):
error_code = e.response.get('Error', {}).get('Code')
logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}") logger.error(f"Replication failed for {bucket_name}/{object_key}: {e}")
self.failure_store.add_failure(bucket_name, ReplicationFailure( except Exception:
object_key=object_key,
error_message=str(e),
timestamp=time.time(),
failure_count=1,
bucket_name=bucket_name,
action=action,
last_error_code=error_code,
))
except Exception as e:
logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}") logger.exception(f"Unexpected error during replication for {bucket_name}/{object_key}")
self.failure_store.add_failure(bucket_name, ReplicationFailure(
object_key=object_key,
error_message=str(e),
timestamp=time.time(),
failure_count=1,
bucket_name=bucket_name,
action=action,
last_error_code=None,
))
def get_failed_items(self, bucket_name: str, limit: int = 50, offset: int = 0) -> List[ReplicationFailure]:
failures = self.failure_store.load_failures(bucket_name)
return failures[offset:offset + limit]
def get_failure_count(self, bucket_name: str) -> int:
return self.failure_store.get_failure_count(bucket_name)
def retry_failed_item(self, bucket_name: str, object_key: str) -> bool:
failure = self.failure_store.get_failure(bucket_name, object_key)
if not failure:
return False
rule = self.get_rule(bucket_name)
if not rule or not rule.enabled:
return False
connection = self.connections.get(rule.target_connection_id)
if not connection:
logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
return False
if not self.check_endpoint_health(connection):
logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
return False
self._executor.submit(self._replicate_task, bucket_name, object_key, rule, connection, failure.action)
return True
def retry_all_failed(self, bucket_name: str) -> Dict[str, int]:
failures = self.failure_store.load_failures(bucket_name)
if not failures:
return {"submitted": 0, "skipped": 0}
rule = self.get_rule(bucket_name)
if not rule or not rule.enabled:
return {"submitted": 0, "skipped": len(failures)}
connection = self.connections.get(rule.target_connection_id)
if not connection:
logger.warning(f"Cannot retry: Connection {rule.target_connection_id} not found")
return {"submitted": 0, "skipped": len(failures)}
if not self.check_endpoint_health(connection):
logger.warning(f"Cannot retry: Endpoint {connection.name} is not reachable")
return {"submitted": 0, "skipped": len(failures)}
submitted = 0
for failure in failures:
self._executor.submit(self._replicate_task, bucket_name, failure.object_key, rule, connection, failure.action)
submitted += 1
return {"submitted": submitted, "skipped": 0}
def dismiss_failure(self, bucket_name: str, object_key: str) -> bool:
return self.failure_store.remove_failure(bucket_name, object_key)
def clear_failures(self, bucket_name: str) -> None:
self.failure_store.clear_failures(bucket_name)

File diff suppressed because it is too large Load Diff

View File

@@ -1,284 +0,0 @@
from __future__ import annotations
import json
import logging
import threading
import time
from typing import Any, Generator, Optional
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError, EndpointConnectionError, ConnectionClosedError
from flask import current_app, session
logger = logging.getLogger(__name__)
UI_PROXY_USER_AGENT = "MyFSIO-UIProxy/1.0"
_BOTO_ERROR_MAP = {
"NoSuchBucket": 404,
"NoSuchKey": 404,
"NoSuchUpload": 404,
"BucketAlreadyExists": 409,
"BucketAlreadyOwnedByYou": 409,
"BucketNotEmpty": 409,
"AccessDenied": 403,
"InvalidAccessKeyId": 403,
"SignatureDoesNotMatch": 403,
"InvalidBucketName": 400,
"InvalidArgument": 400,
"MalformedXML": 400,
"EntityTooLarge": 400,
"QuotaExceeded": 403,
}
_UPLOAD_REGISTRY_MAX_AGE = 86400
_UPLOAD_REGISTRY_CLEANUP_INTERVAL = 3600
class UploadRegistry:
def __init__(self) -> None:
self._entries: dict[str, tuple[str, str, float]] = {}
self._lock = threading.Lock()
self._last_cleanup = time.monotonic()
def register(self, upload_id: str, bucket_name: str, object_key: str) -> None:
with self._lock:
self._entries[upload_id] = (bucket_name, object_key, time.monotonic())
self._maybe_cleanup()
def get_key(self, upload_id: str, bucket_name: str) -> Optional[str]:
with self._lock:
entry = self._entries.get(upload_id)
if entry is None:
return None
stored_bucket, key, created_at = entry
if stored_bucket != bucket_name:
return None
if time.monotonic() - created_at > _UPLOAD_REGISTRY_MAX_AGE:
del self._entries[upload_id]
return None
return key
def remove(self, upload_id: str) -> None:
with self._lock:
self._entries.pop(upload_id, None)
def _maybe_cleanup(self) -> None:
now = time.monotonic()
if now - self._last_cleanup < _UPLOAD_REGISTRY_CLEANUP_INTERVAL:
return
self._last_cleanup = now
cutoff = now - _UPLOAD_REGISTRY_MAX_AGE
stale = [uid for uid, (_, _, ts) in self._entries.items() if ts < cutoff]
for uid in stale:
del self._entries[uid]
class S3ProxyClient:
def __init__(self, api_base_url: str, region: str = "us-east-1") -> None:
if not api_base_url:
raise ValueError("api_base_url is required for S3ProxyClient")
self._api_base_url = api_base_url.rstrip("/")
self._region = region
self.upload_registry = UploadRegistry()
@property
def api_base_url(self) -> str:
return self._api_base_url
def get_client(self, access_key: str, secret_key: str) -> Any:
if not access_key or not secret_key:
raise ValueError("Both access_key and secret_key are required")
config = Config(
user_agent_extra=UI_PROXY_USER_AGENT,
connect_timeout=5,
read_timeout=30,
retries={"max_attempts": 0},
signature_version="s3v4",
s3={"addressing_style": "path"},
request_checksum_calculation="when_required",
response_checksum_validation="when_required",
)
return boto3.client(
"s3",
endpoint_url=self._api_base_url,
aws_access_key_id=access_key,
aws_secret_access_key=secret_key,
region_name=self._region,
config=config,
)
def _get_proxy() -> S3ProxyClient:
proxy = current_app.extensions.get("s3_proxy")
if proxy is None:
raise RuntimeError(
"S3 proxy not configured. Set API_BASE_URL or run both API and UI servers."
)
return proxy
def _get_session_creds() -> tuple[str, str]:
secret_store = current_app.extensions["secret_store"]
secret_store.purge_expired()
token = session.get("cred_token")
if not token:
raise PermissionError("Not authenticated")
creds = secret_store.peek(token)
if not creds:
raise PermissionError("Session expired")
access_key = creds.get("access_key", "")
secret_key = creds.get("secret_key", "")
if not access_key or not secret_key:
raise PermissionError("Invalid session credentials")
return access_key, secret_key
def get_session_s3_client() -> Any:
proxy = _get_proxy()
access_key, secret_key = _get_session_creds()
return proxy.get_client(access_key, secret_key)
def get_upload_registry() -> UploadRegistry:
return _get_proxy().upload_registry
def handle_client_error(exc: ClientError) -> tuple[dict[str, str], int]:
error_info = exc.response.get("Error", {})
code = error_info.get("Code", "InternalError")
message = error_info.get("Message") or "S3 operation failed"
http_status = _BOTO_ERROR_MAP.get(code)
if http_status is None:
http_status = exc.response.get("ResponseMetadata", {}).get("HTTPStatusCode", 500)
return {"error": message}, http_status
def handle_connection_error(exc: Exception) -> tuple[dict[str, str], int]:
logger.error("S3 API connection failed: %s", exc)
return {"error": "S3 API server is unreachable. Ensure the API server is running."}, 502
def format_datetime_display(dt: Any, display_tz: str = "UTC") -> str:
from .ui import _format_datetime_display
return _format_datetime_display(dt, display_tz)
def format_datetime_iso(dt: Any, display_tz: str = "UTC") -> str:
from .ui import _format_datetime_iso
return _format_datetime_iso(dt, display_tz)
def build_url_templates(bucket_name: str) -> dict[str, str]:
from flask import url_for
preview_t = url_for("ui.object_preview", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
delete_t = url_for("ui.delete_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
presign_t = url_for("ui.object_presign", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
versions_t = url_for("ui.object_versions", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
restore_t = url_for(
"ui.restore_object_version",
bucket_name=bucket_name,
object_key="KEY_PLACEHOLDER",
version_id="VERSION_ID_PLACEHOLDER",
)
tags_t = url_for("ui.object_tags", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
copy_t = url_for("ui.copy_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
move_t = url_for("ui.move_object", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
metadata_t = url_for("ui.object_metadata", bucket_name=bucket_name, object_key="KEY_PLACEHOLDER")
return {
"preview": preview_t,
"download": preview_t + "?download=1",
"presign": presign_t,
"delete": delete_t,
"versions": versions_t,
"restore": restore_t,
"tags": tags_t,
"copy": copy_t,
"move": move_t,
"metadata": metadata_t,
}
def translate_list_objects(
boto3_response: dict[str, Any],
url_templates: dict[str, str],
display_tz: str = "UTC",
versioning_enabled: bool = False,
) -> dict[str, Any]:
objects_data = []
for obj in boto3_response.get("Contents", []):
last_mod = obj["LastModified"]
objects_data.append({
"key": obj["Key"],
"size": obj["Size"],
"last_modified": last_mod.isoformat(),
"last_modified_display": format_datetime_display(last_mod, display_tz),
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
"etag": obj.get("ETag", "").strip('"'),
})
return {
"objects": objects_data,
"is_truncated": boto3_response.get("IsTruncated", False),
"next_continuation_token": boto3_response.get("NextContinuationToken"),
"total_count": boto3_response.get("KeyCount", len(objects_data)),
"versioning_enabled": versioning_enabled,
"url_templates": url_templates,
}
def get_versioning_via_s3(client: Any, bucket_name: str) -> bool:
try:
resp = client.get_bucket_versioning(Bucket=bucket_name)
return resp.get("Status") == "Enabled"
except ClientError as exc:
code = exc.response.get("Error", {}).get("Code", "")
if code != "NoSuchBucket":
logger.warning("Failed to check versioning for %s: %s", bucket_name, code)
return False
def stream_objects_ndjson(
client: Any,
bucket_name: str,
prefix: Optional[str],
url_templates: dict[str, str],
display_tz: str = "UTC",
versioning_enabled: bool = False,
) -> Generator[str, None, None]:
meta_line = json.dumps({
"type": "meta",
"versioning_enabled": versioning_enabled,
"url_templates": url_templates,
}) + "\n"
yield meta_line
yield json.dumps({"type": "count", "total_count": 0}) + "\n"
kwargs: dict[str, Any] = {"Bucket": bucket_name, "MaxKeys": 1000}
if prefix:
kwargs["Prefix"] = prefix
try:
paginator = client.get_paginator("list_objects_v2")
for page in paginator.paginate(**kwargs):
for obj in page.get("Contents", []):
last_mod = obj["LastModified"]
yield json.dumps({
"type": "object",
"key": obj["Key"],
"size": obj["Size"],
"last_modified": last_mod.isoformat(),
"last_modified_display": format_datetime_display(last_mod, display_tz),
"last_modified_iso": format_datetime_iso(last_mod, display_tz),
"etag": obj.get("ETag", "").strip('"'),
}) + "\n"
except ClientError as exc:
error_msg = exc.response.get("Error", {}).get("Message", "S3 operation failed")
yield json.dumps({"type": "error", "error": error_msg}) + "\n"
return
except (EndpointConnectionError, ConnectionClosedError):
yield json.dumps({"type": "error", "error": "S3 API server is unreachable"}) + "\n"
return
yield json.dumps({"type": "done"}) + "\n"

View File

@@ -1,3 +1,4 @@
"""Ephemeral store for one-time secrets communicated to the UI."""
from __future__ import annotations from __future__ import annotations
import secrets import secrets
@@ -18,18 +19,6 @@ class EphemeralSecretStore:
self._store[token] = (payload, expires_at) self._store[token] = (payload, expires_at)
return token return token
def peek(self, token: str | None) -> Any | None:
if not token:
return None
entry = self._store.get(token)
if not entry:
return None
payload, expires_at = entry
if expires_at < time.time():
self._store.pop(token, None)
return None
return payload
def pop(self, token: str | None) -> Any | None: def pop(self, token: str | None) -> Any | None:
if not token: if not token:
return None return None

View File

@@ -1,171 +0,0 @@
"""S3 SelectObjectContent SQL query execution using DuckDB."""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any, Dict, Generator, Optional
try:
import duckdb
DUCKDB_AVAILABLE = True
except ImportError:
DUCKDB_AVAILABLE = False
class SelectError(Exception):
"""Error during SELECT query execution."""
pass
def execute_select_query(
file_path: Path,
expression: str,
input_format: str,
input_config: Dict[str, Any],
output_format: str,
output_config: Dict[str, Any],
chunk_size: int = 65536,
) -> Generator[bytes, None, None]:
"""Execute SQL query on object content."""
if not DUCKDB_AVAILABLE:
raise SelectError("DuckDB is not installed. Install with: pip install duckdb")
conn = duckdb.connect(":memory:")
try:
if input_format == "CSV":
_load_csv(conn, file_path, input_config)
elif input_format == "JSON":
_load_json(conn, file_path, input_config)
elif input_format == "Parquet":
_load_parquet(conn, file_path)
else:
raise SelectError(f"Unsupported input format: {input_format}")
normalized_expression = expression.replace("s3object", "data").replace("S3Object", "data")
try:
result = conn.execute(normalized_expression)
except duckdb.Error as exc:
raise SelectError(f"SQL execution error: {exc}")
if output_format == "CSV":
yield from _output_csv(result, output_config, chunk_size)
elif output_format == "JSON":
yield from _output_json(result, output_config, chunk_size)
else:
raise SelectError(f"Unsupported output format: {output_format}")
finally:
conn.close()
def _load_csv(conn, file_path: Path, config: Dict[str, Any]) -> None:
"""Load CSV file into DuckDB."""
file_header_info = config.get("file_header_info", "NONE")
delimiter = config.get("field_delimiter", ",")
quote = config.get("quote_character", '"')
header = file_header_info in ("USE", "IGNORE")
path_str = str(file_path).replace("\\", "/")
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_csv('{path_str}',
header={header},
delim='{delimiter}',
quote='{quote}'
)
""")
def _load_json(conn, file_path: Path, config: Dict[str, Any]) -> None:
"""Load JSON file into DuckDB."""
json_type = config.get("type", "DOCUMENT")
path_str = str(file_path).replace("\\", "/")
if json_type == "LINES":
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_json_auto('{path_str}', format='newline_delimited')
""")
else:
conn.execute(f"""
CREATE TABLE data AS
SELECT * FROM read_json_auto('{path_str}', format='array')
""")
def _load_parquet(conn, file_path: Path) -> None:
"""Load Parquet file into DuckDB."""
path_str = str(file_path).replace("\\", "/")
conn.execute(f"CREATE TABLE data AS SELECT * FROM read_parquet('{path_str}')")
def _output_csv(
result,
config: Dict[str, Any],
chunk_size: int,
) -> Generator[bytes, None, None]:
"""Output query results as CSV."""
delimiter = config.get("field_delimiter", ",")
record_delimiter = config.get("record_delimiter", "\n")
quote = config.get("quote_character", '"')
buffer = ""
while True:
rows = result.fetchmany(1000)
if not rows:
break
for row in rows:
fields = []
for value in row:
if value is None:
fields.append("")
elif isinstance(value, str):
if delimiter in value or quote in value or record_delimiter in value:
escaped = value.replace(quote, quote + quote)
fields.append(f'{quote}{escaped}{quote}')
else:
fields.append(value)
else:
fields.append(str(value))
buffer += delimiter.join(fields) + record_delimiter
while len(buffer) >= chunk_size:
yield buffer[:chunk_size].encode("utf-8")
buffer = buffer[chunk_size:]
if buffer:
yield buffer.encode("utf-8")
def _output_json(
result,
config: Dict[str, Any],
chunk_size: int,
) -> Generator[bytes, None, None]:
"""Output query results as JSON Lines."""
record_delimiter = config.get("record_delimiter", "\n")
columns = [desc[0] for desc in result.description]
buffer = ""
while True:
rows = result.fetchmany(1000)
if not rows:
break
for row in rows:
record = dict(zip(columns, row))
buffer += json.dumps(record, default=str) + record_delimiter
while len(buffer) >= chunk_size:
yield buffer[:chunk_size].encode("utf-8")
buffer = buffer[chunk_size:]
if buffer:
yield buffer.encode("utf-8")

View File

@@ -1,177 +0,0 @@
from __future__ import annotations
import json
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional
@dataclass
class SiteInfo:
site_id: str
endpoint: str
region: str = "us-east-1"
priority: int = 100
display_name: str = ""
created_at: Optional[float] = None
updated_at: Optional[float] = None
def __post_init__(self) -> None:
if not self.display_name:
self.display_name = self.site_id
if self.created_at is None:
self.created_at = time.time()
def to_dict(self) -> Dict[str, Any]:
return {
"site_id": self.site_id,
"endpoint": self.endpoint,
"region": self.region,
"priority": self.priority,
"display_name": self.display_name,
"created_at": self.created_at,
"updated_at": self.updated_at,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> SiteInfo:
return cls(
site_id=data["site_id"],
endpoint=data.get("endpoint", ""),
region=data.get("region", "us-east-1"),
priority=data.get("priority", 100),
display_name=data.get("display_name", ""),
created_at=data.get("created_at"),
updated_at=data.get("updated_at"),
)
@dataclass
class PeerSite:
site_id: str
endpoint: str
region: str = "us-east-1"
priority: int = 100
display_name: str = ""
created_at: Optional[float] = None
updated_at: Optional[float] = None
connection_id: Optional[str] = None
is_healthy: Optional[bool] = None
last_health_check: Optional[float] = None
def __post_init__(self) -> None:
if not self.display_name:
self.display_name = self.site_id
if self.created_at is None:
self.created_at = time.time()
def to_dict(self) -> Dict[str, Any]:
return {
"site_id": self.site_id,
"endpoint": self.endpoint,
"region": self.region,
"priority": self.priority,
"display_name": self.display_name,
"created_at": self.created_at,
"updated_at": self.updated_at,
"connection_id": self.connection_id,
"is_healthy": self.is_healthy,
"last_health_check": self.last_health_check,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> PeerSite:
return cls(
site_id=data["site_id"],
endpoint=data.get("endpoint", ""),
region=data.get("region", "us-east-1"),
priority=data.get("priority", 100),
display_name=data.get("display_name", ""),
created_at=data.get("created_at"),
updated_at=data.get("updated_at"),
connection_id=data.get("connection_id"),
is_healthy=data.get("is_healthy"),
last_health_check=data.get("last_health_check"),
)
class SiteRegistry:
def __init__(self, config_path: Path) -> None:
self.config_path = config_path
self._local_site: Optional[SiteInfo] = None
self._peers: Dict[str, PeerSite] = {}
self.reload()
def reload(self) -> None:
if not self.config_path.exists():
self._local_site = None
self._peers = {}
return
try:
with open(self.config_path, "r", encoding="utf-8") as f:
data = json.load(f)
if data.get("local"):
self._local_site = SiteInfo.from_dict(data["local"])
else:
self._local_site = None
self._peers = {}
for peer_data in data.get("peers", []):
peer = PeerSite.from_dict(peer_data)
self._peers[peer.site_id] = peer
except (OSError, json.JSONDecodeError, KeyError):
self._local_site = None
self._peers = {}
def save(self) -> None:
self.config_path.parent.mkdir(parents=True, exist_ok=True)
data = {
"local": self._local_site.to_dict() if self._local_site else None,
"peers": [peer.to_dict() for peer in self._peers.values()],
}
with open(self.config_path, "w", encoding="utf-8") as f:
json.dump(data, f, indent=2)
def get_local_site(self) -> Optional[SiteInfo]:
return self._local_site
def set_local_site(self, site: SiteInfo) -> None:
site.updated_at = time.time()
self._local_site = site
self.save()
def list_peers(self) -> List[PeerSite]:
return list(self._peers.values())
def get_peer(self, site_id: str) -> Optional[PeerSite]:
return self._peers.get(site_id)
def add_peer(self, peer: PeerSite) -> None:
peer.created_at = peer.created_at or time.time()
self._peers[peer.site_id] = peer
self.save()
def update_peer(self, peer: PeerSite) -> None:
if peer.site_id not in self._peers:
raise ValueError(f"Peer {peer.site_id} not found")
peer.updated_at = time.time()
self._peers[peer.site_id] = peer
self.save()
def delete_peer(self, site_id: str) -> bool:
if site_id in self._peers:
del self._peers[site_id]
self.save()
return True
return False
def update_health(self, site_id: str, is_healthy: bool) -> None:
peer = self._peers.get(site_id)
if peer:
peer.is_healthy = is_healthy
peer.last_health_check = time.time()
self.save()

View File

@@ -1,416 +0,0 @@
from __future__ import annotations
import json
import logging
import tempfile
import threading
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING
import boto3
from botocore.config import Config
from botocore.exceptions import ClientError
if TYPE_CHECKING:
from .connections import ConnectionStore, RemoteConnection
from .replication import ReplicationManager, ReplicationRule
from .storage import ObjectStorage
logger = logging.getLogger(__name__)
SITE_SYNC_USER_AGENT = "SiteSyncAgent/1.0"
@dataclass
class SyncedObjectInfo:
last_synced_at: float
remote_etag: str
source: str
def to_dict(self) -> Dict[str, Any]:
return {
"last_synced_at": self.last_synced_at,
"remote_etag": self.remote_etag,
"source": self.source,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "SyncedObjectInfo":
return cls(
last_synced_at=data["last_synced_at"],
remote_etag=data["remote_etag"],
source=data["source"],
)
@dataclass
class SyncState:
synced_objects: Dict[str, SyncedObjectInfo] = field(default_factory=dict)
last_full_sync: Optional[float] = None
def to_dict(self) -> Dict[str, Any]:
return {
"synced_objects": {k: v.to_dict() for k, v in self.synced_objects.items()},
"last_full_sync": self.last_full_sync,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "SyncState":
synced_objects = {}
for k, v in data.get("synced_objects", {}).items():
synced_objects[k] = SyncedObjectInfo.from_dict(v)
return cls(
synced_objects=synced_objects,
last_full_sync=data.get("last_full_sync"),
)
@dataclass
class SiteSyncStats:
last_sync_at: Optional[float] = None
objects_pulled: int = 0
objects_skipped: int = 0
conflicts_resolved: int = 0
deletions_applied: int = 0
errors: int = 0
def to_dict(self) -> Dict[str, Any]:
return {
"last_sync_at": self.last_sync_at,
"objects_pulled": self.objects_pulled,
"objects_skipped": self.objects_skipped,
"conflicts_resolved": self.conflicts_resolved,
"deletions_applied": self.deletions_applied,
"errors": self.errors,
}
@dataclass
class RemoteObjectMeta:
key: str
size: int
last_modified: datetime
etag: str
@classmethod
def from_s3_object(cls, obj: Dict[str, Any]) -> "RemoteObjectMeta":
return cls(
key=obj["Key"],
size=obj.get("Size", 0),
last_modified=obj["LastModified"],
etag=obj.get("ETag", "").strip('"'),
)
def _create_sync_client(
connection: "RemoteConnection",
*,
connect_timeout: int = 10,
read_timeout: int = 120,
max_retries: int = 2,
) -> Any:
config = Config(
user_agent_extra=SITE_SYNC_USER_AGENT,
connect_timeout=connect_timeout,
read_timeout=read_timeout,
retries={"max_attempts": max_retries},
signature_version="s3v4",
s3={"addressing_style": "path"},
request_checksum_calculation="when_required",
response_checksum_validation="when_required",
)
return boto3.client(
"s3",
endpoint_url=connection.endpoint_url,
aws_access_key_id=connection.access_key,
aws_secret_access_key=connection.secret_key,
region_name=connection.region or "us-east-1",
config=config,
)
class SiteSyncWorker:
def __init__(
self,
storage: "ObjectStorage",
connections: "ConnectionStore",
replication_manager: "ReplicationManager",
storage_root: Path,
interval_seconds: int = 60,
batch_size: int = 100,
connect_timeout: int = 10,
read_timeout: int = 120,
max_retries: int = 2,
clock_skew_tolerance_seconds: float = 1.0,
):
self.storage = storage
self.connections = connections
self.replication_manager = replication_manager
self.storage_root = storage_root
self.interval_seconds = interval_seconds
self.batch_size = batch_size
self.connect_timeout = connect_timeout
self.read_timeout = read_timeout
self.max_retries = max_retries
self.clock_skew_tolerance_seconds = clock_skew_tolerance_seconds
self._lock = threading.Lock()
self._shutdown = threading.Event()
self._sync_thread: Optional[threading.Thread] = None
self._bucket_stats: Dict[str, SiteSyncStats] = {}
def _create_client(self, connection: "RemoteConnection") -> Any:
"""Create an S3 client with the worker's configured timeouts."""
return _create_sync_client(
connection,
connect_timeout=self.connect_timeout,
read_timeout=self.read_timeout,
max_retries=self.max_retries,
)
def start(self) -> None:
if self._sync_thread is not None and self._sync_thread.is_alive():
return
self._shutdown.clear()
self._sync_thread = threading.Thread(
target=self._sync_loop, name="site-sync-worker", daemon=True
)
self._sync_thread.start()
logger.info("Site sync worker started (interval=%ds)", self.interval_seconds)
def shutdown(self) -> None:
self._shutdown.set()
if self._sync_thread is not None:
self._sync_thread.join(timeout=10.0)
logger.info("Site sync worker shut down")
def trigger_sync(self, bucket_name: str) -> Optional[SiteSyncStats]:
from .replication import REPLICATION_MODE_BIDIRECTIONAL
rule = self.replication_manager.get_rule(bucket_name)
if not rule or rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
return None
return self._sync_bucket(rule)
def get_stats(self, bucket_name: str) -> Optional[SiteSyncStats]:
with self._lock:
return self._bucket_stats.get(bucket_name)
def _sync_loop(self) -> None:
while not self._shutdown.is_set():
self._shutdown.wait(timeout=self.interval_seconds)
if self._shutdown.is_set():
break
self._run_sync_cycle()
def _run_sync_cycle(self) -> None:
from .replication import REPLICATION_MODE_BIDIRECTIONAL
for bucket_name, rule in list(self.replication_manager._rules.items()):
if self._shutdown.is_set():
break
if rule.mode != REPLICATION_MODE_BIDIRECTIONAL or not rule.enabled:
continue
try:
stats = self._sync_bucket(rule)
with self._lock:
self._bucket_stats[bucket_name] = stats
except Exception as e:
logger.exception("Site sync failed for bucket %s: %s", bucket_name, e)
def _sync_bucket(self, rule: "ReplicationRule") -> SiteSyncStats:
stats = SiteSyncStats()
connection = self.connections.get(rule.target_connection_id)
if not connection:
logger.warning("Connection %s not found for bucket %s", rule.target_connection_id, rule.bucket_name)
stats.errors += 1
return stats
try:
local_objects = self._list_local_objects(rule.bucket_name)
except Exception as e:
logger.error("Failed to list local objects for %s: %s", rule.bucket_name, e)
stats.errors += 1
return stats
try:
remote_objects = self._list_remote_objects(rule, connection)
except Exception as e:
logger.error("Failed to list remote objects for %s: %s", rule.bucket_name, e)
stats.errors += 1
return stats
sync_state = self._load_sync_state(rule.bucket_name)
local_keys = set(local_objects.keys())
remote_keys = set(remote_objects.keys())
to_pull = []
for key in remote_keys:
remote_meta = remote_objects[key]
local_meta = local_objects.get(key)
if local_meta is None:
to_pull.append(key)
else:
resolution = self._resolve_conflict(local_meta, remote_meta)
if resolution == "pull":
to_pull.append(key)
stats.conflicts_resolved += 1
else:
stats.objects_skipped += 1
pulled_count = 0
for key in to_pull:
if self._shutdown.is_set():
break
if pulled_count >= self.batch_size:
break
remote_meta = remote_objects[key]
success = self._pull_object(rule, key, connection, remote_meta)
if success:
stats.objects_pulled += 1
pulled_count += 1
sync_state.synced_objects[key] = SyncedObjectInfo(
last_synced_at=time.time(),
remote_etag=remote_meta.etag,
source="remote",
)
else:
stats.errors += 1
if rule.sync_deletions:
for key in list(sync_state.synced_objects.keys()):
if key not in remote_keys and key in local_keys:
tracked = sync_state.synced_objects[key]
if tracked.source == "remote":
local_meta = local_objects.get(key)
if local_meta and local_meta.last_modified.timestamp() <= tracked.last_synced_at:
success = self._apply_remote_deletion(rule.bucket_name, key)
if success:
stats.deletions_applied += 1
del sync_state.synced_objects[key]
sync_state.last_full_sync = time.time()
self._save_sync_state(rule.bucket_name, sync_state)
with self.replication_manager._stats_lock:
rule.last_pull_at = time.time()
self.replication_manager.save_rules()
stats.last_sync_at = time.time()
logger.info(
"Site sync completed for %s: pulled=%d, skipped=%d, conflicts=%d, deletions=%d, errors=%d",
rule.bucket_name,
stats.objects_pulled,
stats.objects_skipped,
stats.conflicts_resolved,
stats.deletions_applied,
stats.errors,
)
return stats
def _list_local_objects(self, bucket_name: str) -> Dict[str, Any]:
from .storage import ObjectMeta
objects = self.storage.list_objects_all(bucket_name)
return {obj.key: obj for obj in objects}
def _list_remote_objects(self, rule: "ReplicationRule", connection: "RemoteConnection") -> Dict[str, RemoteObjectMeta]:
s3 = self._create_client(connection)
result: Dict[str, RemoteObjectMeta] = {}
paginator = s3.get_paginator("list_objects_v2")
try:
for page in paginator.paginate(Bucket=rule.target_bucket):
for obj in page.get("Contents", []):
meta = RemoteObjectMeta.from_s3_object(obj)
result[meta.key] = meta
except ClientError as e:
if e.response["Error"]["Code"] == "NoSuchBucket":
return {}
raise
return result
def _resolve_conflict(self, local_meta: Any, remote_meta: RemoteObjectMeta) -> str:
local_ts = local_meta.last_modified.timestamp()
remote_ts = remote_meta.last_modified.timestamp()
if abs(remote_ts - local_ts) < self.clock_skew_tolerance_seconds:
local_etag = local_meta.etag or ""
if remote_meta.etag == local_etag:
return "skip"
return "pull" if remote_meta.etag > local_etag else "keep"
return "pull" if remote_ts > local_ts else "keep"
def _pull_object(
self,
rule: "ReplicationRule",
object_key: str,
connection: "RemoteConnection",
remote_meta: RemoteObjectMeta,
) -> bool:
s3 = self._create_client(connection)
tmp_path = None
try:
tmp_dir = self.storage_root / ".myfsio.sys" / "tmp"
tmp_dir.mkdir(parents=True, exist_ok=True)
with tempfile.NamedTemporaryFile(dir=tmp_dir, delete=False) as tmp_file:
tmp_path = Path(tmp_file.name)
s3.download_file(rule.target_bucket, object_key, str(tmp_path))
head_response = s3.head_object(Bucket=rule.target_bucket, Key=object_key)
user_metadata = head_response.get("Metadata", {})
with open(tmp_path, "rb") as f:
self.storage.put_object(
rule.bucket_name,
object_key,
f,
metadata=user_metadata if user_metadata else None,
)
logger.debug("Pulled object %s/%s from remote", rule.bucket_name, object_key)
return True
except ClientError as e:
logger.error("Failed to pull %s/%s: %s", rule.bucket_name, object_key, e)
return False
except Exception as e:
logger.error("Failed to store pulled object %s/%s: %s", rule.bucket_name, object_key, e)
return False
finally:
if tmp_path and tmp_path.exists():
try:
tmp_path.unlink()
except OSError:
pass
def _apply_remote_deletion(self, bucket_name: str, object_key: str) -> bool:
try:
self.storage.delete_object(bucket_name, object_key)
logger.debug("Applied remote deletion for %s/%s", bucket_name, object_key)
return True
except Exception as e:
logger.error("Failed to apply remote deletion for %s/%s: %s", bucket_name, object_key, e)
return False
def _sync_state_path(self, bucket_name: str) -> Path:
return self.storage_root / ".myfsio.sys" / "buckets" / bucket_name / "site_sync_state.json"
def _load_sync_state(self, bucket_name: str) -> SyncState:
path = self._sync_state_path(bucket_name)
if not path.exists():
return SyncState()
try:
data = json.loads(path.read_text(encoding="utf-8"))
return SyncState.from_dict(data)
except (json.JSONDecodeError, OSError, KeyError) as e:
logger.warning("Failed to load sync state for %s: %s", bucket_name, e)
return SyncState()
def _save_sync_state(self, bucket_name: str, state: SyncState) -> None:
path = self._sync_state_path(bucket_name)
path.parent.mkdir(parents=True, exist_ok=True)
try:
path.write_text(json.dumps(state.to_dict(), indent=2), encoding="utf-8")
except OSError as e:
logger.warning("Failed to save sync state for %s: %s", bucket_name, e)

File diff suppressed because it is too large Load Diff

View File

@@ -1,215 +0,0 @@
from __future__ import annotations
import json
import logging
import threading
import time
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, TYPE_CHECKING
import psutil
if TYPE_CHECKING:
from .storage import ObjectStorage
logger = logging.getLogger(__name__)
@dataclass
class SystemMetricsSnapshot:
timestamp: datetime
cpu_percent: float
memory_percent: float
disk_percent: float
storage_bytes: int
def to_dict(self) -> Dict[str, Any]:
return {
"timestamp": self.timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
"cpu_percent": round(self.cpu_percent, 2),
"memory_percent": round(self.memory_percent, 2),
"disk_percent": round(self.disk_percent, 2),
"storage_bytes": self.storage_bytes,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "SystemMetricsSnapshot":
timestamp_str = data["timestamp"]
if timestamp_str.endswith("Z"):
timestamp_str = timestamp_str[:-1] + "+00:00"
return cls(
timestamp=datetime.fromisoformat(timestamp_str),
cpu_percent=data.get("cpu_percent", 0.0),
memory_percent=data.get("memory_percent", 0.0),
disk_percent=data.get("disk_percent", 0.0),
storage_bytes=data.get("storage_bytes", 0),
)
class SystemMetricsCollector:
def __init__(
self,
storage_root: Path,
interval_minutes: int = 5,
retention_hours: int = 24,
):
self.storage_root = storage_root
self.interval_seconds = interval_minutes * 60
self.retention_hours = retention_hours
self._lock = threading.Lock()
self._shutdown = threading.Event()
self._snapshots: List[SystemMetricsSnapshot] = []
self._storage_ref: Optional["ObjectStorage"] = None
self._load_history()
self._snapshot_thread = threading.Thread(
target=self._snapshot_loop,
name="system-metrics-snapshot",
daemon=True,
)
self._snapshot_thread.start()
def set_storage(self, storage: "ObjectStorage") -> None:
with self._lock:
self._storage_ref = storage
def _config_path(self) -> Path:
return self.storage_root / ".myfsio.sys" / "config" / "metrics_history.json"
def _load_history(self) -> None:
config_path = self._config_path()
if not config_path.exists():
return
try:
data = json.loads(config_path.read_text(encoding="utf-8"))
history_data = data.get("history", [])
self._snapshots = [SystemMetricsSnapshot.from_dict(s) for s in history_data]
self._prune_old_snapshots()
except (json.JSONDecodeError, OSError, KeyError) as e:
logger.warning(f"Failed to load system metrics history: {e}")
def _save_history(self) -> None:
config_path = self._config_path()
config_path.parent.mkdir(parents=True, exist_ok=True)
try:
data = {"history": [s.to_dict() for s in self._snapshots]}
config_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
except OSError as e:
logger.warning(f"Failed to save system metrics history: {e}")
def _prune_old_snapshots(self) -> None:
if not self._snapshots:
return
cutoff = datetime.now(timezone.utc).timestamp() - (self.retention_hours * 3600)
self._snapshots = [
s for s in self._snapshots if s.timestamp.timestamp() > cutoff
]
def _snapshot_loop(self) -> None:
while not self._shutdown.is_set():
self._shutdown.wait(timeout=self.interval_seconds)
if not self._shutdown.is_set():
self._take_snapshot()
def _take_snapshot(self) -> None:
try:
cpu_percent = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory()
disk = psutil.disk_usage(str(self.storage_root))
storage_bytes = 0
with self._lock:
storage = self._storage_ref
if storage:
try:
buckets = storage.list_buckets()
for bucket in buckets:
stats = storage.bucket_stats(bucket.name, cache_ttl=60)
storage_bytes += stats.get("total_bytes", stats.get("bytes", 0))
except Exception as e:
logger.warning(f"Failed to collect bucket stats: {e}")
snapshot = SystemMetricsSnapshot(
timestamp=datetime.now(timezone.utc),
cpu_percent=cpu_percent,
memory_percent=memory.percent,
disk_percent=disk.percent,
storage_bytes=storage_bytes,
)
with self._lock:
self._snapshots.append(snapshot)
self._prune_old_snapshots()
self._save_history()
logger.debug(f"System metrics snapshot taken: CPU={cpu_percent:.1f}%, Memory={memory.percent:.1f}%")
except Exception as e:
logger.warning(f"Failed to take system metrics snapshot: {e}")
def get_current(self) -> Dict[str, Any]:
cpu_percent = psutil.cpu_percent(interval=0.1)
memory = psutil.virtual_memory()
disk = psutil.disk_usage(str(self.storage_root))
boot_time = psutil.boot_time()
uptime_seconds = time.time() - boot_time
uptime_days = int(uptime_seconds / 86400)
total_buckets = 0
total_objects = 0
total_bytes_used = 0
total_versions = 0
with self._lock:
storage = self._storage_ref
if storage:
try:
buckets = storage.list_buckets()
total_buckets = len(buckets)
for bucket in buckets:
stats = storage.bucket_stats(bucket.name, cache_ttl=60)
total_objects += stats.get("total_objects", stats.get("objects", 0))
total_bytes_used += stats.get("total_bytes", stats.get("bytes", 0))
total_versions += stats.get("version_count", 0)
except Exception as e:
logger.warning(f"Failed to collect current bucket stats: {e}")
return {
"cpu_percent": round(cpu_percent, 2),
"memory": {
"total": memory.total,
"available": memory.available,
"used": memory.used,
"percent": round(memory.percent, 2),
},
"disk": {
"total": disk.total,
"free": disk.free,
"used": disk.used,
"percent": round(disk.percent, 2),
},
"app": {
"buckets": total_buckets,
"objects": total_objects,
"versions": total_versions,
"storage_bytes": total_bytes_used,
"uptime_days": uptime_days,
},
}
def get_history(self, hours: Optional[int] = None) -> List[Dict[str, Any]]:
with self._lock:
snapshots = list(self._snapshots)
if hours:
cutoff = datetime.now(timezone.utc).timestamp() - (hours * 3600)
snapshots = [s for s in snapshots if s.timestamp.timestamp() > cutoff]
return [s.to_dict() for s in snapshots]
def shutdown(self) -> None:
self._shutdown.set()
self._take_snapshot()
self._snapshot_thread.join(timeout=5.0)

2913
app/ui.py

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
"""Central location for the application version string."""
from __future__ import annotations from __future__ import annotations
APP_VERSION = "0.3.0" APP_VERSION = "0.1.7"
def get_version() -> str: def get_version() -> str:

View File

@@ -1,79 +0,0 @@
from __future__ import annotations
import json
import re
import threading
from pathlib import Path
from typing import Dict, List, Optional
_DOMAIN_RE = re.compile(
r"^(?!-)[a-z0-9]([a-z0-9-]*[a-z0-9])?(\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$"
)
def normalize_domain(raw: str) -> str:
raw = raw.strip().lower()
for prefix in ("https://", "http://"):
if raw.startswith(prefix):
raw = raw[len(prefix):]
raw = raw.split("/", 1)[0]
raw = raw.split("?", 1)[0]
raw = raw.split("#", 1)[0]
if ":" in raw:
raw = raw.rsplit(":", 1)[0]
return raw
def is_valid_domain(domain: str) -> bool:
if not domain or len(domain) > 253:
return False
return bool(_DOMAIN_RE.match(domain))
class WebsiteDomainStore:
def __init__(self, config_path: Path) -> None:
self.config_path = config_path
self._lock = threading.Lock()
self._domains: Dict[str, str] = {}
self.reload()
def reload(self) -> None:
if not self.config_path.exists():
self._domains = {}
return
try:
with open(self.config_path, "r", encoding="utf-8") as f:
data = json.load(f)
if isinstance(data, dict):
self._domains = {k.lower(): v for k, v in data.items()}
else:
self._domains = {}
except (OSError, json.JSONDecodeError):
self._domains = {}
def _save(self) -> None:
self.config_path.parent.mkdir(parents=True, exist_ok=True)
with open(self.config_path, "w", encoding="utf-8") as f:
json.dump(self._domains, f, indent=2)
def list_all(self) -> List[Dict[str, str]]:
with self._lock:
return [{"domain": d, "bucket": b} for d, b in self._domains.items()]
def get_bucket(self, domain: str) -> Optional[str]:
with self._lock:
return self._domains.get(domain.lower())
def set_mapping(self, domain: str, bucket: str) -> None:
with self._lock:
self._domains[domain.lower()] = bucket
self._save()
def delete_mapping(self, domain: str) -> bool:
with self._lock:
key = domain.lower()
if key not in self._domains:
return False
del self._domains[key]
self._save()
return True

1367
docs.md

File diff suppressed because it is too large Load Diff

421
myfsio_core/Cargo.lock generated
View File

@@ -1,421 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "bitflags"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "cfg-if"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
[[package]]
name = "cpufeatures"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
dependencies = [
"libc",
]
[[package]]
name = "crypto-common"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "hex"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "libc"
version = "0.2.182"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
[[package]]
name = "lock_api"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
dependencies = [
"scopeguard",
]
[[package]]
name = "lru"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198"
dependencies = [
"hashbrown",
]
[[package]]
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"cfg-if",
"digest",
]
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "myfsio_core"
version = "0.1.0"
dependencies = [
"hex",
"hmac",
"lru",
"md-5",
"parking_lot",
"pyo3",
"regex",
"sha2",
"unicode-normalization",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "parking_lot"
version = "0.12.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-link",
]
[[package]]
name = "portable-atomic"
version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14c738662e2181be11cb82487628404254902bb3225d8e9e99c31f3ef82a405c"
dependencies = [
"libc",
"once_cell",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
]
[[package]]
name = "pyo3-build-config"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f9ca0864a7dd3c133a7f3f020cbff2e12e88420da854c35540fd20ce2d60e435"
dependencies = [
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dfc1956b709823164763a34cc42bbfd26b8730afa77809a3df8b94a3ae3b059"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-macros"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29dc660ad948bae134d579661d08033fbb1918f4529c3bbe3257a68f2009ddf2"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.28.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78cd6c6d718acfcedf26c3d21fe0f053624368b0d44298c55d7138fde9331f7"
dependencies = [
"heck",
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
dependencies = [
"proc-macro2",
]
[[package]]
name = "redox_syscall"
version = "0.5.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
dependencies = [
"bitflags",
]
[[package]]
name = "regex"
version = "1.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "sha2"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "2.0.116"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
[[package]]
name = "tinyvec"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "typenum"
version = "1.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb"
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "unicode-normalization"
version = "0.1.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
dependencies = [
"tinyvec",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"

View File

@@ -1,19 +0,0 @@
[package]
name = "myfsio_core"
version = "0.1.0"
edition = "2021"
[lib]
name = "myfsio_core"
crate-type = ["cdylib"]
[dependencies]
pyo3 = { version = "0.28", features = ["extension-module"] }
hmac = "0.12"
sha2 = "0.10"
md-5 = "0.10"
hex = "0.4"
unicode-normalization = "0.1"
regex = "1"
lru = "0.14"
parking_lot = "0.12"

View File

@@ -1,11 +0,0 @@
[build-system]
requires = ["maturin>=1.0,<2.0"]
build-backend = "maturin"
[project]
name = "myfsio_core"
version = "0.1.0"
requires-python = ">=3.10"
[tool.maturin]
features = ["pyo3/extension-module"]

View File

@@ -1,90 +0,0 @@
use md5::{Digest, Md5};
use pyo3::exceptions::PyIOError;
use pyo3::prelude::*;
use sha2::Sha256;
use std::fs::File;
use std::io::Read;
const CHUNK_SIZE: usize = 65536;
#[pyfunction]
pub fn md5_file(py: Python<'_>, path: &str) -> PyResult<String> {
let path = path.to_owned();
py.detach(move || {
let mut file = File::open(&path)
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
let mut hasher = Md5::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
})
}
#[pyfunction]
pub fn md5_bytes(data: &[u8]) -> String {
let mut hasher = Md5::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
#[pyfunction]
pub fn sha256_file(py: Python<'_>, path: &str) -> PyResult<String> {
let path = path.to_owned();
py.detach(move || {
let mut file = File::open(&path)
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
let mut hasher = Sha256::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(format!("{:x}", hasher.finalize()))
})
}
#[pyfunction]
pub fn sha256_bytes(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
format!("{:x}", hasher.finalize())
}
#[pyfunction]
pub fn md5_sha256_file(py: Python<'_>, path: &str) -> PyResult<(String, String)> {
let path = path.to_owned();
py.detach(move || {
let mut file = File::open(&path)
.map_err(|e| PyIOError::new_err(format!("Failed to open file: {}", e)))?;
let mut md5_hasher = Md5::new();
let mut sha_hasher = Sha256::new();
let mut buf = vec![0u8; CHUNK_SIZE];
loop {
let n = file
.read(&mut buf)
.map_err(|e| PyIOError::new_err(format!("Failed to read file: {}", e)))?;
if n == 0 {
break;
}
md5_hasher.update(&buf[..n]);
sha_hasher.update(&buf[..n]);
}
Ok((
format!("{:x}", md5_hasher.finalize()),
format!("{:x}", sha_hasher.finalize()),
))
})
}

View File

@@ -1,30 +0,0 @@
mod hashing;
mod sigv4;
mod validation;
use pyo3::prelude::*;
#[pymodule]
mod myfsio_core {
use super::*;
#[pymodule_init]
fn init(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_function(wrap_pyfunction!(sigv4::derive_signing_key, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::compute_signature, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::build_string_to_sign, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::constant_time_compare, m)?)?;
m.add_function(wrap_pyfunction!(sigv4::clear_signing_key_cache, m)?)?;
m.add_function(wrap_pyfunction!(hashing::md5_file, m)?)?;
m.add_function(wrap_pyfunction!(hashing::md5_bytes, m)?)?;
m.add_function(wrap_pyfunction!(hashing::sha256_file, m)?)?;
m.add_function(wrap_pyfunction!(hashing::sha256_bytes, m)?)?;
m.add_function(wrap_pyfunction!(hashing::md5_sha256_file, m)?)?;
m.add_function(wrap_pyfunction!(validation::validate_object_key, m)?)?;
m.add_function(wrap_pyfunction!(validation::validate_bucket_name, m)?)?;
Ok(())
}
}

View File

@@ -1,108 +0,0 @@
use hmac::{Hmac, Mac};
use lru::LruCache;
use parking_lot::Mutex;
use pyo3::prelude::*;
use sha2::{Digest, Sha256};
use std::num::NonZeroUsize;
use std::sync::LazyLock;
use std::time::Instant;
type HmacSha256 = Hmac<Sha256>;
struct CacheEntry {
key: Vec<u8>,
created: Instant,
}
static SIGNING_KEY_CACHE: LazyLock<Mutex<LruCache<(String, String, String, String), CacheEntry>>> =
LazyLock::new(|| Mutex::new(LruCache::new(NonZeroUsize::new(256).unwrap())));
const CACHE_TTL_SECS: u64 = 60;
fn hmac_sha256(key: &[u8], msg: &[u8]) -> Vec<u8> {
let mut mac = HmacSha256::new_from_slice(key).expect("HMAC key length is always valid");
mac.update(msg);
mac.finalize().into_bytes().to_vec()
}
#[pyfunction]
pub fn derive_signing_key(
secret_key: &str,
date_stamp: &str,
region: &str,
service: &str,
) -> Vec<u8> {
let cache_key = (
secret_key.to_owned(),
date_stamp.to_owned(),
region.to_owned(),
service.to_owned(),
);
{
let mut cache = SIGNING_KEY_CACHE.lock();
if let Some(entry) = cache.get(&cache_key) {
if entry.created.elapsed().as_secs() < CACHE_TTL_SECS {
return entry.key.clone();
}
cache.pop(&cache_key);
}
}
let k_date = hmac_sha256(format!("AWS4{}", secret_key).as_bytes(), date_stamp.as_bytes());
let k_region = hmac_sha256(&k_date, region.as_bytes());
let k_service = hmac_sha256(&k_region, service.as_bytes());
let k_signing = hmac_sha256(&k_service, b"aws4_request");
{
let mut cache = SIGNING_KEY_CACHE.lock();
cache.put(
cache_key,
CacheEntry {
key: k_signing.clone(),
created: Instant::now(),
},
);
}
k_signing
}
#[pyfunction]
pub fn compute_signature(signing_key: &[u8], string_to_sign: &str) -> String {
let sig = hmac_sha256(signing_key, string_to_sign.as_bytes());
hex::encode(sig)
}
fn sha256_hex(data: &[u8]) -> String {
let mut hasher = Sha256::new();
hasher.update(data);
hex::encode(hasher.finalize())
}
#[pyfunction]
pub fn build_string_to_sign(
amz_date: &str,
credential_scope: &str,
canonical_request: &str,
) -> String {
let cr_hash = sha256_hex(canonical_request.as_bytes());
format!("AWS4-HMAC-SHA256\n{}\n{}\n{}", amz_date, credential_scope, cr_hash)
}
#[pyfunction]
pub fn constant_time_compare(a: &str, b: &str) -> bool {
if a.len() != b.len() {
return false;
}
let mut result: u8 = 0;
for (x, y) in a.bytes().zip(b.bytes()) {
result |= x ^ y;
}
result == 0
}
#[pyfunction]
pub fn clear_signing_key_cache() {
SIGNING_KEY_CACHE.lock().clear();
}

View File

@@ -1,149 +0,0 @@
use pyo3::prelude::*;
use std::sync::LazyLock;
use unicode_normalization::UnicodeNormalization;
const WINDOWS_RESERVED: &[&str] = &[
"CON", "PRN", "AUX", "NUL", "COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
"COM8", "COM9", "LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8",
"LPT9",
];
const WINDOWS_ILLEGAL_CHARS: &[char] = &['<', '>', ':', '"', '/', '\\', '|', '?', '*'];
const INTERNAL_FOLDERS: &[&str] = &[".meta", ".versions", ".multipart"];
const SYSTEM_ROOT: &str = ".myfsio.sys";
static IP_REGEX: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$").unwrap());
#[pyfunction]
#[pyo3(signature = (object_key, max_length_bytes=1024, is_windows=false, reserved_prefixes=None))]
pub fn validate_object_key(
object_key: &str,
max_length_bytes: usize,
is_windows: bool,
reserved_prefixes: Option<Vec<String>>,
) -> PyResult<Option<String>> {
if object_key.is_empty() {
return Ok(Some("Object key required".to_string()));
}
if object_key.contains('\0') {
return Ok(Some("Object key contains null bytes".to_string()));
}
let normalized: String = object_key.nfc().collect();
if normalized.len() > max_length_bytes {
return Ok(Some(format!(
"Object key exceeds maximum length of {} bytes",
max_length_bytes
)));
}
if normalized.starts_with('/') || normalized.starts_with('\\') {
return Ok(Some("Object key cannot start with a slash".to_string()));
}
let parts: Vec<&str> = if cfg!(windows) || is_windows {
normalized.split(['/', '\\']).collect()
} else {
normalized.split('/').collect()
};
for part in &parts {
if part.is_empty() {
continue;
}
if *part == ".." {
return Ok(Some(
"Object key contains parent directory references".to_string(),
));
}
if *part == "." {
return Ok(Some("Object key contains invalid segments".to_string()));
}
if part.chars().any(|c| (c as u32) < 32) {
return Ok(Some(
"Object key contains control characters".to_string(),
));
}
if is_windows {
if part.chars().any(|c| WINDOWS_ILLEGAL_CHARS.contains(&c)) {
return Ok(Some(
"Object key contains characters not supported on Windows filesystems"
.to_string(),
));
}
if part.ends_with(' ') || part.ends_with('.') {
return Ok(Some(
"Object key segments cannot end with spaces or periods on Windows".to_string(),
));
}
let trimmed = part.trim_end_matches(['.', ' ']).to_uppercase();
if WINDOWS_RESERVED.contains(&trimmed.as_str()) {
return Ok(Some(format!("Invalid filename segment: {}", part)));
}
}
}
let non_empty_parts: Vec<&str> = parts.iter().filter(|p| !p.is_empty()).copied().collect();
if let Some(top) = non_empty_parts.first() {
if INTERNAL_FOLDERS.contains(top) || *top == SYSTEM_ROOT {
return Ok(Some("Object key uses a reserved prefix".to_string()));
}
if let Some(ref prefixes) = reserved_prefixes {
for prefix in prefixes {
if *top == prefix.as_str() {
return Ok(Some("Object key uses a reserved prefix".to_string()));
}
}
}
}
Ok(None)
}
#[pyfunction]
pub fn validate_bucket_name(bucket_name: &str) -> Option<String> {
let len = bucket_name.len();
if len < 3 || len > 63 {
return Some("Bucket name must be between 3 and 63 characters".to_string());
}
let bytes = bucket_name.as_bytes();
if !bytes[0].is_ascii_lowercase() && !bytes[0].is_ascii_digit() {
return Some(
"Bucket name must start and end with a lowercase letter or digit".to_string(),
);
}
if !bytes[len - 1].is_ascii_lowercase() && !bytes[len - 1].is_ascii_digit() {
return Some(
"Bucket name must start and end with a lowercase letter or digit".to_string(),
);
}
for &b in bytes {
if !b.is_ascii_lowercase() && !b.is_ascii_digit() && b != b'.' && b != b'-' {
return Some(
"Bucket name can only contain lowercase letters, digits, dots, and hyphens"
.to_string(),
);
}
}
if bucket_name.contains("..") {
return Some("Bucket name must not contain consecutive periods".to_string());
}
if IP_REGEX.is_match(bucket_name) {
return Some("Bucket name must not be formatted as an IP address".to_string());
}
None
}

View File

@@ -1 +0,0 @@
{"rustc_fingerprint":13172970000770725120,"outputs":{"7971740275564407648":{"success":true,"status":"","code":0,"stdout":"___.exe\nlib___.rlib\n___.dll\n___.dll\n___.lib\n___.dll\nC:\\Users\\jun\\.rustup\\toolchains\\stable-x86_64-pc-windows-msvc\npacked\n___\ndebug_assertions\npanic=\"unwind\"\nproc_macro\ntarget_abi=\"\"\ntarget_arch=\"x86_64\"\ntarget_endian=\"little\"\ntarget_env=\"msvc\"\ntarget_family=\"windows\"\ntarget_feature=\"cmpxchg16b\"\ntarget_feature=\"fxsr\"\ntarget_feature=\"sse\"\ntarget_feature=\"sse2\"\ntarget_feature=\"sse3\"\ntarget_has_atomic=\"128\"\ntarget_has_atomic=\"16\"\ntarget_has_atomic=\"32\"\ntarget_has_atomic=\"64\"\ntarget_has_atomic=\"8\"\ntarget_has_atomic=\"ptr\"\ntarget_os=\"windows\"\ntarget_pointer_width=\"64\"\ntarget_vendor=\"pc\"\nwindows\n","stderr":""},"17747080675513052775":{"success":true,"status":"","code":0,"stdout":"rustc 1.93.1 (01f6ddf75 2026-02-11)\nbinary: rustc\ncommit-hash: 01f6ddf7588f42ae2d7eb0a2f21d44e8e96674cf\ncommit-date: 2026-02-11\nhost: x86_64-pc-windows-msvc\nrelease: 1.93.1\nLLVM version: 21.1.8\n","stderr":""}},"successes":{}}

View File

@@ -1,3 +0,0 @@
Signature: 8a477f597d28d172789f06886806bc55
# This file is a cache directory tag created by cargo.
# For information about cache directory tags see https://bford.info/cachedir/

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"perf-literal\", \"std\"]","declared_features":"[\"default\", \"logging\", \"perf-literal\", \"std\"]","target":7534583537114156500,"profile":2040997289075261528,"path":6364296192483896971,"deps":[[1363051979936526615,"memchr",false,11090220145123168660]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\aho-corasick-45694771b543be75\\dep-lib-aho_corasick","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"alloc\"]","declared_features":"[\"alloc\", \"default\", \"fresh-rust\", \"nightly\", \"serde\", \"std\"]","target":5388200169723499962,"profile":4067574213046180398,"path":10654049299693593327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\allocator-api2-db7934dbe96de5b4\\dep-lib-allocator_api2","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":6962977057026645649,"profile":1369601567987815722,"path":9853093265219907461,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\autocfg-1c4fb7a37cc3df69\\dep-lib-autocfg","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":4098124618827574291,"profile":2040997289075261528,"path":3658007358608479489,"deps":[[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\block-buffer-95b0ac364bec72f9\\dep-lib-block_buffer","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"core\", \"rustc-dep-of-std\"]","target":13840298032947503755,"profile":2040997289075261528,"path":4093486168504982869,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cfg-if-be2711f84a777e73\\dep-lib-cfg_if","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":2330704043955282025,"profile":2040997289075261528,"path":13200428550696548327,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\cpufeatures-980094f8735c42d1\\dep-lib-cpufeatures","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"std\"]","declared_features":"[\"getrandom\", \"rand_core\", \"std\"]","target":12082577455412410174,"profile":2040997289075261528,"path":14902376638882023040,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"generic_array",false,11555283918993371487]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\crypto-common-289a508abdda3048\\dep-lib-crypto_common","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"alloc\", \"block-buffer\", \"core-api\", \"default\", \"mac\", \"std\", \"subtle\"]","declared_features":"[\"alloc\", \"blobby\", \"block-buffer\", \"const-oid\", \"core-api\", \"default\", \"dev\", \"mac\", \"oid\", \"rand_core\", \"std\", \"subtle\"]","target":7510122432137863311,"profile":2040997289075261528,"path":11503432597517024930,"deps":[[6039282458970808711,"crypto_common",false,11252724541433210505],[10626340395483396037,"block_buffer",false,17139625223017709343],[17003143334332120809,"subtle",false,8597342066671925934]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\digest-a91458bfa5613332\\dep-lib-digest","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[]","declared_features":"[]","target":1524667692659508025,"profile":2040997289075261528,"path":17534356223679657546,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\equivalent-943ac856871c0988\\dep-lib-equivalent","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[]","declared_features":"[\"default\", \"std\"]","target":18077926938045032029,"profile":2040997289075261528,"path":9869209539952544870,"deps":[],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\foldhash-b8a92f8c10d550f7\\dep-lib-foldhash","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":12318548087768197662,"profile":1369601567987815722,"path":13853454403963664247,"deps":[[5398981501050481332,"version_check",false,16419025953046340415]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-2462daa120fe5936\\dep-build-script-build-script-build","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"more_lengths\"]","declared_features":"[\"more_lengths\", \"serde\", \"zeroize\"]","target":13084005262763373425,"profile":2040997289075261528,"path":12463275850883329568,"deps":[[857979250431893282,"typenum",false,7416411392359930020],[10520923840501062997,"build_script_build",false,16977603856295925732]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\generic-array-62216349963f3a3c\\dep-lib-generic_array","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"","declared_features":"","target":0,"profile":0,"path":0,"deps":[[10520923840501062997,"build_script_build",false,464306762232604144]],"local":[{"Precalculated":"0.14.7"}],"rustflags":[],"config":0,"compile_kind":0}

View File

@@ -1 +0,0 @@
This file has an mtime of when this was started.

View File

@@ -1 +0,0 @@
{"rustc":8323788817864214825,"features":"[\"allocator-api2\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"raw-entry\"]","declared_features":"[\"alloc\", \"allocator-api2\", \"core\", \"default\", \"default-hasher\", \"equivalent\", \"inline-more\", \"nightly\", \"raw-entry\", \"rayon\", \"rustc-dep-of-std\", \"rustc-internal-api\", \"serde\"]","target":13796197676120832388,"profile":2040997289075261528,"path":12448322139402656924,"deps":[[5230392855116717286,"equivalent",false,6042941999404782907],[9150530836556604396,"allocator_api2",false,16398368410642502979],[10842263908529601448,"foldhash",false,10953695263156452023]],"local":[{"CheckDepInfo":{"dep_info":"release\\.fingerprint\\hashbrown-510d641b592c306b\\dep-lib-hashbrown","checksum":false}}],"rustflags":[],"config":2069994364910194474,"compile_kind":0}

Some files were not shown because too many files have changed in this diff Show More