From b25b9d7aec36b8a290adf24822354abc1f352933 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Tue, 23 Jan 2024 12:48:51 -0500 Subject: [PATCH 01/10] Update LICENSE change to apache 2 --- LICENSE | 875 +++++++++++++------------------------------------------- 1 file changed, 201 insertions(+), 674 deletions(-) diff --git a/LICENSE b/LICENSE index f288702d..261eeb9e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,674 +1,201 @@ - GNU GENERAL PUBLIC LICENSE - Version 3, 29 June 2007 - - Copyright (C) 2007 Free Software Foundation, Inc. - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The GNU General Public License is a free, copyleft license for -software and other kinds of works. - - The licenses for most software and other practical works are designed -to take away your freedom to share and change the works. By contrast, -the GNU General Public License is intended to guarantee your freedom to -share and change all versions of a program--to make sure it remains free -software for all its users. We, the Free Software Foundation, use the -GNU General Public License for most of our software; it applies also to -any other work released this way by its authors. You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -them if you wish), that you receive source code or can get it if you -want it, that you can change the software or use pieces of it in new -free programs, and that you know you can do these things. - - To protect your rights, we need to prevent others from denying you -these rights or asking you to surrender the rights. Therefore, you have -certain responsibilities if you distribute copies of the software, or if -you modify it: responsibilities to respect the freedom of others. 
- - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must pass on to the recipients the same -freedoms that you received. You must make sure that they, too, receive -or can get the source code. And you must show them these terms so they -know their rights. - - Developers that use the GNU GPL protect your rights with two steps: -(1) assert copyright on the software, and (2) offer you this License -giving you legal permission to copy, distribute and/or modify it. - - For the developers' and authors' protection, the GPL clearly explains -that there is no warranty for this free software. For both users' and -authors' sake, the GPL requires that modified versions be marked as -changed, so that their problems will not be attributed erroneously to -authors of previous versions. - - Some devices are designed to deny users access to install or run -modified versions of the software inside them, although the manufacturer -can do so. This is fundamentally incompatible with the aim of -protecting users' freedom to change the software. The systematic -pattern of such abuse occurs in the area of products for individuals to -use, which is precisely where it is most unacceptable. Therefore, we -have designed this version of the GPL to prohibit the practice for those -products. If such problems arise substantially in other domains, we -stand ready to extend this provision to those domains in future versions -of the GPL, as needed to protect the freedom of users. - - Finally, every program is threatened constantly by software patents. -States should not allow patents to restrict development and use of -software on general-purpose computers, but in those that do, we wish to -avoid the special danger that patents applied to a free program could -make it effectively proprietary. To prevent this, the GPL assures that -patents cannot be used to render the program non-free. 
- - The precise terms and conditions for copying, distribution and -modification follow. - - TERMS AND CONDITIONS - - 0. Definitions. - - "This License" refers to version 3 of the GNU General Public License. - - "Copyright" also means copyright-like laws that apply to other kinds of -works, such as semiconductor masks. - - "The Program" refers to any copyrightable work licensed under this -License. Each licensee is addressed as "you". "Licensees" and -"recipients" may be individuals or organizations. - - To "modify" a work means to copy from or adapt all or part of the work -in a fashion requiring copyright permission, other than the making of an -exact copy. The resulting work is called a "modified version" of the -earlier work or a work "based on" the earlier work. - - A "covered work" means either the unmodified Program or a work based -on the Program. - - To "propagate" a work means to do anything with it that, without -permission, would make you directly or secondarily liable for -infringement under applicable copyright law, except executing it on a -computer or modifying a private copy. Propagation includes copying, -distribution (with or without modification), making available to the -public, and in some countries other activities as well. - - To "convey" a work means any kind of propagation that enables other -parties to make or receive copies. Mere interaction with a user through -a computer network, with no transfer of a copy, is not conveying. - - An interactive user interface displays "Appropriate Legal Notices" -to the extent that it includes a convenient and prominently visible -feature that (1) displays an appropriate copyright notice, and (2) -tells the user that there is no warranty for the work (except to the -extent that warranties are provided), that licensees may convey the -work under this License, and how to view a copy of this License. 
If -the interface presents a list of user commands or options, such as a -menu, a prominent item in the list meets this criterion. - - 1. Source Code. - - The "source code" for a work means the preferred form of the work -for making modifications to it. "Object code" means any non-source -form of a work. - - A "Standard Interface" means an interface that either is an official -standard defined by a recognized standards body, or, in the case of -interfaces specified for a particular programming language, one that -is widely used among developers working in that language. - - The "System Libraries" of an executable work include anything, other -than the work as a whole, that (a) is included in the normal form of -packaging a Major Component, but which is not part of that Major -Component, and (b) serves only to enable use of the work with that -Major Component, or to implement a Standard Interface for which an -implementation is available to the public in source code form. A -"Major Component", in this context, means a major essential component -(kernel, window system, and so on) of the specific operating system -(if any) on which the executable work runs, or a compiler used to -produce the work, or an object code interpreter used to run it. - - The "Corresponding Source" for a work in object code form means all -the source code needed to generate, install, and (for an executable -work) run the object code and to modify the work, including scripts to -control those activities. However, it does not include the work's -System Libraries, or general-purpose tools or generally available free -programs which are used unmodified in performing those activities but -which are not part of the work. 
For example, Corresponding Source -includes interface definition files associated with source files for -the work, and the source code for shared libraries and dynamically -linked subprograms that the work is specifically designed to require, -such as by intimate data communication or control flow between those -subprograms and other parts of the work. - - The Corresponding Source need not include anything that users -can regenerate automatically from other parts of the Corresponding -Source. - - The Corresponding Source for a work in source code form is that -same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of -copyright on the Program, and are irrevocable provided the stated -conditions are met. This License explicitly affirms your unlimited -permission to run the unmodified Program. The output from running a -covered work is covered by this License only if the output, given its -content, constitutes a covered work. This License acknowledges your -rights of fair use or other equivalent, as provided by copyright law. - - You may make, run and propagate covered works that you do not -convey, without conditions so long as your license otherwise remains -in force. You may convey covered works to others for the sole purpose -of having them make modifications exclusively for you, or provide you -with facilities for running those works, provided that you comply with -the terms of this License in conveying all material for which you do -not control copyright. Those thus making or running the covered works -for you must do so exclusively on your behalf, under your direction -and control, on terms that prohibit them from making any copies of -your copyrighted material outside their relationship with you. - - Conveying under any other circumstances is permitted solely under -the conditions stated below. Sublicensing is not allowed; section 10 -makes it unnecessary. - - 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological -measure under any applicable law fulfilling obligations under article -11 of the WIPO copyright treaty adopted on 20 December 1996, or -similar laws prohibiting or restricting circumvention of such -measures. - - When you convey a covered work, you waive any legal power to forbid -circumvention of technological measures to the extent such circumvention -is effected by exercising rights under this License with respect to -the covered work, and you disclaim any intention to limit operation or -modification of the work as a means of enforcing, against the work's -users, your or third parties' legal rights to forbid circumvention of -technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you -receive it, in any medium, provided that you conspicuously and -appropriately publish on each copy an appropriate copyright notice; -keep intact all notices stating that this License and any -non-permissive terms added in accord with section 7 apply to the code; -keep intact all notices of the absence of any warranty; and give all -recipients a copy of this License along with the Program. - - You may charge any price or no price for each copy that you convey, -and you may offer support or warranty protection for a fee. - - 5. Conveying Modified Source Versions. - - You may convey a work based on the Program, or the modifications to -produce it from the Program, in the form of source code under the -terms of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified - it, and giving a relevant date. - - b) The work must carry prominent notices stating that it is - released under this License and any conditions added under section - 7. 
This requirement modifies the requirement in section 4 to - "keep intact all notices". - - c) You must license the entire work, as a whole, under this - License to anyone who comes into possession of a copy. This - License will therefore apply, along with any applicable section 7 - additional terms, to the whole of the work, and all its parts, - regardless of how they are packaged. This License gives no - permission to license the work in any other way, but it does not - invalidate such permission if you have separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your - work need not make them do so. - - A compilation of a covered work with other separate and independent -works, which are not by their nature extensions of the covered work, -and which are not combined with it such as to form a larger program, -in or on a volume of a storage or distribution medium, is called an -"aggregate" if the compilation and its resulting copyright are not -used to limit the access or legal rights of the compilation's users -beyond what the individual works permit. Inclusion of a covered work -in an aggregate does not cause this License to apply to the other -parts of the aggregate. - - 6. Conveying Non-Source Forms. - - You may convey a covered work in object code form under the terms -of sections 4 and 5, provided that you also convey the -machine-readable Corresponding Source under the terms of this License, -in one of these ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium - customarily used for software interchange. 
- - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a - written offer, valid for at least three years and valid for as - long as you offer spare parts or customer support for that product - model, to give anyone who possesses the object code either (1) a - copy of the Corresponding Source for all the software in the - product that is covered by this License, on a durable physical - medium customarily used for software interchange, for a price no - more than your reasonable cost of physically performing this - conveying of source, or (2) access to copy the - Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This - alternative is allowed only occasionally and noncommercially, and - only if you received the object code with such an offer, in accord - with subsection 6b. - - d) Convey the object code by offering access from a designated - place (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. If the place to - copy the object code is a network server, the Corresponding Source - may be on a different server (operated by you or a third party) - that supports equivalent copying facilities, provided you maintain - clear directions next to the object code saying where to find the - Corresponding Source. Regardless of what server hosts the - Corresponding Source, you remain obligated to ensure that it is - available for as long as needed to satisfy these requirements. 
- - e) Convey the object code using peer-to-peer transmission, provided - you inform other peers where the object code and Corresponding - Source of the work are being offered to the general public at no - charge under subsection 6d. - - A separable portion of the object code, whose source code is excluded -from the Corresponding Source as a System Library, need not be -included in conveying the object code work. - - A "User Product" is either (1) a "consumer product", which means any -tangible personal property which is normally used for personal, family, -or household purposes, or (2) anything designed or sold for incorporation -into a dwelling. In determining whether a product is a consumer product, -doubtful cases shall be resolved in favor of coverage. For a particular -product received by a particular user, "normally used" refers to a -typical or common use of that class of product, regardless of the status -of the particular user or of the way in which the particular user -actually uses, or expects or is expected to use, the product. A product -is a consumer product regardless of whether the product has substantial -commercial, industrial or non-consumer uses, unless such uses represent -the only significant mode of use of the product. - - "Installation Information" for a User Product means any methods, -procedures, authorization keys, or other information required to install -and execute modified versions of a covered work in that User Product from -a modified version of its Corresponding Source. The information must -suffice to ensure that the continued functioning of the modified object -code is in no case prevented or interfered with solely because -modification has been made. 
- - If you convey an object code work under this section in, or with, or -specifically for use in, a User Product, and the conveying occurs as -part of a transaction in which the right of possession and use of the -User Product is transferred to the recipient in perpetuity or for a -fixed term (regardless of how the transaction is characterized), the -Corresponding Source conveyed under this section must be accompanied -by the Installation Information. But this requirement does not apply -if neither you nor any third party retains the ability to install -modified object code on the User Product (for example, the work has -been installed in ROM). - - The requirement to provide Installation Information does not include a -requirement to continue to provide support service, warranty, or updates -for a work that has been modified or installed by the recipient, or for -the User Product in which it has been modified or installed. Access to a -network may be denied when the modification itself materially and -adversely affects the operation of the network or violates the rules and -protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, -in accord with this section must be in a format that is publicly -documented (and with an implementation available to the public in -source code form), and must require no special password or key for -unpacking, reading or copying. - - 7. Additional Terms. - - "Additional permissions" are terms that supplement the terms of this -License by making exceptions from one or more of its conditions. -Additional permissions that are applicable to the entire Program shall -be treated as though they were included in this License, to the extent -that they are valid under applicable law. 
If additional permissions -apply only to part of the Program, that part may be used separately -under those permissions, but the entire Program remains governed by -this License without regard to the additional permissions. - - When you convey a copy of a covered work, you may at your option -remove any additional permissions from that copy, or from any part of -it. (Additional permissions may be written to require their own -removal in certain cases when you modify the work.) You may place -additional permissions on material, added by you to a covered work, -for which you have or can give appropriate copyright permission. - - Notwithstanding any other provision of this License, for material you -add to a covered work, you may (if authorized by the copyright holders of -that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some - trade names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that - material by anyone who conveys the material (or modified versions of - it) with contractual assumptions of liability to the recipient, for - any liability that these contractual assumptions directly impose on - those licensors and authors. - - All other non-permissive additional terms are considered "further -restrictions" within the meaning of section 10. 
If the Program as you -received it, or any part of it, contains a notice stating that it is -governed by this License along with a term that is a further -restriction, you may remove that term. If a license document contains -a further restriction but permits relicensing or conveying under this -License, you may add to a covered work material governed by the terms -of that license document, provided that the further restriction does -not survive such relicensing or conveying. - - If you add terms to a covered work in accord with this section, you -must place, in the relevant source files, a statement of the -additional terms that apply to those files, or a notice indicating -where to find the applicable terms. - - Additional terms, permissive or non-permissive, may be stated in the -form of a separately written license, or stated as exceptions; -the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly -provided under this License. Any attempt otherwise to propagate or -modify it is void, and will automatically terminate your rights under -this License (including any patent licenses granted under the third -paragraph of section 11). - - However, if you cease all violation of this License, then your -license from a particular copyright holder is reinstated (a) -provisionally, unless and until the copyright holder explicitly and -finally terminates your license, and (b) permanently, if the copyright -holder fails to notify you of the violation by some reasonable means -prior to 60 days after the cessation. - - Moreover, your license from a particular copyright holder is -reinstated permanently if the copyright holder notifies you of the -violation by some reasonable means, this is the first time you have -received notice of violation of this License (for any work) from that -copyright holder, and you cure the violation prior to 30 days after -your receipt of the notice. 
- - Termination of your rights under this section does not terminate the -licenses of parties who have received copies or rights from you under -this License. If your rights have been terminated and not permanently -reinstated, you do not qualify to receive new licenses for the same -material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or -run a copy of the Program. Ancillary propagation of a covered work -occurring solely as a consequence of using peer-to-peer transmission -to receive a copy likewise does not require acceptance. However, -nothing other than this License grants you permission to propagate or -modify any covered work. These actions infringe copyright if you do -not accept this License. Therefore, by modifying or propagating a -covered work, you indicate your acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically -receives a license from the original licensors, to run, modify and -propagate that work, subject to this License. You are not responsible -for enforcing compliance by third parties with this License. - - An "entity transaction" is a transaction transferring control of an -organization, or substantially all assets of one, or subdividing an -organization, or merging organizations. If propagation of a covered -work results from an entity transaction, each party to that -transaction who receives a copy of the work also receives whatever -licenses to the work the party's predecessor in interest had or could -give under the previous paragraph, plus a right to possession of the -Corresponding Source of the work from the predecessor in interest, if -the predecessor has it or can get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the -rights granted or affirmed under this License. 
For example, you may -not impose a license fee, royalty, or other charge for exercise of -rights granted under this License, and you may not initiate litigation -(including a cross-claim or counterclaim in a lawsuit) alleging that -any patent claim is infringed by making, using, selling, offering for -sale, or importing the Program or any portion of it. - - 11. Patents. - - A "contributor" is a copyright holder who authorizes use under this -License of the Program or a work on which the Program is based. The -work thus licensed is called the contributor's "contributor version". - - A contributor's "essential patent claims" are all patent claims -owned or controlled by the contributor, whether already acquired or -hereafter acquired, that would be infringed by some manner, permitted -by this License, of making, using, or selling its contributor version, -but do not include claims that would be infringed only as a -consequence of further modification of the contributor version. For -purposes of this definition, "control" includes the right to grant -patent sublicenses in a manner consistent with the requirements of -this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free -patent license under the contributor's essential patent claims, to -make, use, sell, offer for sale, import and otherwise run, modify and -propagate the contents of its contributor version. - - In the following three paragraphs, a "patent license" is any express -agreement or commitment, however denominated, not to enforce a patent -(such as an express permission to practice a patent or covenant not to -sue for patent infringement). To "grant" such a patent license to a -party means to make such an agreement or commitment not to enforce a -patent against the party. 
- - If you convey a covered work, knowingly relying on a patent license, -and the Corresponding Source of the work is not available for anyone -to copy, free of charge and under the terms of this License, through a -publicly available network server or other readily accessible means, -then you must either (1) cause the Corresponding Source to be so -available, or (2) arrange to deprive yourself of the benefit of the -patent license for this particular work, or (3) arrange, in a manner -consistent with the requirements of this License, to extend the patent -license to downstream recipients. "Knowingly relying" means you have -actual knowledge that, but for the patent license, your conveying the -covered work in a country, or your recipient's use of the covered work -in a country, would infringe one or more identifiable patents in that -country that you have reason to believe are valid. - - If, pursuant to or in connection with a single transaction or -arrangement, you convey, or propagate by procuring conveyance of, a -covered work, and grant a patent license to some of the parties -receiving the covered work authorizing them to use, propagate, modify -or convey a specific copy of the covered work, then the patent license -you grant is automatically extended to all recipients of the covered -work and works based on it. - - A patent license is "discriminatory" if it does not include within -the scope of its coverage, prohibits the exercise of, or is -conditioned on the non-exercise of one or more of the rights that are -specifically granted under this License. 
You may not convey a covered -work if you are a party to an arrangement with a third party that is -in the business of distributing software, under which you make payment -to the third party based on the extent of your activity of conveying -the work, and under which the third party grants, to any of the -parties who would receive the covered work from you, a discriminatory -patent license (a) in connection with copies of the covered work -conveyed by you (or copies made from those copies), or (b) primarily -for and in connection with specific products or compilations that -contain the covered work, unless you entered into that arrangement, -or that patent license was granted, prior to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting -any implied license or other defenses to infringement that may -otherwise be available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot convey a -covered work so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you may -not convey it at all. For example, if you agree to terms that obligate you -to collect a royalty for further conveying from those to whom you convey -the Program, the only way you could satisfy both those terms and this -License would be to refrain entirely from conveying the Program. - - 13. Use with the GNU Affero General Public License. - - Notwithstanding any other provision of this License, you have -permission to link or combine any covered work with a work licensed -under version 3 of the GNU Affero General Public License into a single -combined work, and to convey the resulting work. 
The terms of this -License will continue to apply to the part which is the covered work, -but the special requirements of the GNU Affero General Public License, -section 13, concerning interaction through a network will apply to the -combination as such. - - 14. Revised Versions of this License. - - The Free Software Foundation may publish revised and/or new versions of -the GNU General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - - Each version is given a distinguishing version number. If the -Program specifies that a certain numbered version of the GNU General -Public License "or any later version" applies to it, you have the -option of following the terms and conditions either of that numbered -version or of any later version published by the Free Software -Foundation. If the Program does not specify a version number of the -GNU General Public License, you may choose any version ever published -by the Free Software Foundation. - - If the Program specifies that a proxy can decide which future -versions of the GNU General Public License can be used, that proxy's -public statement of acceptance of a version permanently authorizes you -to choose that version for the Program. - - Later license versions may give you additional or different -permissions. However, no additional obligations are imposed on any -author or copyright holder as a result of your choosing to follow a -later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY -APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT -HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY -OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM -IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF -ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS -THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY -GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE -USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF -DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD -PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), -EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF -SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. - - If the disclaimer of warranty and limitation of liability provided -above cannot be given local legal effect according to their terms, -reviewing courts shall apply local law that most closely approximates -an absolute waiver of all civil liability in connection with the -Program, unless a warranty or assumption of liability accompanies a -copy of the Program in return for a fee. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -state the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. 
- - <one line to give the program's name and a brief idea of what it does.> - Copyright (C) <year> <name of author> - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. - -Also add information on how to contact you by electronic and paper mail. - - If the program does terminal interaction, make it output a short -notice like this when it starts in an interactive mode: - - <program> Copyright (C) <year> <name of author> - This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, your program's commands -might be different; for a GUI interface, you would use an "about box". - - You should also get your employer (if you work as a programmer) or school, -if any, to sign a "copyright disclaimer" for the program, if necessary. -For more information on this, and how to apply and follow the GNU GPL, see -<https://www.gnu.org/licenses/>. - - The GNU General Public License does not permit incorporating your program -into proprietary programs. If your program is a subroutine library, you -may consider it more useful to permit linking proprietary applications with -the library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. But first, please read -<https://www.gnu.org/licenses/why-not-lgpl.html>.
+ Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
From 71da6b5c62f19ca3dd22dd8606037d6ece3f40dd Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Tue, 13 Feb 2024 11:24:33 -0500 Subject: [PATCH 02/10] changes requested for PR --- src/gfn/containers/trajectories.py | 7 +++--- src/gfn/env.py | 6 ++---- src/gfn/gflownet/base.py | 26 ++++++++++++----------- src/gfn/gflownet/detailed_balance.py | 15 +++++++------ src/gfn/gym/discrete_ebm.py | 20 ----------------- src/gfn/samplers.py | 15 +++++++------ src/gfn/states.py | 8 +++---- src/gfn/utils/common.py | 9 +++++--- src/gfn/utils/modules.py | 2 +- testing/test_samplers_and_trajectories.py | 2 +- 10 files changed, 49 insertions(+), 61 deletions(-) diff --git a/src/gfn/containers/trajectories.py b/src/gfn/containers/trajectories.py index e2e25f6f..5b0142e6 100644 --- a/src/gfn/containers/trajectories.py +++ b/src/gfn/containers/trajectories.py @@ -77,7 +77,7 @@ def __init__( self.env = env self.is_backward = is_backward self.states = ( - states.clone() # TODO: Do we need this clone? + states if states is not None else env.States.from_batch_shape(batch_shape=(0, 0)) ) @@ -169,8 +169,7 @@ def __getitem__(self, index: int | Sequence[int]) -> Trajectories: ) if is_tensor(self.estimator_outputs): - estimator_outputs = self.estimator_outputs[:, index] - estimator_outputs = estimator_outputs[:new_max_length] + estimator_outputs = self.estimator_outputs[..., index][:new_max_length] else: estimator_outputs = None @@ -261,7 +260,7 @@ def extend(self, other: Trajectories) -> None: other_shape = np.array(other.estimator_outputs.shape) required_first_dim = max(self_shape[0], other_shape[0]) - # TODO: This should be a single reused function. + # TODO: This should be a single reused function (#154) # The size of self needs to grow to match other along dim=0. 
if self_shape[0] < other_shape[0]: pad_dim = required_first_dim - self_shape[0] diff --git a/src/gfn/env.py b/src/gfn/env.py index c21f958b..bf2a3d3b 100644 --- a/src/gfn/env.py +++ b/src/gfn/env.py @@ -7,6 +7,7 @@ from gfn.actions import Actions from gfn.preprocessors import IdentityPreprocessor, Preprocessor from gfn.states import DiscreteStates, States +from gfn.utils.common import set_seed # Errors NonValidActionsError = type("NonValidActionsError", (ValueError,), {}) @@ -79,7 +80,7 @@ def reset( assert not (random and sink) if random and seed is not None: - torch.manual_seed(seed) # TODO: Improve seeding here? + set_seed(seed, performance_mode=True) if batch_shape is None: batch_shape = (1,) @@ -150,9 +151,6 @@ def step( new_not_done_states_tensor = self.maskless_step( not_done_states, not_done_actions ) - # TODO: Why is this here? Should it be removed? - # if isinstance(new_states, DiscreteStates): - # new_not_done_states.masks = self.update_masks(not_done_states, not_done_actions) new_states.tensor[~new_sink_states_idx] = new_not_done_states_tensor diff --git a/src/gfn/gflownet/base.py b/src/gfn/gflownet/base.py index 0656ba64..5e04151d 100644 --- a/src/gfn/gflownet/base.py +++ b/src/gfn/gflownet/base.py @@ -1,6 +1,6 @@ +import math from abc import ABC, abstractmethod from typing import Generic, Tuple, TypeVar, Union -import math import torch import torch.nn as nn @@ -26,12 +26,15 @@ class GFlowNet(ABC, nn.Module, Generic[TrainingSampleType]): """ @abstractmethod - def sample_trajectories(self, env: Env, n_samples: int) -> Trajectories: + def sample_trajectories( + self, env: Env, n_samples: int, sample_off_policy: bool + ) -> Trajectories: """Sample a specific number of complete trajectories. Args: env: the environment to sample trajectories from. n_samples: number of trajectories to be sampled. + sample_off_policy: whether to sample trajectories on / off policy. Returns: Trajectories: sampled trajectories object. 
""" @@ -48,12 +51,6 @@ def sample_terminating_states(self, env: Env, n_samples: int) -> States: trajectories = self.sample_trajectories(env, n_samples, sample_off_policy=False) return trajectories.last_states - def pf_pb_named_parameters(self): - return {k: v for k, v in self.named_parameters() if "pb" in k or "pf" in k} - - def pf_pb_parameters(self): - return [v for k, v in self.named_parameters() if "pb" in k or "pf" in k] - def logz_named_parameters(self): return {"logZ": dict(self.named_parameters())["logZ"]} @@ -97,6 +94,12 @@ def sample_trajectories( return trajectories + def pf_pb_named_parameters(self): + return {k: v for k, v in self.named_parameters() if "pb" in k or "pf" in k} + + def pf_pb_parameters(self): + return [v for k, v in self.named_parameters() if "pb" in k or "pf" in k] + class TrajectoryBasedGFlowNet(PFBasedGFlowNet[Trajectories]): def get_pfs_and_pbs( @@ -148,7 +151,7 @@ def get_pfs_and_pbs( if self.off_policy: # We re-use the values calculated in .sample_trajectories(). - if not isinstance(trajectories.estimator_outputs, type(None)): + if trajectories.estimator_outputs is not None: estimator_outputs = trajectories.estimator_outputs[ ~trajectories.actions.is_dummy ] @@ -211,9 +214,8 @@ def get_trajectories_scores( total_log_pb_trajectories = log_pb_trajectories.sum(dim=0) log_rewards = trajectories.log_rewards - if math.isfinite(self.log_reward_clip_min) and not isinstance( - log_rewards, type(None) - ): + # TODO: log_reward_clip_min isn't defined in base (#155). 
+ if math.isfinite(self.log_reward_clip_min) and log_rewards is not None: log_rewards = log_rewards.clamp_min(self.log_reward_clip_min) if torch.any(torch.isinf(total_log_pf_trajectories)) or torch.any( diff --git a/src/gfn/gflownet/detailed_balance.py b/src/gfn/gflownet/detailed_balance.py index 818a2d8a..4cb4e6e2 100644 --- a/src/gfn/gflownet/detailed_balance.py +++ b/src/gfn/gflownet/detailed_balance.py @@ -35,12 +35,12 @@ def __init__( logF: ScalarEstimator, off_policy: bool, forward_looking: bool = False, - log_reward_clamp_min: float = -float("inf"), + log_reward_clip_min: float = -float("inf"), ): super().__init__(pf, pb, off_policy=off_policy) self.logF = logF self.forward_looking = forward_looking - self.log_reward_clamp_min = log_reward_clamp_min + self.log_reward_clip_min = log_reward_clip_min def get_scores( self, env: Env, transitions: Transitions @@ -68,10 +68,13 @@ def get_scores( if states.batch_shape != tuple(actions.batch_shape): raise ValueError("Something wrong happening with log_pf evaluations") - if self.off_policy: + if not self.off_policy: valid_log_pf_actions = transitions.log_probs else: # Evaluate the log PF of the actions sampled off policy. + # I suppose the Transitions container should then have some + # estimator_outputs attribute as well, to avoid duplication here ? + # See (#156). module_output = self.pf(states) # TODO: Inefficient duplication. valid_log_pf_actions = self.pf.to_probability_distribution( states, module_output @@ -82,8 +85,8 @@ def get_scores( valid_log_F_s = self.logF(states).squeeze(-1) if self.forward_looking: log_rewards = env.log_reward(states) # TODO: RM unsqueeze(-1) ? 
- if math.isfinite(self.log_reward_clamp_min): - log_rewards = log_rewards.clamp_min(self.log_reward_clamp_min) + if math.isfinite(self.log_reward_clip_min): + log_rewards = log_rewards.clamp_min(self.log_reward_clip_min) valid_log_F_s = valid_log_F_s + log_rewards preds = valid_log_pf_actions + valid_log_F_s @@ -163,7 +166,7 @@ def get_scores(self, transitions: Transitions) -> TT["n_trajectories", torch.flo all_log_rewards = transitions.all_log_rewards[mask] module_output = self.pf(states) pf_dist = self.pf.to_probability_distribution(states, module_output) - if self.off_policy: + if not self.off_policy: valid_log_pf_actions = transitions[mask].log_probs else: # Evaluate the log PF of the actions sampled off policy. diff --git a/src/gfn/gym/discrete_ebm.py b/src/gfn/gym/discrete_ebm.py index ecd05eea..a4f82735 100644 --- a/src/gfn/gym/discrete_ebm.py +++ b/src/gfn/gym/discrete_ebm.py @@ -116,26 +116,6 @@ def make_random_states_tensor( device=env.device, ) - # TODO: Look into make masks - I don't think this is being called. - def make_masks( - self, - ) -> Tuple[ - TT["batch_shape", "n_actions", torch.bool], - TT["batch_shape", "n_actions - 1", torch.bool], - ]: - forward_masks = torch.zeros( - self.batch_shape + (env.n_actions,), - device=env.device, - dtype=torch.bool, - ) - backward_masks = torch.zeros( - self.batch_shape + (env.n_actions - 1,), - device=env.device, - dtype=torch.bool, - ) - - return forward_masks, backward_masks - def update_masks(self) -> None: self.set_default_typing() self.forward_masks[..., : env.ndim] = self.tensor == -1 diff --git a/src/gfn/samplers.py b/src/gfn/samplers.py index 92781664..56cd83de 100644 --- a/src/gfn/samplers.py +++ b/src/gfn/samplers.py @@ -91,7 +91,7 @@ def sample_trajectories( off_policy: bool, states: Optional[States] = None, n_trajectories: Optional[int] = None, - test_mode: bool = False, + debug_mode: bool = False, **policy_kwargs, ) -> Trajectories: """Sample trajectories sequentially. 
@@ -110,7 +110,7 @@ def sample_trajectories( parameter, `epsilon`, and `sf_bias`. In the continuous case these kwargs will be user defined. This can be used to, for example, sample off-policy. - test_mode: if True, everything gets calculated. + debug_mode: if True, everything gets calculated. Returns: A Trajectories object representing the batch of sampled trajectories. @@ -118,8 +118,8 @@ def sample_trajectories( AssertionError: When both states and n_trajectories are specified. AssertionError: When states are not linear. """ - save_estimator_outputs = off_policy or test_mode - skip_logprob_calculaion = off_policy and not test_mode + save_estimator_outputs = off_policy or debug_mode + skip_logprob_calculaion = off_policy and not debug_mode if states is None: assert ( @@ -173,7 +173,7 @@ def sample_trajectories( calculate_logprobs=False if skip_logprob_calculaion else True, **policy_kwargs, ) - if not isinstance(estimator_outputs, type(None)): + if estimator_outputs is not None: # Place estimator outputs into a stackable tensor. Note that this # will be replaced with torch.nested.nested_tensor in the future. estimator_outputs_padded = torch.full( @@ -202,11 +202,14 @@ def sample_trajectories( # Increment the step, determine which trajectories are finisihed, and eval # rewards. step += 1 + # new_dones means those trajectories that just finished. Because we + # pad the sink state to every short trajectory, we need to make sure + # to filter out the already done ones. new_dones = ( new_states.is_initial_state if self.estimator.is_backward else sink_states_mask - ) & ~dones # TODO: why is ~dones used here and again later on? Is this intentional? 
+ ) & ~dones trajectories_dones[new_dones & ~dones] = step try: trajectories_log_rewards[new_dones & ~dones] = env.log_reward( diff --git a/src/gfn/states.py b/src/gfn/states.py index f5d63a4e..e50b6aea 100644 --- a/src/gfn/states.py +++ b/src/gfn/states.py @@ -1,6 +1,7 @@ from __future__ import annotations # This allows to use the class name in type hints from abc import ABC, abstractmethod +from copy import deepcopy from math import prod from typing import ClassVar, Optional, Sequence, cast @@ -133,7 +134,7 @@ def __getitem__(self, index: int | Sequence[int] | Sequence[bool]) -> States: """Access particular states of the batch.""" return self.__class__( self.tensor[index] - ) # TODO: Inefficient - this make a copy of the tensor! + ) # TODO: Inefficient - this might make a copy of the tensor! def __setitem__( self, index: int | Sequence[int] | Sequence[bool], states: States @@ -142,9 +143,8 @@ def __setitem__( self.tensor[index] = states.tensor def clone(self) -> States: - """Returns a clone of the current instance.""" - # TODO: Do we need to copy _log_rewards? - return self.__class__(self.tensor.detach().clone()) + """Returns a *detached* clone of the current instance using deepcopy.""" + return deepcopy(self) def flatten(self) -> States: """Flatten the batch dimension of the states. diff --git a/src/gfn/utils/common.py b/src/gfn/utils/common.py index a80890c5..75a9ffe8 100644 --- a/src/gfn/utils/common.py +++ b/src/gfn/utils/common.py @@ -72,11 +72,14 @@ def validate( return validation_info -def set_seed(seed: int) -> None: +def set_seed(seed: int, performance_mode: bool = False) -> None: """Used to control randomness.""" torch.manual_seed(seed) random.seed(seed) np.random.seed(seed) torch.manual_seed(seed) - torch.backends.cudnn.deterministic = True - torch.backends.cudnn.benchmark = False + + # These are only set when we care about reproducibility over performance. 
+ if not performance_mode: + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False diff --git a/src/gfn/utils/modules.py b/src/gfn/utils/modules.py index f99aa22d..2ffbf54a 100644 --- a/src/gfn/utils/modules.py +++ b/src/gfn/utils/modules.py @@ -50,7 +50,7 @@ def __init__( arch.append(nn.Linear(hidden_dim, hidden_dim)) arch.append(activation()) self.torso = nn.Sequential(*arch) - self.torso.hidden_dim = hidden_dim # TODO: what is this? + self.torso.hidden_dim = hidden_dim else: self.torso = torso self.last_layer = nn.Linear(self.torso.hidden_dim, output_dim) diff --git a/testing/test_samplers_and_trajectories.py b/testing/test_samplers_and_trajectories.py index 77eca901..1ff865f5 100644 --- a/testing/test_samplers_and_trajectories.py +++ b/testing/test_samplers_and_trajectories.py @@ -80,7 +80,7 @@ def trajectory_sampling_with_return( sampler = Sampler(estimator=pf_estimator) # Test mode collects log_probs and estimator_ouputs, not encountered in the wild. - trajectories = sampler.sample_trajectories(env, off_policy=False, n_trajectories=5, test_mode=True) + trajectories = sampler.sample_trajectories(env, off_policy=False, n_trajectories=5, debug_mode=True) # trajectories = sampler.sample_trajectories(env, n_trajectories=10) # TODO - why is this duplicated? 
states = env.reset(batch_shape=5, random=True) From bafa1ad54c0d377756cedd8814a78f7f623a7c3c Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Tue, 13 Feb 2024 14:06:42 -0500 Subject: [PATCH 03/10] moved training specific imports here to avoid circular deps --- src/gfn/utils/training.py | 69 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 src/gfn/utils/training.py diff --git a/src/gfn/utils/training.py b/src/gfn/utils/training.py new file mode 100644 index 00000000..9144154b --- /dev/null +++ b/src/gfn/utils/training.py @@ -0,0 +1,69 @@ +from collections import Counter +from typing import Dict, Optional + +import torch +from torchtyping import TensorType as TT + +from gfn.env import Env +from gfn.gflownet import GFlowNet, TBGFlowNet +from gfn.states import States + + +def get_terminating_state_dist_pmf(env: Env, states: States) -> TT["n_states", float]: + states_indices = env.get_terminating_states_indices(states).cpu().numpy().tolist() + counter = Counter(states_indices) + counter_list = [ + counter[state_idx] if state_idx in counter else 0 + for state_idx in range(env.n_terminating_states) + ] + + return torch.tensor(counter_list, dtype=torch.float) / len(states_indices) + + +def validate( + env: Env, + gflownet: GFlowNet, + n_validation_samples: int = 1000, + visited_terminating_states: Optional[States] = None, +) -> Dict[str, float]: + """Evaluates the current gflownet on the given environment. + + This is for environments with known target reward. The validation is done by + computing the l1 distance between the learned empirical and the target + distributions. + + Args: + env: The environment to evaluate the gflownet on. + gflownet: The gflownet to evaluate. + n_validation_samples: The number of samples to use to evaluate the pmf. + visited_terminating_states: The terminating states visited during training. If given, the pmf is obtained from + these last n_validation_samples states. 
Otherwise, n_validation_samples are resampled for evaluation. + + Returns: A dictionary containing the l1 validation metric. If the gflownet + is a TBGFlowNet, i.e. contains LogZ, then the (absolute) difference + between the learned and the target LogZ is also returned in the dictionary. + """ + + true_logZ = env.log_partition + true_dist_pmf = env.true_dist_pmf + if isinstance(true_dist_pmf, torch.Tensor): + true_dist_pmf = true_dist_pmf.cpu() + else: + # The environment does not implement a true_dist_pmf property, nor a log_partition property + # We cannot validate the gflownet + return {} + + logZ = None + if isinstance(gflownet, TBGFlowNet): + logZ = gflownet.logZ.item() + if visited_terminating_states is None: + terminating_states = gflownet.sample_terminating_states(n_validation_samples) + else: + terminating_states = visited_terminating_states[-n_validation_samples:] + + final_states_dist_pmf = get_terminating_state_dist_pmf(env, terminating_states) + l1_dist = (final_states_dist_pmf - true_dist_pmf).abs().mean().item() + validation_info = {"l1_dist": l1_dist} + if logZ is not None: + validation_info["logZ_diff"] = abs(logZ - true_logZ) + return validation_info From 0990d51f2c48e7be4d476962c665172b16d5c576 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Tue, 13 Feb 2024 14:07:00 -0500 Subject: [PATCH 04/10] circular deps fix --- src/gfn/utils/common.py | 66 ----------------------------------------- 1 file changed, 66 deletions(-) diff --git a/src/gfn/utils/common.py b/src/gfn/utils/common.py index 75a9ffe8..e00b47d2 100644 --- a/src/gfn/utils/common.py +++ b/src/gfn/utils/common.py @@ -1,76 +1,10 @@ import random -from collections import Counter -from typing import Dict, Optional import numpy as np import torch -from torchtyping import TensorType as TT -from gfn.containers import Trajectories, Transitions -from gfn.env import Env -from gfn.gflownet import GFlowNet, TBGFlowNet -from gfn.states import States -def get_terminating_state_dist_pmf(env: Env, 
states: States) -> TT["n_states", float]: - states_indices = env.get_terminating_states_indices(states).cpu().numpy().tolist() - counter = Counter(states_indices) - counter_list = [ - counter[state_idx] if state_idx in counter else 0 - for state_idx in range(env.n_terminating_states) - ] - - return torch.tensor(counter_list, dtype=torch.float) / len(states_indices) - - -def validate( - env: Env, - gflownet: GFlowNet, - n_validation_samples: int = 1000, - visited_terminating_states: Optional[States] = None, -) -> Dict[str, float]: - """Evaluates the current gflownet on the given environment. - - This is for environments with known target reward. The validation is done by - computing the l1 distance between the learned empirical and the target - distributions. - - Args: - env: The environment to evaluate the gflownet on. - gflownet: The gflownet to evaluate. - n_validation_samples: The number of samples to use to evaluate the pmf. - visited_terminating_states: The terminating states visited during training. If given, the pmf is obtained from - these last n_validation_samples states. Otherwise, n_validation_samples are resampled for evaluation. - - Returns: A dictionary containing the l1 validation metric. If the gflownet - is a TBGFlowNet, i.e. contains LogZ, then the (absolute) difference - between the learned and the target LogZ is also returned in the dictionary. 
- """ - - true_logZ = env.log_partition - true_dist_pmf = env.true_dist_pmf - if isinstance(true_dist_pmf, torch.Tensor): - true_dist_pmf = true_dist_pmf.cpu() - else: - # The environment does not implement a true_dist_pmf property, nor a log_partition property - # We cannot validate the gflownet - return {} - - logZ = None - if isinstance(gflownet, TBGFlowNet): - logZ = gflownet.logZ.item() - if visited_terminating_states is None: - terminating_states = gflownet.sample_terminating_states(n_validation_samples) - else: - terminating_states = visited_terminating_states[-n_validation_samples:] - - final_states_dist_pmf = get_terminating_state_dist_pmf(env, terminating_states) - l1_dist = (final_states_dist_pmf - true_dist_pmf).abs().mean().item() - validation_info = {"l1_dist": l1_dist} - if logZ is not None: - validation_info["logZ_diff"] = abs(logZ - true_logZ) - return validation_info - def set_seed(seed: int, performance_mode: bool = False) -> None: """Used to control randomness.""" From aa3c656edd09316c2ffa30574edec05be78d3259 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Wed, 14 Feb 2024 18:01:19 -0500 Subject: [PATCH 05/10] removing additions (additions commented out) --- src/gfn/containers/trajectories.py | 37 +++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/src/gfn/containers/trajectories.py b/src/gfn/containers/trajectories.py index 5b0142e6..7fad1416 100644 --- a/src/gfn/containers/trajectories.py +++ b/src/gfn/containers/trajectories.py @@ -168,8 +168,40 @@ def __getitem__(self, index: int | Sequence[int]) -> Trajectories: self._log_rewards[index] if self._log_rewards is not None else None ) + + # def _repeat_to_match(a: torch.Tensor, b: torch.Tensor): + # """ + # Repeats a along as many dimensions as required to match the + # dimensionality of b, skipping the first dimension of b. + # """ + # if a.shape == b.shape[1:]: # We don't consider the trajectory len.
+ # return a + # else: + # # Repeats each end dimension, skipping the first one, if + # # required. + # n = len(a.shape) + # for i, dim in enumerate(b.shape[1:]): + # if i + 1 > n: + # a = a.unsqueeze(-1).repeat((1,) * i + (dim,)) + # else: + # assert a.shape[i] == b.shape[i + 1] + + # assert a.shape == b.shape[1:] + + # return a + if is_tensor(self.estimator_outputs): - estimator_outputs = self.estimator_outputs[..., index][:new_max_length] + # TODO: Is there a safer way to index self.estimator_outputs? + # + # First we index along the first dimension of the estimator outputs. + # This can be thought of as the instance dimension, and is + # compatible with all supported indexing approaches (dim=1). + # All dims > 1 are not explicitly indexed unless the dimensionality + # of `index` matches all dimensions of `estimator_outputs` aside + # from the first (trajectory) dimension. + estimator_outputs = self.estimator_outputs[:, index] + # Next we index along the trajectory length (dim=0) + estimator_outputs = estimator_outputs[:new_max_length] else: estimator_outputs = None @@ -217,6 +249,9 @@ def extend(self, other: Trajectories) -> None: Args: other: an external set of Trajectories. """ + if len(other) == 0: + return + # TODO: The replay buffer is storing `dones` - this wastes a lot of space. 
self.actions.extend(other.actions) self.states.extend(other.states) From e2ad9ddae9844253680ff4aa098e22bfb82cef80 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Wed, 14 Feb 2024 18:02:28 -0500 Subject: [PATCH 06/10] formatting common --- src/gfn/utils/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/gfn/utils/common.py b/src/gfn/utils/common.py index e00b47d2..cc5b97a7 100644 --- a/src/gfn/utils/common.py +++ b/src/gfn/utils/common.py @@ -4,8 +4,6 @@ import torch - - def set_seed(seed: int, performance_mode: bool = False) -> None: """Used to control randomness.""" torch.manual_seed(seed) From be122ed785eb024a9a881d5b5697e1508e1f84d6 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Wed, 14 Feb 2024 18:03:03 -0500 Subject: [PATCH 07/10] indexing reverted to old strategy with copious documentation --- src/gfn/containers/trajectories.py | 26 ++------------------------ 1 file changed, 2 insertions(+), 24 deletions(-) diff --git a/src/gfn/containers/trajectories.py b/src/gfn/containers/trajectories.py index 7fad1416..3ca3b47e 100644 --- a/src/gfn/containers/trajectories.py +++ b/src/gfn/containers/trajectories.py @@ -167,31 +167,9 @@ def __getitem__(self, index: int | Sequence[int]) -> Trajectories: log_rewards = ( self._log_rewards[index] if self._log_rewards is not None else None ) - - - # def _repeat_to_match(a: torch.Tensor, b: torch.Tensor): - # """ - # Repeats a along as many dimensions as required to match the - # dimensionality of b, skipping the first dimension of b. - # """ - # if a.shape == b.shape[1:]: # We don't consider the trajectory len. - # return a - # else: - # # Repeats each end dimension, skipping the first one, if - # # required. 
- # n = len(a.shape) - # for i, dim in enumerate(b.shape[1:]): - # if i + 1 > n: - # a = a.unsqueeze(-1).repeat((1,) * i + (dim,)) - # else: - # assert a.shape[i] == b.shape[i + 1] - - # assert a.shape == b.shape[1:] - - # return a - if is_tensor(self.estimator_outputs): - # TODO: Is there a safer way to index self.estimator_outputs? + # TODO: Is there a safer way to index self.estimator_outputs for + # n-dimensional estimator outputs? # # First we index along the first dimension of the estimator outputs. # This can be thought of as the instance dimension, and is From cfc560c89b6942bb9d77efd9c1a61bbaf02a8970 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Wed, 14 Feb 2024 18:03:33 -0500 Subject: [PATCH 08/10] formatting of tests --- testing/test_parametrizations_and_losses.py | 8 ++++++-- testing/test_samplers_and_trajectories.py | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/testing/test_parametrizations_and_losses.py b/testing/test_parametrizations_and_losses.py index b33bb9ef..f2e725bf 100644 --- a/testing/test_parametrizations_and_losses.py +++ b/testing/test_parametrizations_and_losses.py @@ -208,7 +208,9 @@ def PFBasedGFlowNet_with_return( else: raise ValueError(f"Unknown gflownet {gflownet_name}") - trajectories = gflownet.sample_trajectories(env, sample_off_policy=False, n_samples=10) + trajectories = gflownet.sample_trajectories( + env, sample_off_policy=False, n_samples=10 + ) training_objects = gflownet.to_training_samples(trajectories) _ = gflownet.loss(env, training_objects) @@ -305,7 +307,9 @@ def test_subTB_vs_TB( zero_logF=True, ) - trajectories = gflownet.sample_trajectories(env, sample_off_policy=False, n_samples=10) + trajectories = gflownet.sample_trajectories( + env, sample_off_policy=False, n_samples=10 + ) subtb_loss = gflownet.loss(env, trajectories) if weighting == "TB": diff --git a/testing/test_samplers_and_trajectories.py b/testing/test_samplers_and_trajectories.py index 1ff865f5..71bdbc04 100644 --- 
a/testing/test_samplers_and_trajectories.py +++ b/testing/test_samplers_and_trajectories.py @@ -80,12 +80,16 @@ def trajectory_sampling_with_return( sampler = Sampler(estimator=pf_estimator) # Test mode collects log_probs and estimator_ouputs, not encountered in the wild. - trajectories = sampler.sample_trajectories(env, off_policy=False, n_trajectories=5, debug_mode=True) + trajectories = sampler.sample_trajectories( + env, off_policy=False, n_trajectories=5, debug_mode=True + ) # trajectories = sampler.sample_trajectories(env, n_trajectories=10) # TODO - why is this duplicated? states = env.reset(batch_shape=5, random=True) bw_sampler = Sampler(estimator=pb_estimator) - bw_trajectories = bw_sampler.sample_trajectories(env, off_policy=False, states=states) + bw_trajectories = bw_sampler.sample_trajectories( + env, off_policy=False, states=states + ) return trajectories, bw_trajectories, pf_estimator, pb_estimator From 2bebde2c9e5312da971c897e01a060b86502ca33 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Wed, 14 Feb 2024 18:05:37 -0500 Subject: [PATCH 09/10] isort / black --- tutorials/examples/train_box.py | 5 +---- tutorials/examples/train_discreteebm.py | 10 +++------- tutorials/examples/train_hypergrid.py | 10 +++++----- tutorials/examples/train_line.py | 9 ++++----- 4 files changed, 13 insertions(+), 21 deletions(-) diff --git a/tutorials/examples/train_box.py b/tutorials/examples/train_box.py index 0ea3e913..5a3cf8dd 100644 --- a/tutorials/examples/train_box.py +++ b/tutorials/examples/train_box.py @@ -6,7 +6,6 @@ python train_box.py --delta {0.1, 0.25} --tied {--uniform_pb} --loss {TB, DB} """ - from argparse import ArgumentParser import numpy as np @@ -233,9 +232,7 @@ def main(args): # noqa: C901 print(f"current optimizer LR: {optimizer.param_groups[0]['lr']}") trajectories = gflownet.sample_trajectories( - env, - sample_off_policy=False, - n_samples=args.batch_size + env, sample_off_policy=False, n_samples=args.batch_size ) training_samples = 
gflownet.to_training_samples(trajectories) diff --git a/tutorials/examples/train_discreteebm.py b/tutorials/examples/train_discreteebm.py index 68b1ba9f..33aa1cc8 100644 --- a/tutorials/examples/train_discreteebm.py +++ b/tutorials/examples/train_discreteebm.py @@ -10,7 +10,6 @@ [Learning GFlowNets from partial episodes for improved convergence and stability](https://arxiv.org/abs/2209.12782) python train_hypergrid.py --ndim {2, 4} --height 12 --R0 {1e-3, 1e-4} --tied --loss {TB, DB, SubTB} """ - from argparse import ArgumentParser import torch @@ -20,10 +19,9 @@ from gfn.gflownet import FMGFlowNet from gfn.gym import DiscreteEBM from gfn.modules import DiscretePolicyEstimator -from gfn.utils.common import validate -from gfn.utils.modules import NeuralNet, Tabular - from gfn.utils.common import set_seed +from gfn.utils.modules import NeuralNet, Tabular +from gfn.utils.training import validate DEFAULT_SEED = 4444 @@ -72,9 +70,7 @@ def main(args): # noqa: C901 validation_info = {"l1_dist": float("inf")} for iteration in trange(n_iterations): trajectories = gflownet.sample_trajectories( - env, - off_policy=False, - n_samples=args.batch_size + env, off_policy=False, n_samples=args.batch_size ) training_samples = gflownet.to_training_samples(trajectories) diff --git a/tutorials/examples/train_hypergrid.py b/tutorials/examples/train_hypergrid.py index 113df50f..e3301cdd 100644 --- a/tutorials/examples/train_hypergrid.py +++ b/tutorials/examples/train_hypergrid.py @@ -10,7 +10,6 @@ [Learning GFlowNets from partial episodes for improved convergence and stability](https://arxiv.org/abs/2209.12782) python train_hypergrid.py --ndim {2, 4} --height 12 --R0 {1e-3, 1e-4} --tied --loss {TB, DB, SubTB} """ - from argparse import ArgumentParser import torch @@ -28,10 +27,9 @@ ) from gfn.gym import HyperGrid from gfn.modules import DiscretePolicyEstimator, ScalarEstimator -from gfn.utils.common import validate -from gfn.utils.modules import DiscreteUniform, NeuralNet, Tabular - from 
gfn.utils.common import set_seed +from gfn.utils.modules import DiscreteUniform, NeuralNet, Tabular +from gfn.utils.training import validate DEFAULT_SEED = 4444 @@ -225,7 +223,9 @@ def main(args): # noqa: C901 n_iterations = args.n_trajectories // args.batch_size validation_info = {"l1_dist": float("inf")} for iteration in trange(n_iterations): - trajectories = gflownet.sample_trajectories(env, n_samples=args.batch_size, sample_off_policy=off_policy_sampling) + trajectories = gflownet.sample_trajectories( + env, n_samples=args.batch_size, sample_off_policy=off_policy_sampling + ) training_samples = gflownet.to_training_samples(trajectories) if replay_buffer is not None: with torch.no_grad(): diff --git a/tutorials/examples/train_line.py b/tutorials/examples/train_line.py index 3d0042e5..645a6f06 100644 --- a/tutorials/examples/train_line.py +++ b/tutorials/examples/train_line.py @@ -1,4 +1,3 @@ -import random from typing import ClassVar, Literal, Tuple import matplotlib.pyplot as plt @@ -15,7 +14,6 @@ from gfn.modules import GFNModule from gfn.states import States from gfn.utils import NeuralNet - from gfn.utils.common import set_seed @@ -215,7 +213,9 @@ def log_prob(self, sampled_actions): actions_to_eval[~exit_idx] = sampled_actions[~exit_idx] if sum(~exit_idx) > 0: - logprobs[~exit_idx] = self.dist.log_prob(actions_to_eval)[~exit_idx].unsqueeze(-1) + logprobs[~exit_idx] = self.dist.log_prob(actions_to_eval)[ + ~exit_idx + ].unsqueeze(-1) return logprobs.squeeze(-1) @@ -289,6 +289,7 @@ def to_probability_distribution( n_steps=self.n_steps_per_trajectory, ) + def train( gflownet, env, @@ -322,7 +323,6 @@ def train( scale_schedule = np.linspace(exploration_var_starting_val, 0, n_iterations) for iteration in tbar: - optimizer.zero_grad() # Off Policy Sampling. 
trajectories = gflownet.sample_trajectories( @@ -361,7 +361,6 @@ def train( if __name__ == "__main__": - environment = Line( mus=[2, 5], sigmas=[0.5, 0.5], From e7c7453fd1b97183c5ac6191d187f128a258dfa5 Mon Sep 17 00:00:00 2001 From: Joseph Viviano Date: Wed, 14 Feb 2024 18:05:49 -0500 Subject: [PATCH 10/10] isort --- tutorials/examples/test_scripts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/examples/test_scripts.py b/tutorials/examples/test_scripts.py index b7510d5a..ae592a97 100644 --- a/tutorials/examples/test_scripts.py +++ b/tutorials/examples/test_scripts.py @@ -5,8 +5,8 @@ from dataclasses import dataclass -import pytest import numpy as np +import pytest from .train_box import main as train_box_main from .train_discreteebm import main as train_discreteebm_main