diff --git a/.deps b/.deps new file mode 100644 index 0000000..e69de29 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..67929ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,8 @@ +.vscode +*.slo +debian/debhelper-build-stamp +debian/files +debian/libapache2-mod-log-ipmask +debian/*.debhelper.log +debian/*.debhelper +debian/*.substvars diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..0ab98d3 --- /dev/null +++ b/Makefile @@ -0,0 +1,49 @@ +## +## Makefile -- Build procedure for sample log_ipmask Apache module +## Autogenerated via ``apxs -n log_ipmask -g''. +## + +builddir=. +top_srcdir=/usr/share/apache2 +top_builddir=/usr/share/apache2 +include /usr/share/apache2/build/special.mk + +CXX=c++ +CXXFLAGS=-std=c++11 +EXTRA_LDFLAGS=-lstdc++ + +# the used tools +APACHECTL=apachectl + +# additional defines, includes and libraries +#DEFS=-Dmy_define=my_value +#INCLUDES=-Imy/include/dir +#LIBS=-Lmy/lib/dir -lmylib + +# the default target +all: local-shared-build + +# install the shared object file into Apache +install: install-modules-yes + +# cleanup +clean: + -rm -f mod_log_ipmask.o mod_log_ipmask.lo mod_log_ipmask.slo mod_log_ipmask.la + +# simple test +test: reload + lynx -mime_header http://localhost/log_ipmask + +# install and activate shared object by reloading Apache to +# force a reload of the shared object file +reload: install restart + +# the general Apache start/restart/stop +# procedures +start: + $(APACHECTL) start +restart: + $(APACHECTL) restart +stop: + $(APACHECTL) stop + diff --git a/NOTICE.txt b/NOTICE.txt new file mode 100644 index 0000000..2397c6b --- /dev/null +++ b/NOTICE.txt @@ -0,0 +1,14 @@ +mod_log_ipmask module for the Apache HTTP Server +Copyright 2018 aquenos GmbH. 
+ +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Portions of this software were developed at the National Center +for Supercomputing Applications (NCSA) at the University of +Illinois at Urbana-Champaign. + +This software contains code derived from the RSA Data Security +Inc. MD5 Message-Digest Algorithm, including various +modifications by Spyglass Inc., Carnegie Mellon University, and +Bell Communications Research, Inc (Bellcore). diff --git a/README.md b/README.md new file mode 100644 index 0000000..b2c8435 --- /dev/null +++ b/README.md @@ -0,0 +1,101 @@ +mod_log_ipmask +============== + +The mod_log_ipmask module is designed to work with version 2.4 of the Apache +HTTP Server. It extends the mod_log_config module by overriding the `%a` and +`%h` format strings in order to limit the number of IP address bits that are +included in log files. This is intended for applications where partial logging +of IP addresses is desired, but full IP addresses may not be logged due to +privacy concerns. + + +Installation +------------ + +On Debian-based systems, a package can be built from source by running `debuild` +in the source tree. Subsequently, the generated `.deb` package can be installed +with `dpkg`. The module has been developed for Ubuntu 16.04 LTS. When building +it on other Debian-based distributions, slight modifications to the `Makefile` +may be necessary. + +On other systems, the module can be built by running `make` and installed by +running `make install`. The `Makefile` may need to be adapted if the +configuration of the target system differs from Ubuntu 16.04 LTS (e.g. different +Apache installation path, different compiler name or flags). + + +Configuration +------------- + +When using the Debian package, the module is automatically enabled during +installation. The default configuration file can be found in +`/etc/apache2/mods-available/log_ipmask.conf`.
+ +When installing the module using `make install` the following line needs to be +added to the Apache HTTP Server configuration file in order to load the module. + +``` +LoadModule log_ipmask_module /usr/lib/apache2/modules/mod_log_ipmask.so +``` + +Obviously, the path to the module DSO file may need to be adjusted. + +The default configuration file distributed with the Debian package contains the +following configuration directives: + +``` +LogDefaultIPv4Mask 24 +LogDefaultIPv6Mask 56 +``` + +This limits the logging of IPv4 addresses to their first 24 bits and the logging +of IPv6 addresses to their first 56 bits. If these directives are not specified, +the module logs the full IP addresses by default. + +The `LogDefaultIPv4Mask` and `LogDefaultIPv6Mask` directives may be used on the +server config or virtual host levels. Settings specified on the virtual host +level override settings specified on the server config level. + +In order to not log any bits of the IP address, the mask can be set to zero. +This results in the address `0.0.0.0` being logged for IPv4 addresses and the +address `::` being logged for IPv6 addresses. In order to enable the logging of +full IP addresses, `LogDefaultIPv4Mask` can be set to `32` and +`LogDefaultIPv6Mask` can be set to `128`. These are also the default settings if +the directives are not specified at all. + +The mask can also be configured on a per-log basis by adding the mask to the +parameters of the `%a` or `%h` format string. + +Examples: + +- `%{8|16}a` logs the first 8 bits of IPv4 addresses and the first 16 bits of + IPv6 addresses. +- `%{c|8|16}a` logs the first 8 bits of IPv4 addresses and the first 16 bits of + IPv6 addresses, but uses the peer IP address of the connection (as + described in the documentation of + [mod_log_config](http://httpd.apache.org/docs/2.4/mod/mod_log_config.html)). +- `%a` logs the address according to the settings specified by + `LogDefaultIPv4Mask` and `LogDefaultIPv6Mask`. 
+- `%{c}a` logs the address according to the settings specified by + `LogDefaultIPv4Mask` and `LogDefaultIPv6Mask`, but uses the peer IP address of + the connection (as described in the documentation of + [mod_log_config](http://httpd.apache.org/docs/2.4/mod/mod_log_config.html)). +- `%{8|16}h` logs the first 8 bits of IPv4 addresses and the first 16 bits of + IPv6 addresses. If the remote host name has been resolved, it is logged as is. +- `%h` logs the address according to the settings specified by + `LogDefaultIPv4Mask` and `LogDefaultIPv6Mask`. If the remote host name has + been resolved, it is logged as is. + + +Limitations +----------- + +When using the `%h` format string, only IP addresses are masked. If the IP +address has been resolved into a hostname, the hostname is kept as-is. + + +Trivia +------ + +Despite its name, this module does not share any code with the mod_log_ipmask +module from https://github.com/webfactory/mod_log_ipmask/. diff --git a/debian/changelog b/debian/changelog new file mode 100644 index 0000000..3f6767e --- /dev/null +++ b/debian/changelog @@ -0,0 +1,5 @@ +apache2-mod-log-ipmask (1.0.0) stable; urgency=low + + * Initial release. + + -- Sebastian Marsching Thu, 01 Mar 2018 16:20:18 +0100 diff --git a/debian/compat b/debian/compat new file mode 100644 index 0000000..ec63514 --- /dev/null +++ b/debian/compat @@ -0,0 +1 @@ +9 diff --git a/debian/control b/debian/control new file mode 100644 index 0000000..8726567 --- /dev/null +++ b/debian/control @@ -0,0 +1,14 @@ +Source: apache2-mod-log-ipmask +Maintainer: Sebastian Marsching +Section: web +Priority: extra +Standards-Version: 3.9.7 +Build-Depends: apache2-dev (>= 2.4), debhelper (>= 9), dh-apache2 + +Package: libapache2-mod-log-ipmask +Architecture: any +Depends: ${shlibs:Depends}, ${misc:Depends} +Description: Apache module for masking parts of client IP addresses in log + files.
This can be used to hide a configurable number of lower bits when + writing to the log file in order to comply with privacy regulations. This + module can deal with both IPv4 and IPv6 addresses. diff --git a/debian/copyright b/debian/copyright new file mode 100644 index 0000000..d30e087 --- /dev/null +++ b/debian/copyright @@ -0,0 +1,23 @@ +Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ +Upstream-Name: mod-log-ipmask +Source: https://github.com/aquenos/apache2-mod-log-ipmask/ + +Files: * +Copyright: Copyright 2018 aquenos GmbH +License: Apache-2.0 + +License: Apache-2.0 + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + . + http://www.apache.org/licenses/LICENSE-2.0 + . + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + . + On Debian systems, the full text of the Apache License, Version 2.0 + can be found in the file `/usr/share/common-licenses/Apache-2.0'. 
diff --git a/debian/libapache2-mod-log-ipmask.apache2 b/debian/libapache2-mod-log-ipmask.apache2 new file mode 100644 index 0000000..29f0e32 --- /dev/null +++ b/debian/libapache2-mod-log-ipmask.apache2 @@ -0,0 +1,2 @@ +mod debian/log_ipmask.conf +mod debian/log_ipmask.load diff --git a/debian/libapache2-mod-log-ipmask.install b/debian/libapache2-mod-log-ipmask.install new file mode 100644 index 0000000..a81f020 --- /dev/null +++ b/debian/libapache2-mod-log-ipmask.install @@ -0,0 +1,2 @@ +NOTICE.txt usr/share/doc/libapache2-mod-log-ipmask +README.md usr/share/doc/libapache2-mod-log-ipmask diff --git a/debian/log_ipmask.conf b/debian/log_ipmask.conf new file mode 100644 index 0000000..11227bf --- /dev/null +++ b/debian/log_ipmask.conf @@ -0,0 +1,6 @@ + + # Restrict logging of IPv4 addresses to the first 24 bits. + LogDefaultIPv4Mask 24 + # Restrict logging of IPv6 addresses to the first 56 bits. + LogDefaultIPv6Mask 56 + diff --git a/debian/log_ipmask.load b/debian/log_ipmask.load new file mode 100644 index 0000000..983afb7 --- /dev/null +++ b/debian/log_ipmask.load @@ -0,0 +1 @@ +LoadModule log_ipmask_module /usr/lib/apache2/modules/mod_log_ipmask.so diff --git a/debian/rules b/debian/rules new file mode 100755 index 0000000..990760b --- /dev/null +++ b/debian/rules @@ -0,0 +1,9 @@ +#!/usr/bin/make -f + +%: + dh $@ --with apache2 + +override_dh_auto_clean: + make clean + +override_dh_auto_test: diff --git a/debian/source/format b/debian/source/format new file mode 100644 index 0000000..89ae9db --- /dev/null +++ b/debian/source/format @@ -0,0 +1 @@ +3.0 (native) diff --git a/mod_log_ipmask.cpp b/mod_log_ipmask.cpp new file mode 100644 index 0000000..304e642 --- /dev/null +++ b/mod_log_ipmask.cpp @@ -0,0 +1,788 @@ +/** + * Copyright 2018 aquenos GmbH. + * All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +// Minimal optional implementation, similar to, but not as sophisticated as the +// one from C++ 17. A default-constructed instance is empty; an instance +// constructed from a value holds a copy of that value. +template +class Optional { + +public: + + // Value-initialize val as well, so that copying an empty optional never + // reads an indeterminate value. + Optional() : has_value(false), val() { + } + + Optional(T const &value) : has_value(true), val(value) { + } + + Optional(T &&value) : has_value(true), val(value) { + } + + // The dereference operators do not check whether a value is present. + T &operator*() { + return val; + } + + T const &operator*() const { + return val; + } + + T *operator->() { + return &val; + } + + T const *operator->() const { + return &val; + } + + operator bool() const { + return has_value; + } + + // Checked access: throws std::runtime_error if the optional is empty. + T &value() { + check_has_value(); + return val; + } + + T const &value() const { + check_has_value(); + return val; + } + + // Returns the stored value, or default_value if the optional is empty. + T const &value_or(T const &default_value) const { + if (has_value) { + return val; + } else { + return default_value; + } + } + +private: + + bool has_value; + T val; + + // Declared const because it is also called from the const overload of + // value(). + inline void check_has_value() const { + if (!has_value) { + throw std::runtime_error("Attempt to dereference an empty optional."); + } + } + +}; + +// Base class for the parsers below: a cursor over an input string with +// accept (optional match) and expect (mandatory match) primitives. +class Parser { + +protected: + + std::string const input; + std::size_t position; + + Parser(std::string const &input) : input(input), position(0) { + } + + bool accept(char c) { + if (is_end_of_string() || input[position] != c) { + return false; + } else { + ++position; + return true; + } + } + + Optional accept_any_of(std::string const &chars) { + if (is_end_of_string() + || chars.find(input[position]) == std::string::npos) { + return Optional(); + } else { + char c =
input[position]; + ++position; + return Optional(c); + } + } + + void expect(char c) { + if (!accept(c)) { + throw std::invalid_argument("Did not find expected character."); + } + } + + char expect_any_of(std::string const &chars) { + auto result = accept_any_of(chars); + if (!result) { + throw std::invalid_argument("Did not find expected character."); + } + return *result; + } + + bool is_end_of_string() { + return position == input.length(); + } + +}; + +// Options extracted from the argument of a format string. The mask fields stay +// empty when the argument does not specify them. +struct Format_Options { + bool use_peer_ip = false; + Optional masked_bits_ipv4; + Optional masked_bits_ipv6; +}; + +// Parses a format-string argument of the form "", "c", "c|A|B" or "A|B", where +// A is a one- or two-digit number not greater than 32 and B is a one- to +// three-digit number not greater than 128. Any other input results in a +// std::invalid_argument exception. +// NOTE(review): allow_use_peer_ip is stored, but the visible parse() accepts +// the 'c' flag without consulting it - confirm whether this is intended. +class Format_Options_Parser : Parser { + +public: + + static Format_Options parse(std::string const &input, + bool allow_use_peer_ip) { + auto parser = Format_Options_Parser(input, allow_use_peer_ip); + parser.parse(); + return parser.result; + } + +private: + + bool allow_use_peer_ip; + Format_Options result; + + Format_Options_Parser(std::string const &input, bool allow_use_peer_ip) + : Parser(input), allow_use_peer_ip(allow_use_peer_ip) { + } + + bool accept_use_peer_ip_flag() { + return accept('c'); + } + + void parse() { + if (accept_use_peer_ip_flag()) { + result.use_peer_ip = true; + if (!is_end_of_string()) { + expect('|'); + }; + } + if (is_end_of_string()) { + return; + } + std::string buffer; + buffer.push_back(expect_any_of("0123456789")); + auto optional_char = accept_any_of("0123456789"); + if (optional_char) { + buffer.push_back(*optional_char); + } + result.masked_bits_ipv4 = std::stoi(buffer); + if (*result.masked_bits_ipv4 > 32) { + throw std::invalid_argument("IPv4 address only has 32 bits."); + } + buffer.erase(); + expect('|'); + buffer.push_back(expect_any_of("0123456789")); + optional_char = accept_any_of("0123456789"); + if (optional_char) { + buffer.push_back(*optional_char); + } + optional_char = accept_any_of("0123456789"); + if (optional_char) { + buffer.push_back(*optional_char); + } + result.masked_bits_ipv6 = std::stoi(buffer); + if (*result.masked_bits_ipv6 > 128) {
+ throw std::invalid_argument("IPv4 address only has 128 bits."); + } + if (!is_end_of_string()) { + throw std::invalid_argument("Unexpected characters at end of string"); + } + } + +}; + +class IPv4_Address_Parser : Parser { + +public: + + static std::array parse(std::string const &input) { + auto parser = IPv4_Address_Parser(input); + parser.parse(); + return parser.result; + } + +private: + + std::array result; + + IPv4_Address_Parser(std::string const &input) : Parser(input) { + } + + Optional accept_digit() { + return accept_any_of("0123456789"); + } + + char expect_digit() { + return expect_any_of("0123456789"); + } + + uint8_t expect_octet() { + std::string buffer; + buffer.push_back(expect_digit()); + auto optional_char = accept_digit(); + if (optional_char) { + buffer.push_back(*optional_char); + } + optional_char = accept_digit(); + if (optional_char) { + buffer.push_back(*optional_char); + } + int result = std::stoi(buffer); + if (result > 255) { + throw std::invalid_argument("Octet value must be less than 256."); + } + return result; + } + + void parse() { + result[0] = expect_octet(); + expect('.'); + result[1] = expect_octet(); + expect('.'); + result[2] = expect_octet(); + expect('.'); + result[3] = expect_octet(); + if (!is_end_of_string()) { + throw std::invalid_argument("Unexpected characters at end of address"); + } + } + +}; + +class IPv6_Address_Parser : Parser { + +public: + + static std::array parse(std::string const &input) { + auto parser = IPv6_Address_Parser(input); + parser.parse(); + return parser.result; + } + +private: + + std::array result; + + IPv6_Address_Parser(std::string const &input) : Parser(input) { + } + + Optional accept_hex_digit() { + return accept_any_of("0123456789ABCDEFabcdef"); + } + + char expect_hex_digit() { + return expect_any_of("0123456789ABCDEFabcdef"); + } + + Optional accept_hextet() { + std::string buffer; + auto optional_char = accept_hex_digit(); + if (!optional_char) { + return Optional(); + } + 
buffer.push_back(*optional_char); + optional_char = accept_hex_digit(); + if (optional_char) { + buffer.push_back(*optional_char); + } + optional_char = accept_hex_digit(); + if (optional_char) { + buffer.push_back(*optional_char); + } + optional_char = accept_hex_digit(); + if (optional_char) { + buffer.push_back(*optional_char); + } + return Optional(std::stoi(buffer, 0, 16)); + } + + std::uint16_t expect_hextet() { + auto result = accept_hextet(); + if (!result) { + throw std::invalid_argument("Could not find expected hextet."); + } + return *result; + } + + void parse() { + // The IPv6 address is represented by 8 hextets, each storing two bytes. + // The hextets are separated by single colons (:). + // Leading zeros in each hextet may be omitted and one sequence of all-zero + // hextets may be abbreviated by the double colon (::). + std::vector hextets_at_start; + std::vector hextets_at_end; + bool found_double_colon = false; + // An address may only start with a colon if it is actually a double colon, + // so we can simply skip the start part if the first character is a colon. + // Otherwise, we expect a hextet. + if (accept(':')) { + expect(':'); + found_double_colon = true; + } else { + hextets_at_start.push_back(expect_hextet()); + while (hextets_at_start.size() < 8) { + // Consume the colon after the last hextet. + expect(':'); + // If we find another colon, it means there is a double colon. + if (accept(':')) { + found_double_colon = true; + break; + } else { + hextets_at_start.push_back(expect_hextet()); + } + } + } + // When we get here, we already read 8 hextets or we found a double colon. + // In the second case, we want to read the remaining hextets (if there + // are any). + if (found_double_colon && !is_end_of_string()) { + // If there is a double colon, at least one hextet must have been + // omitted, so we expect fewer than 8 hextets in total.
+ auto remaining_hextets = 7 - hextets_at_start.size(); + while (remaining_hextets != 0) { + // There must be at least one additional hextet. + hextets_at_end.push_back(expect_hextet()); + --remaining_hextets; + if (is_end_of_string()) { + break; + } else { + expect(':'); + if (is_end_of_string()) { + // There must be no colon after the last hextet. + throw std::invalid_argument("Found colon after the last hextet"); + } + } + } + } + // Now, we should have read all characters. + if (!is_end_of_string()) { + throw std::invalid_argument("Unexpected characters at end of address"); + } + // Assemble the 16 octets: the hextets read before the double colon, then + // zeros for the hextets it stands for, then the hextets read after it. + int number_of_hextets_at_start = hextets_at_start.size(); + int number_of_missing_hextets = 8 - number_of_hextets_at_start + - hextets_at_end.size(); + int hextets_at_end_offset = number_of_hextets_at_start + + number_of_missing_hextets; + for (int i = 0; i < 8; ++i) { + std::uint16_t hextet; + if (i < number_of_hextets_at_start) { + hextet = hextets_at_start[i]; + } else if (i < hextets_at_end_offset) { + hextet = 0; + } else { + hextet = hextets_at_end[i - hextets_at_end_offset]; + } + // The high byte of each hextet is stored first. + result[i * 2] = hextet >> 8; + result[i * 2 + 1] = hextet & 0xff; + } + } + +}; + +// An IP address stored as an array of octets. Parser::parse converts a string +// into the octets and Printer::print converts the octets back into a string. +template +struct IP_Address { + using octets_type = std::array; + + static constexpr int number_of_octets = _number_of_octets; + octets_type octets; + + IP_Address() { + this->octets.fill(0); + } + + IP_Address(std::string const &str) : octets(Parser::parse(str)) { + } + + IP_Address(octets_type const &octets) : octets(octets) { + } + + IP_Address(octets_type &&octets) : octets(octets) { + } + + // Keeps the first masked_bits bits of the address and sets all remaining + // bits to zero. + void mask(int masked_bits) { + for (int i = 0; i < number_of_octets; ++i) { + if (masked_bits >= 8) { + // This octet is kept completely. + masked_bits -= 8; + } else if (masked_bits > 0) { + // Only the upper masked_bits bits of this octet are kept. + int unmasked_bits = 8 - masked_bits; + uint8_t octet = this->octets[i]; + octet >>= unmasked_bits; + octet <<= unmasked_bits; + this->octets[i] = octet; + masked_bits = 0; + } else { + this->octets[i] = 0; + } + } + } + + std::string str() { + return
Printer::print(this->octets); + } + +}; + +// Formats four octets as a dotted-decimal IPv4 address. +class IPv4_Address_Printer { + +public: + + static std::string print(std::array octets) { + IPv4_Address_Printer printer(octets); + return printer.os.str(); + } + + +private: + + std::ostringstream os; + + IPv4_Address_Printer(std::array octets) { + print_octet(octets[0]); + os << '.'; + print_octet(octets[1]); + os << '.'; + print_octet(octets[2]); + os << '.'; + print_octet(octets[3]); + } + + void print_octet(std::uint8_t octet) { + os << static_cast(octet); + } + +}; + +// Formats sixteen octets as an IPv6 address in hexadecimal hextet notation, +// abbreviating the longest run of two or more zero hextets with a double +// colon. +class IPv6_Address_Printer { + +public: + + static std::string print(std::array octets) { + IPv6_Address_Printer printer(octets); + return printer.os.str(); + } + + +private: + + std::ostringstream os; + + IPv6_Address_Printer(std::array octets) { + // First, we convert to hextets. This makes everything else easier. + std::array hextets; + for (int i = 0; i < 8; ++i) { + hextets[i] = (octets[2 * i] << 8) + octets[2 * i + 1]; + } + // We want to find the longest consecutive sequence of empty (zero) hextets. + // If there are several sequences of the same length, we use the left-most. + auto zero_seq_end = hextets.begin(); + auto longest_zero_seq_start = hextets.end(); + auto longest_zero_seq_end = hextets.end(); + auto longest_zero_seq_length = longest_zero_seq_end - longest_zero_seq_start; + do { + auto zero_seq_start = std::find(zero_seq_end, hextets.end(), 0); + zero_seq_end = zero_seq_start; + if (zero_seq_start != hextets.end()) { + // The predicate has to take the full 16-bit hextet. An 8-bit parameter + // would truncate a value like 0x0100 to zero, so that this hextet would + // be treated as part of the zero sequence and be lost in the output. + zero_seq_end = std::find_if(zero_seq_start, hextets.end(), + [](std::uint16_t hextet){return hextet != 0;}); + auto zero_seq_length = zero_seq_end - zero_seq_start; + if (zero_seq_length > longest_zero_seq_length) { + longest_zero_seq_start = zero_seq_start; + longest_zero_seq_end = zero_seq_end; + longest_zero_seq_length = zero_seq_length; + } + } + + } while (zero_seq_end != hextets.end()); + // We only print hex numbers, so we change the format flag.
+ os.setf(std::ios::hex, std::ios::basefield); + // According to conventions, a single zero hextet is not shortened using a + // double colon, so if the longest sequence is shorter than two hextets, we + // simply print all hextets. Otherwise, we split the printing into the part + // before the double colon and the part after. + if (longest_zero_seq_length < 2) { + os << hextets[0]; + for (int i = 1; i < 8; ++i) { + os << ':'; + os << hextets[i]; + } + } else { + // We print the part before the zero sequence. Note that we print the + // colon after each element. This means that we will also print the first + // colon of the double colon. + for (auto i = hextets.begin(); i != longest_zero_seq_start; ++i) { + os << *i; + os << ':'; + } + // If we did not print a single hextet (because the zero sequence starts + // right at the beginning), we have to print a colon. + if (longest_zero_seq_start == hextets.begin()) { + os << ':'; + } + // We print the part after the zero sequence. Note that we print the + // colon before each element. This means that we will also print the + // second colon of the double colon. + for (auto i = longest_zero_seq_end; i != hextets.end(); ++i) { + os << ':'; + os << *i; + } + // If we did not print a single hextet (because the zero sequence reaches + // right to the end), we have to print a colon. + if (longest_zero_seq_end == hextets.end()) { + os << ':'; + } + } + } + +}; + +using IPv4_Address = IP_Address<4, IPv4_Address_Parser, IPv4_Address_Printer>; +using IPv6_Address = IP_Address<16, IPv6_Address_Parser, IPv6_Address_Printer>; + +} // anonymous namespace + +extern "C" { + + +#include "httpd.h" +#include "http_config.h" +#include "http_core.h" + +#include "apr_strings.h" + +#include "mod_log_config.h" + + +struct log_ipmask_config { + Optional masked_bits_ipv4; + Optional masked_bits_ipv6; +}; + +// We only declare the get-config function here because its implementation +// depends on the module declaration. 
+static log_ipmask_config *get_log_ipmask_config(ap_conf_vector_t *configs); + +char const* mask_ip_address(char const *address_cstr, log_ipmask_config *config, + Format_Options format_options, apr_pool_t *pool) { + // If no mask has been defined, we use the full address by default. This means + // that only loading this module will not have any effect, without also using + // the respective configuration directives or specifying the special format. + int masked_bits_ipv4 = format_options.masked_bits_ipv4.value_or( + config->masked_bits_ipv4.value_or(32)); + int masked_bits_ipv6 = format_options.masked_bits_ipv6.value_or( + config->masked_bits_ipv6.value_or(128)); + std::string address_str(address_cstr); + std::string masked_address_str; + try { + IPv4_Address address(address_str); + address.mask(masked_bits_ipv4); + masked_address_str = address.str(); + } catch (...) { + // If the address is not an IPv4 address, it might still be an IPv6 address. + try { + IPv6_Address address(address_str); + address.mask(masked_bits_ipv6); + masked_address_str = address.str(); + } catch (...) { + // If the address is not an IPv6 address either, we cannot mask it and + // return the string as-is. + return address_cstr; + } + } + // The C string returned by the C++ string is only valid as long as the string + // object exists, so we have to duplicate it using the memory pool before + // returning it. + return apr_pstrdup(pool, masked_address_str.c_str()); +} + +static char const *log_remote_address(request_rec *request, + char *option_str) { + log_ipmask_config *config = get_log_ipmask_config( + request->server->module_config); + Format_Options format_options; + try { + format_options = Format_Options_Parser::parse(option_str, true); + } catch (...) { + // Ignore any exceptions that occur while parsing the format options and + // simply continue with the default options. 
+ } + char const *orig_ip_addr_str; + if (format_options.use_peer_ip) { + orig_ip_addr_str = request->connection->client_ip; + } else { + orig_ip_addr_str = request->useragent_ip; + } + return mask_ip_address(orig_ip_addr_str, config, format_options, + request->pool); +} + +static char const *log_remote_host(request_rec *request, char *option_str) { + log_ipmask_config *config = get_log_ipmask_config( + request->server->module_config); + Format_Options format_options; + try { + format_options = Format_Options_Parser::parse(option_str, false); + } catch (...) { + // Ignore any exceptions that occur while parsing the format options and + // simply continue with the default options. + } + char const *orig_ip_addr_str = ap_get_remote_host(request->connection, + request->per_dir_config, REMOTE_NAME, NULL); + char const *masked_ip_addr_str = mask_ip_address(orig_ip_addr_str, config, + format_options, request->pool); + // If the remote host is a name instead of an address, we could not mask it + // and we have to escape it because it may contain characters that need to be + // escaped. + if (masked_ip_addr_str == orig_ip_addr_str) { + return ap_escape_logitem(request->pool, orig_ip_addr_str); + } else { + return masked_ip_addr_str; + } +} + +static void *create_log_ipmask_config(apr_pool_t *pool, server_rec *server) { + log_ipmask_config *config = + new(apr_palloc(pool, sizeof(log_ipmask_config))) log_ipmask_config(); + return config; +} + +static void *merge_log_ipmask_config(apr_pool_t *pool, void *base_void, + void *add_void) { + log_ipmask_config *base = reinterpret_cast(base_void); + log_ipmask_config *add = reinterpret_cast(add_void); + + // Merge the configurations. + if (!add->masked_bits_ipv4) { + add->masked_bits_ipv4 = base->masked_bits_ipv4; + } + if (!add->masked_bits_ipv6) { + add->masked_bits_ipv6 = base->masked_bits_ipv6; + } + + return add; +} + +// This function is called early (before most of the configuration processing +// happens). 
However, it is called after the mod_log_config module has run its +// initialization code, so we can overwrite the log handlers registered by that +// module. +static int log_ipmask_pre_config(apr_pool_t *config_pool, apr_pool_t *log_pool, + apr_pool_t *temp_pool) { + // The parameters to the ap_register_log_handler function are: + // - Memory pool used by the function. + // - Identifier that is used in the format string. + // - Function that shall be invoked for this format identifier. + // - The default value for the want-original flag. The want-original flag is + // set by specifying < or > in front of the format identifier in the format + // string. The value passed to the function is 0 to use the final request by + // default (the same as specifying >) or 1 to use the original request by + // default (the same as specifying <). + auto register_log_handler = APR_RETRIEVE_OPTIONAL_FN(ap_register_log_handler); + if (register_log_handler) { + // We create char arrays instead of passing a string constant. The + // ap_register_log_handler expects a char * instead of a char const *, so + // passing a string constant results in a compiler warning. + char tag_remote_address[] = {'a', 0}; + char tag_remote_host[] = {'h', 0}; + register_log_handler(config_pool, tag_remote_address, log_remote_address, + 0); + register_log_handler(config_pool, tag_remote_host, log_remote_host, 0); + } + return OK; +} + +static char const *set_default_ipv4_mask(cmd_parms *cmd, void *dummy, + char const *arg) { + log_ipmask_config *config = get_log_ipmask_config(cmd->server->module_config); + // We first validate the string. std::stoi would only report an error if it + // cannot convert the string, not if it contained extra characters. 
+ if (!std::regex_match(arg, std::regex("[1-3]?[0-9]"))) { + return "Argument to LogDefaultIPv4Mask must be a number between zero and 32."; + } + int mask_bits = std::atoi(arg); + if (mask_bits > 32) { + return "Argument to LogDefaultIPv4Mask must be a number between zero and 32."; + } + config->masked_bits_ipv4 = mask_bits; + return nullptr; +} + +static char const *set_default_ipv6_mask(cmd_parms *cmd, void *dummy, + char const *arg) { + log_ipmask_config *config = get_log_ipmask_config(cmd->server->module_config); + // We first validate the string. std::stoi would only report an error if it + // cannot convert the string, not if it contained extra characters. + if (!std::regex_match(arg, std::regex("[1]?[0-9]?[0-9]"))) { + return "Argument to LogDefaultIPv4Mask must be a number between zero and 128."; + } + int mask_bits = std::atoi(arg); + if (mask_bits > 128) { + return "Argument to LogDefaultIPv4Mask must be a number between zero and 128."; + } + config->masked_bits_ipv6 = mask_bits; + return nullptr; +} + +// Data structure storing the supported configuration commands. C++ is more +// strict about casting function pointers than C, so we need a reinterpret_cast. +static const command_rec log_ipmask_config_commands[] = { + AP_INIT_TAKE1("LogDefaultIPv4Mask", + reinterpret_cast(set_default_ipv4_mask), nullptr, RSRC_CONF, + "bits of the IPv4 address that shall be included in the log"), + AP_INIT_TAKE1("LogDefaultIPv6Mask", + reinterpret_cast(set_default_ipv6_mask), nullptr, RSRC_CONF, + "bits of the IPv6 address that shall be included in the log"), + {nullptr} +}; + +static void log_ipmask_register_hooks(apr_pool_t *p) { + // The ap_hook_pre_config function takes the predecessor's source file name, not + // the module name. 
+ static const char * predecessors[] = { "mod_log_config.c", nullptr }; + ap_hook_pre_config(log_ipmask_pre_config, predecessors, nullptr, + APR_HOOK_MIDDLE); +} + +// Dispatch list for API hooks +module AP_MODULE_DECLARE_DATA log_ipmask_module = { + STANDARD20_MODULE_STUFF, + nullptr, // create per-dir config structures + nullptr, // merge per-dir config structures + create_log_ipmask_config, // create per-server config structures + merge_log_ipmask_config, // merge per-server config structures + log_ipmask_config_commands, // table of config file commands + log_ipmask_register_hooks // register hooks +}; + +// This function depends on the module declaration, so we have to place it after +// the declaration. +static log_ipmask_config *get_log_ipmask_config(ap_conf_vector_t *configs) { + log_ipmask_config *config = reinterpret_cast( + ap_get_module_config(configs, &log_ipmask_module)); +} + +} // extern "C" diff --git a/modules.mk b/modules.mk new file mode 100644 index 0000000..6ee7a96 --- /dev/null +++ b/modules.mk @@ -0,0 +1,4 @@ +mod_log_ipmask.la: mod_log_ipmask.slo + $(SH_LINK) -rpath $(libexecdir) -module -avoid-version mod_log_ipmask.lo +DISTCLEAN_TARGETS = modules.mk +shared = mod_log_ipmask.la