diff --git a/doc/dnf5.conf.5.rst b/doc/dnf5.conf.5.rst index 2a72e4cad..347b3cd10 100644 --- a/doc/dnf5.conf.5.rst +++ b/doc/dnf5.conf.5.rst @@ -636,23 +636,50 @@ configuration. ``countme`` :ref:`boolean ` - Determines whether a special flag should be added to a single, randomly - chosen metalink/mirrorlist query each week. - This allows the repository owner to estimate the number of systems - consuming it, by counting such queries over a week's time, which is much - more accurate than just counting unique IP addresses (which is subject to - both overcounting and undercounting due to short DHCP leases and NAT, - respectively). - - The flag is a simple "countme=N" parameter appended to the metalink and - mirrorlist URL, where N is an integer representing the "longevity" bucket - this system belongs to. - The following 4 buckets are defined, based on how many full weeks have - passed since the beginning of the week when this system was installed: 1 = - first week, 2 = first month (2-4 weeks), 3 = six months (5-24 weeks) and 4 - = more than six months (> 24 weeks). - This information is meant to help distinguish short-lived installs from - long-term ones, and to gather other statistics about system lifecycle. + When enabled, one (and only one) HTTP GET request for the metalink file + will be selected at random every week to carry a special URL flag. + + This flag allows the repository provider to estimate the number of systems + consuming the repository, by counting such requests over a week's time. + This method is more accurate than just counting unique IP addresses (which + is subject to both overcounting and undercounting due to short DHCP leases + and NAT, respectively). + + This is *not* an out-of-band HTTP request made for this purpose alone. + Only requests initiated by DNF during normal operation, such as to check + for metadata updates, can get this flag. 
+ + The flag is a simple "countme=N" parameter appended to the metalink URL + where N is an integer representing the age "bucket" this system belongs to. + Four buckets are defined, based on how many full weeks have passed since + the installation of a system: + + ====== =============================== + bucket system age + ====== =============================== + 1 first week + 2 first month (2 - 4 weeks) + 3 first 6 months (5 - 24 weeks) + 4 more than 6 months (> 24 weeks) + ====== =============================== + + This number is meant to help distinguish short-lived (throwaway) machines + from long-term installs and get a better picture of how systems are used + over time. + + To determine a system's installation time ("epoch"), the ``machine-id(5)`` + file's modification time is used as the single source of truth. This file + is semantically tied to the system's lifetime as it's typically populated + at installation time or during the first boot by an installer tool or init + system (such as ``systemd(1)``), respectively, and remains unchanged. + + If the file is missing, empty or uninitialized (such as in containers), the + time of the very first request made using the expanded metalink URL (i.e. + with any repository variables such as ``$releasever`` substituted) that + carried the flag is declared as the epoch. + + If no metalink URL is defined for this repository but a mirrorlist URL is, + the latter is used for this purpose instead. Default: ``False``. diff --git a/libdnf5/repo/repo_downloader.cpp b/libdnf5/repo/repo_downloader.cpp index 7d84fad5b..b142aabbe 100644 --- a/libdnf5/repo/repo_downloader.cpp +++ b/libdnf5/repo/repo_downloader.cpp @@ -31,6 +31,7 @@ along with libdnf. If not, see . 
#include #include #include +#include #include #include @@ -682,6 +683,9 @@ const std::array COUNTME_BUCKETS = {{2, 5, 25}}; /// This is to align the time window with an absolute point in time rather /// than the last counting event (which could facilitate tracking across /// multiple such events). +/// +/// In the below comments, the window's current position will be referred to +/// as "this window" for brevity. void RepoDownloader::add_countme_flag(LibrepoHandle & handle) { auto & logger = *base->get_logger(); @@ -711,7 +715,7 @@ void RepoDownloader::add_countme_flag(LibrepoHandle & handle) { file_path /= COUNTME_COOKIE; int ver = COUNTME_VERSION; // file format version (for future use) - time_t epoch = 0; // position of first-ever counted window + time_t epoch = 0; // position of first observed window time_t win = COUNTME_OFFSET; // position of last counted window int budget = -1; // budget for this window (-1 = generate) // TODO(lukash) ideally replace with utils::fs::File (via adding scanf() support?), @@ -743,8 +747,15 @@ void RepoDownloader::add_countme_flag(LibrepoHandle & handle) { // Compute the position of this window win = now - (delta % COUNTME_WINDOW); + + // Compute the epoch from this system's epoch or, if unknown, declare + // this window as the epoch (unless stored in the cookie previously). 
+    time_t sysepoch = get_system_epoch();
+    if (sysepoch)
+        epoch = sysepoch - ((sysepoch - COUNTME_OFFSET) % COUNTME_WINDOW);
     if (!epoch)
         epoch = win;
+
     // Window step (0 at epoch)
     int64_t step = (win - epoch) / COUNTME_WINDOW;
 
@@ -753,7 +764,7 @@ void RepoDownloader::add_countme_flag(LibrepoHandle & handle) {
     for (i = 0; i < COUNTME_BUCKETS.size(); ++i)
         if (step < COUNTME_BUCKETS[i])
             break;
-    uint32_t bucket = i + 1;  // Buckets are indexed from 1
+    uint32_t bucket = i + 1;  // Buckets are numbered from 1
 
     // Set the flag
     std::string flag = "countme=" + std::to_string(bucket);
@@ -781,6 +792,37 @@ std::set RepoDownloader::get_optional_metadata() const {
 }
 
 
+/// Returns this system's installation time ("epoch") as a UNIX timestamp.
+///
+/// Uses the machine-id(5) file's mtime as a good-enough source of truth. This
+/// file is typically tied to the system's installation or first boot where it's
+/// populated by an installer tool or init system, respectively, and is never
+/// changed afterwards.
+///
+/// Some systems, such as containers that don't run an init system, may have the
+/// file missing, empty or uninitialized. In those cases, and whenever no ID can
+/// be read from the file at all, the installation time is unknown.
+///
+/// @return The file's mtime, or 0 if the installation time is unknown.
+time_t RepoDownloader::get_system_epoch() const {
+    const std::string filename = "/etc/machine-id";
+
+    struct stat st;
+    if (stat(filename.c_str(), &st) != 0 || st.st_size == 0)
+        return 0;
+
+    // A file that yields no ID (unreadable or whitespace-only) is as good as
+    // an empty one, so don't trust its mtime either.
+    std::string id;
+    std::ifstream file(filename);
+    file >> id;
+    if (id.empty() || id == "uninitialized")
+        return 0;
+
+    return st.st_mtime;
+}
+
+
 //void Downloader::download_url(ConfigMain * cfg, const char * url, int fd) {
 //    std::unique_ptr lr_handle(new_remote_handle(*cfg));
 //    GError * err_p{nullptr};
diff --git a/libdnf5/repo/repo_downloader.hpp b/libdnf5/repo/repo_downloader.hpp
index f6adcf7ef..1c14b272b 100644
--- a/libdnf5/repo/repo_downloader.hpp
+++ b/libdnf5/repo/repo_downloader.hpp
@@ -96,6 +96,7 @@ class RepoDownloader {
 
     std::string get_persistdir() const;
     void add_countme_flag(LibrepoHandle & handle);
+    time_t get_system_epoch() const;
 
     std::set get_optional_metadata() const;
 