LCOV - code coverage report
Current view: top level - src/database - GitHubDownloader.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 26 98 26.5 %
Date: 2026-04-17 09:20:02 Functions: 8 13 61.5 %

          Line data    Source code
       1             : // SPDX-FileCopyrightText: 2025 PairInteraction Developers
       2             : // SPDX-License-Identifier: LGPL-3.0-or-later
       3             : 
       4             : #include "pairinteraction/database/GitHubDownloader.hpp"
       5             : 
       6             : #include "pairinteraction/utils/paths.hpp"
       7             : 
       8             : #include <filesystem>
       9             : #include <fmt/core.h>
      10             : #include <fstream>
      11             : #include <future>
      12             : #include <httplib.h>
      13             : #include <spdlog/spdlog.h>
      14             : #include <stdexcept>
      15             : 
      16             : namespace pairinteraction {
      17             : namespace {
      18             : 
      // Accessor for the process-wide CA bundle path.
      //
      // The path is kept in a function-local static: it starts out empty and every call
      // returns a reference to that same object, so callers can both read and assign it.
      std::filesystem::path &ca_bundle_path() {
          static std::filesystem::path bundle_location;
          return bundle_location;
      }
      23             : 
      24           5 : bool load_ca_bundle_from_path(httplib::SSLClient &client, const std::filesystem::path &path) {
      25           5 :     if (path.empty() || !std::filesystem::is_regular_file(path)) {
      26           0 :         return false;
      27             :     }
      28             : 
      29           5 :     std::ifstream in(path, std::ios::binary);
      30           5 :     std::string pem((std::istreambuf_iterator<char>(in)), {});
      31             : 
      32           5 :     if (!in || pem.empty()) {
      33           0 :         SPDLOG_WARN("Failed to read CA bundle from {}.", path.string());
      34           0 :         return false;
      35             :     }
      36             : 
      37           5 :     client.load_ca_cert_store(pem.data(), pem.size());
      38           5 :     SPDLOG_INFO("Using CA bundle from {}.", path.string());
      39           5 :     return true;
      40           5 : }
      41             : 
      42           0 : void log(const httplib::Request &req, const httplib::Response &res) {
      43           0 :     if (!spdlog::default_logger()->should_log(spdlog::level::debug)) {
      44           0 :         return;
      45             :     }
      46             : 
      47           0 :     SPDLOG_DEBUG("[httplib] {} {}", req.method, req.path);
      48           0 :     for (const auto &[k, v] : req.headers) {
      49           0 :         SPDLOG_DEBUG("[httplib]   {}: {}\n", k, v);
      50             :     }
      51             : 
      52           0 :     SPDLOG_DEBUG("[httplib] Response with status {}", res.status);
      53           0 :     for (const auto &[k, v] : res.headers) {
      54           0 :         SPDLOG_DEBUG("[httplib]   {}: {}\n", k, v);
      55             :     }
      56             : 
      57           0 :     if (res.body.empty()) {
      58           0 :         return;
      59             :     }
      60             : 
      61           0 :     if (res.body.size() > 1024) {
      62           0 :         SPDLOG_DEBUG("[httplib] Body ({} bytes, first 1024 bytes):", res.body.size());
      63             :     } else {
      64           0 :         SPDLOG_DEBUG("[httplib] Body ({} bytes):", res.body.size());
      65             :     }
      66           0 :     SPDLOG_DEBUG("[httplib]   {}", res.body.substr(0, 1024));
      67             : }
      68             : 
      69             : } // namespace
      70             : 
      71           1 : void set_ca_bundle_path(std::filesystem::path path) { ca_bundle_path() = std::move(path); }
      72             : 
      73           5 : std::filesystem::path get_ca_bundle_path() { return ca_bundle_path(); }
      74             : 
      75           5 : GitHubDownloader::GitHubDownloader() : client(std::make_unique<httplib::SSLClient>(host)) {
      76           5 :     client->set_follow_location(true);
      77           5 :     client->set_connection_timeout(5, 0); // seconds
      78           5 :     client->set_read_timeout(60, 0);      // seconds
      79           5 :     client->set_write_timeout(1, 0);      // seconds
      80           5 :     client->set_logger(log);
      81             : 
      82           5 :     if (const auto configured_path = get_ca_bundle_path(); !configured_path.empty()) {
      83           5 :         load_ca_bundle_from_path(*client, configured_path);
      84           5 :     }
      85           5 : }
      86             : 
      // Destructor, defaulted here in the implementation file rather than in the class body.
      // NOTE(review): presumably this keeps httplib::SSLClient an incomplete type in the
      // header while `client` is a std::unique_ptr member - confirm against the header.
      87           5 : GitHubDownloader::~GitHubDownloader() = default;
      88             : 
      89             : std::future<GitHubDownloader::Result>
      90           0 : GitHubDownloader::download(const std::string &remote_url, const std::string &if_modified_since,
      91             :                            bool use_octet_stream) const {
      92             :     return std::async(
      93           0 :         std::launch::async, [this, remote_url, if_modified_since, use_octet_stream]() -> Result {
      94           0 :             SPDLOG_DEBUG("Downloading from GitHub: {}", remote_url);
      95             : 
      96             :             // Prepare headers
      97             :             httplib::Headers headers{
      98             :                 {"User-Agent", "pairinteraction"},
      99             :                 {"X-GitHub-Api-Version", "2022-11-28"},
     100             :                 {"Accept",
     101           0 :                  use_octet_stream ? "application/octet-stream" : "application/vnd.github+json"}};
     102             : 
     103           0 :             if (!if_modified_since.empty()) {
     104           0 :                 headers.emplace("if-modified-since", if_modified_since);
     105             :             }
     106             : 
     107             :             // Use the GitHub token if available; otherwise, if we have a conditional request,
     108             :             // insert a dummy authorization header to avoid increasing rate limits
     109           0 :             if (auto *token = std::getenv("GITHUB_TOKEN"); token) {
     110           0 :                 headers.emplace("Authorization", fmt::format("Bearer {}", token));
     111           0 :             } else if (!if_modified_since.empty()) {
     112           0 :                 headers.emplace("Authorization",
     113             :                                 "avoids-an-increase-in-ratelimits-used-if-304-is-returned");
     114             :             }
     115             : 
     116             :             // If we're fetching binary, stream with a progress callback; otherwise use a simple get
     117           0 :             httplib::Result response;
     118           0 :             std::string streamed_body;
     119           0 :             if (use_octet_stream) {
     120           0 :                 auto content_receiver = [&](const char *data, size_t len) {
     121           0 :                     streamed_body.append(data, len);
     122           0 :                     return true;
     123           0 :                 };
     124             : 
     125             :                 // Progress display
     126           0 :                 int last_pct = -1;
     127           0 :                 auto progress_display = [&last_pct, remote_url](uint64_t cur, uint64_t total) {
     128           0 :                     if (total == 0) {
     129           0 :                         fmt::print(stderr, "\rDownloading {}...", remote_url);
     130           0 :                         (void)std::fflush(stderr);
     131           0 :                     } else if (int pct = static_cast<int>((cur * 100) / total); pct != last_pct) {
     132           0 :                         last_pct = pct;
     133           0 :                         fmt::print(stderr, "\rDownloading {}... {:3d}%", remote_url, pct);
     134           0 :                         (void)std::fflush(stderr);
     135             :                     }
     136           0 :                     return true;
     137           0 :                 };
     138             : 
     139           0 :                 response = client->Get(remote_url, headers, content_receiver, progress_display);
     140             : 
     141             :                 // Ensure the progress display ends cleanly if we showed it
     142           0 :                 if (last_pct >= 0) {
     143           0 :                     fmt::print(stderr, "\n");
     144           0 :                     (void)std::fflush(stderr);
     145             :                 }
     146           0 :             } else {
     147           0 :                 response = client->Get(remote_url, headers);
     148             :             }
     149             : 
     150             :             // Handle if the response is null
     151           0 :             if (!response) {
     152             :                 // Defensive handling: if response is null and the error is unknown,
     153             :                 // treat this as a 304 Not Modified
     154           0 :                 if (response.error() == httplib::Error::Unknown) {
     155           0 :                     return Result{304, "", "", {}};
     156             :                 }
     157           0 :                 throw std::runtime_error(fmt::format("Error downloading '{}': {}", remote_url,
     158           0 :                                                      httplib::to_string(response.error())));
     159             :             }
     160             : 
     161             :             // Parse the response
     162           0 :             Result result;
     163           0 :             if (response->has_header("x-ratelimit-remaining")) {
     164           0 :                 result.rate_limit.remaining =
     165           0 :                     std::stoi(response->get_header_value("x-ratelimit-remaining"));
     166             :             }
     167           0 :             if (response->has_header("x-ratelimit-reset")) {
     168           0 :                 result.rate_limit.reset_time =
     169           0 :                     std::stoi(response->get_header_value("x-ratelimit-reset"));
     170             :             }
     171           0 :             if (response->has_header("last-modified")) {
     172           0 :                 result.last_modified = response->get_header_value("last-modified");
     173             :             }
     174           0 :             result.body = use_octet_stream ? std::move(streamed_body) : response->body;
     175           0 :             result.status_code = response->status;
     176             : 
     177           0 :             SPDLOG_DEBUG("Response status: {}", response->status);
     178           0 :             return result;
     179           0 :         });
     180           0 : }
     181             : 
     182           1 : std::string GitHubDownloader::get_host() const { return "https://" + host; }
     183             : 
     184             : } // namespace pairinteraction

Generated by: LCOV version 1.16