LCOV - code coverage report
Current view: top level - src/database - GitHubDownloader.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 26 98 26.5 %
Date: 2026-06-19 12:50:25 Functions: 8 13 61.5 %

          Line data    Source code
       1             : // SPDX-FileCopyrightText: 2025 PairInteraction Developers
       2             : // SPDX-License-Identifier: LGPL-3.0-or-later
       3             : 
       4             : #include "pairinteraction/database/GitHubDownloader.hpp"
       5             : 
       6             : #include "pairinteraction/utils/paths.hpp"
       7             : 
       8             : #include <filesystem>
       9             : #include <fmt/core.h>
      10             : #include <fmt/format.h>
      11             : #include <fstream>
      12             : #include <future>
      13             : #include <httplib.h>
      14             : #include <spdlog/spdlog.h>
      15             : #include <stdexcept>
      16             : 
      17             : namespace pairinteraction {
      18             : namespace {
      19             : 
      20           7 : std::filesystem::path &ca_bundle_path() {
      21           7 :     static std::filesystem::path path;
      22           7 :     return path;
      23             : }
      24             : 
      25           6 : bool load_ca_bundle_from_path(httplib::SSLClient &client, const std::filesystem::path &path) {
      26           6 :     if (path.empty() || !std::filesystem::is_regular_file(path)) {
      27           0 :         return false;
      28             :     }
      29             : 
      30           6 :     std::ifstream in(path, std::ios::binary);
      31           6 :     std::string pem((std::istreambuf_iterator<char>(in)), {});
      32             : 
      33           6 :     if (!in || pem.empty()) {
      34           0 :         SPDLOG_WARN("Failed to read CA bundle from {}.", path.string());
      35           0 :         return false;
      36             :     }
      37             : 
      38           6 :     client.load_ca_cert_store(pem.data(), pem.size());
      39           6 :     SPDLOG_INFO("Using CA bundle from {}.", path.string());
      40           6 :     return true;
      41           6 : }
      42             : 
      43           0 : void log(const httplib::Request &req, const httplib::Response &res) {
      44           0 :     if (!spdlog::default_logger()->should_log(spdlog::level::debug)) {
      45           0 :         return;
      46             :     }
      47             : 
      48           0 :     SPDLOG_DEBUG("[httplib] {} {}", req.method, req.path);
      49           0 :     for (const auto &[k, v] : req.headers) {
      50           0 :         SPDLOG_DEBUG("[httplib]   {}: {}\n", k, v);
      51             :     }
      52             : 
      53           0 :     SPDLOG_DEBUG("[httplib] Response with status {}", res.status);
      54           0 :     for (const auto &[k, v] : res.headers) {
      55           0 :         SPDLOG_DEBUG("[httplib]   {}: {}\n", k, v);
      56             :     }
      57             : 
      58           0 :     if (res.body.empty()) {
      59           0 :         return;
      60             :     }
      61             : 
      62           0 :     if (res.body.size() > 1024) {
      63           0 :         SPDLOG_DEBUG("[httplib] Body ({} bytes, first 1024 bytes):", res.body.size());
      64             :     } else {
      65           0 :         SPDLOG_DEBUG("[httplib] Body ({} bytes):", res.body.size());
      66             :     }
      67           0 :     SPDLOG_DEBUG("[httplib]   {}", res.body.substr(0, 1024));
      68             : }
      69             : 
      70             : } // namespace
      71             : 
      72           1 : void set_ca_bundle_path(std::filesystem::path path) { ca_bundle_path() = std::move(path); }
      73             : 
      74           6 : std::filesystem::path get_ca_bundle_path() { return ca_bundle_path(); }
      75             : 
      76           6 : GitHubDownloader::GitHubDownloader() : client(std::make_unique<httplib::SSLClient>(host)) {
      77           6 :     client->set_follow_location(true);
      78           6 :     client->set_connection_timeout(5, 0); // seconds
      79           6 :     client->set_read_timeout(60, 0);      // seconds
      80           6 :     client->set_write_timeout(1, 0);      // seconds
      81           6 :     client->set_logger(log);
      82             : 
      83           6 :     if (const auto configured_path = get_ca_bundle_path(); !configured_path.empty()) {
      84           6 :         load_ca_bundle_from_path(*client, configured_path);
      85           6 :     }
      86           6 : }
      87             : 
      88           7 : GitHubDownloader::~GitHubDownloader() = default;
      89             : 
      90             : std::future<GitHubDownloader::Result>
      91           0 : GitHubDownloader::download(const std::string &remote_url, const std::string &if_modified_since,
      92             :                            bool use_octet_stream) const {
      93             :     return std::async(
      94           0 :         std::launch::async, [this, remote_url, if_modified_since, use_octet_stream]() -> Result {
      95           0 :             SPDLOG_DEBUG("Downloading from GitHub: {}", remote_url);
      96             : 
      97             :             // Prepare headers
      98             :             httplib::Headers headers{
      99             :                 {"User-Agent", "pairinteraction"},
     100             :                 {"X-GitHub-Api-Version", "2022-11-28"},
     101             :                 {"Accept",
     102           0 :                  use_octet_stream ? "application/octet-stream" : "application/vnd.github+json"}};
     103             : 
     104           0 :             if (!if_modified_since.empty()) {
     105           0 :                 headers.emplace("if-modified-since", if_modified_since);
     106             :             }
     107             : 
     108             :             // Use the GitHub token if available; otherwise, if we have a conditional request,
     109             :             // insert a dummy authorization header to avoid increasing rate limits
     110           0 :             if (auto *token = std::getenv("GITHUB_TOKEN"); token) {
     111           0 :                 headers.emplace("Authorization", fmt::format("Bearer {}", token));
     112           0 :             } else if (!if_modified_since.empty()) {
     113           0 :                 headers.emplace("Authorization",
     114             :                                 "avoids-an-increase-in-ratelimits-used-if-304-is-returned");
     115             :             }
     116             : 
     117             :             // If we're fetching binary, stream with a progress callback; otherwise use a simple get
     118           0 :             httplib::Result response;
     119           0 :             std::string streamed_body;
     120           0 :             if (use_octet_stream) {
     121           0 :                 auto content_receiver = [&](const char *data, size_t len) {
     122           0 :                     streamed_body.append(data, len);
     123           0 :                     return true;
     124           0 :                 };
     125             : 
     126             :                 // Progress display
     127           0 :                 int last_pct = -1;
     128           0 :                 auto progress_display = [&last_pct, remote_url](uint64_t cur, uint64_t total) {
     129           0 :                     if (total == 0) {
     130           0 :                         fmt::print(stderr, "\rDownloading {}...", remote_url);
     131           0 :                         (void)std::fflush(stderr);
     132           0 :                     } else if (int pct = static_cast<int>((cur * 100) / total); pct != last_pct) {
     133           0 :                         last_pct = pct;
     134           0 :                         fmt::print(stderr, "\rDownloading {}... {:3d}%", remote_url, pct);
     135           0 :                         (void)std::fflush(stderr);
     136             :                     }
     137           0 :                     return true;
     138           0 :                 };
     139             : 
     140           0 :                 response = client->Get(remote_url, headers, content_receiver, progress_display);
     141             : 
     142             :                 // Ensure the progress display ends cleanly if we showed it
     143           0 :                 if (last_pct >= 0) {
     144           0 :                     fmt::print(stderr, "\n");
     145           0 :                     (void)std::fflush(stderr);
     146             :                 }
     147           0 :             } else {
     148           0 :                 response = client->Get(remote_url, headers);
     149             :             }
     150             : 
     151             :             // Handle if the response is null
     152           0 :             if (!response) {
     153             :                 // Defensive handling: if response is null and the error is unknown,
     154             :                 // treat this as a 304 Not Modified
     155           0 :                 if (response.error() == httplib::Error::Unknown) {
     156           0 :                     return Result{304, "", "", {}};
     157             :                 }
     158           0 :                 throw std::runtime_error(fmt::format("Error downloading '{}': {}", remote_url,
     159           0 :                                                      httplib::to_string(response.error())));
     160             :             }
     161             : 
     162             :             // Parse the response
     163           0 :             Result result;
     164           0 :             if (response->has_header("x-ratelimit-remaining")) {
     165           0 :                 result.rate_limit.remaining =
     166           0 :                     std::stoi(response->get_header_value("x-ratelimit-remaining"));
     167             :             }
     168           0 :             if (response->has_header("x-ratelimit-reset")) {
     169           0 :                 result.rate_limit.reset_time =
     170           0 :                     std::stoi(response->get_header_value("x-ratelimit-reset"));
     171             :             }
     172           0 :             if (response->has_header("last-modified")) {
     173           0 :                 result.last_modified = response->get_header_value("last-modified");
     174             :             }
     175           0 :             result.body = use_octet_stream ? std::move(streamed_body) : response->body;
     176           0 :             result.status_code = response->status;
     177             : 
     178           0 :             SPDLOG_DEBUG("Response status: {}", response->status);
     179           0 :             return result;
     180           0 :         });
     181           0 : }
     182             : 
     183           1 : std::string GitHubDownloader::get_host() const { return "https://" + host; }
     184             : 
     185             : } // namespace pairinteraction

Generated by: LCOV version 1.16