LCOV - code coverage report
Current view: top level - src/database - GitHubDownloader.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 15 86 17.4 %
Date: 2026-01-22 14:02:01 Functions: 5 10 50.0 %

          Line data    Source code
       1             : // SPDX-FileCopyrightText: 2025 PairInteraction Developers
       2             : // SPDX-License-Identifier: LGPL-3.0-or-later
       3             : 
#include "pairinteraction/database/GitHubDownloader.hpp"

#include "pairinteraction/utils/paths.hpp"

#include <cstdlib>
#include <filesystem>
#include <fmt/core.h>
#include <fstream>
#include <future>
#include <httplib.h>
#include <spdlog/spdlog.h>
#include <stdexcept>
      15             : 
      16             : namespace pairinteraction {
      17             : 
      18           0 : void log(const httplib::Request &req, const httplib::Response &res) {
      19           0 :     if (!spdlog::default_logger()->should_log(spdlog::level::debug)) {
      20           0 :         return;
      21             :     }
      22             : 
      23           0 :     SPDLOG_DEBUG("[httplib] {} {}", req.method, req.path);
      24           0 :     for (const auto &[k, v] : req.headers) {
      25           0 :         SPDLOG_DEBUG("[httplib]   {}: {}\n", k, v);
      26             :     }
      27             : 
      28           0 :     SPDLOG_DEBUG("[httplib] Response with status {}", res.status);
      29           0 :     for (const auto &[k, v] : res.headers) {
      30           0 :         SPDLOG_DEBUG("[httplib]   {}: {}\n", k, v);
      31             :     }
      32             : 
      33           0 :     if (res.body.empty()) {
      34           0 :         return;
      35             :     }
      36             : 
      37           0 :     if (res.body.size() > 1024) {
      38           0 :         SPDLOG_DEBUG("[httplib] Body ({} bytes, first 1024 bytes):", res.body.size());
      39             :     } else {
      40           0 :         SPDLOG_DEBUG("[httplib] Body ({} bytes):", res.body.size());
      41             :     }
      42           0 :     SPDLOG_DEBUG("[httplib]   {}", res.body.substr(0, 1024));
      43             : }
      44             : 
      45           5 : GitHubDownloader::GitHubDownloader() : client(std::make_unique<httplib::SSLClient>(host)) {
      46           5 :     client->set_follow_location(true);
      47           5 :     client->set_connection_timeout(5, 0); // seconds
      48           5 :     client->set_read_timeout(60, 0);      // seconds
      49           5 :     client->set_write_timeout(1, 0);      // seconds
      50           5 :     client->set_logger(log);
      51           5 :     client->load_ca_cert_store(cert.data(), cert.size());
      52           5 : }
      53             : 
      54           5 : GitHubDownloader::~GitHubDownloader() = default;
      55             : 
      56             : std::future<GitHubDownloader::Result>
      57           0 : GitHubDownloader::download(const std::string &remote_url, const std::string &if_modified_since,
      58             :                            bool use_octet_stream) const {
      59             :     return std::async(
      60           0 :         std::launch::async, [this, remote_url, if_modified_since, use_octet_stream]() -> Result {
      61           0 :             SPDLOG_DEBUG("Downloading from GitHub: {}", remote_url);
      62             : 
      63             :             // Prepare headers
      64             :             httplib::Headers headers{
      65             :                 {"User-Agent", "pairinteraction"},
      66             :                 {"X-GitHub-Api-Version", "2022-11-28"},
      67             :                 {"Accept",
      68           0 :                  use_octet_stream ? "application/octet-stream" : "application/vnd.github+json"}};
      69             : 
      70           0 :             if (!if_modified_since.empty()) {
      71           0 :                 headers.emplace("if-modified-since", if_modified_since);
      72             :             }
      73             : 
      74             :             // Use the GitHub token if available; otherwise, if we have a conditional request,
      75             :             // insert a dummy authorization header to avoid increasing rate limits
      76           0 :             if (auto *token = std::getenv("GITHUB_TOKEN"); token) {
      77           0 :                 headers.emplace("Authorization", fmt::format("Bearer {}", token));
      78           0 :             } else if (!if_modified_since.empty()) {
      79           0 :                 headers.emplace("Authorization",
      80             :                                 "avoids-an-increase-in-ratelimits-used-if-304-is-returned");
      81             :             }
      82             : 
      83             :             // If we're fetching binary, stream with a progress callback; otherwise use a simple get
      84           0 :             httplib::Result response;
      85           0 :             std::string streamed_body;
      86           0 :             if (use_octet_stream) {
      87           0 :                 auto content_receiver = [&](const char *data, size_t len) {
      88           0 :                     streamed_body.append(data, len);
      89           0 :                     return true;
      90           0 :                 };
      91             : 
      92             :                 // Progress display
      93           0 :                 int last_pct = -1;
      94           0 :                 auto progress_display = [&last_pct, remote_url](uint64_t cur, uint64_t total) {
      95           0 :                     if (total == 0) {
      96           0 :                         fmt::print(stderr, "\rDownloading {}...", remote_url);
      97           0 :                         (void)std::fflush(stderr);
      98           0 :                     } else if (int pct = static_cast<int>((cur * 100) / total); pct != last_pct) {
      99           0 :                         last_pct = pct;
     100           0 :                         fmt::print(stderr, "\rDownloading {}... {:3d}%", remote_url, pct);
     101           0 :                         (void)std::fflush(stderr);
     102             :                     }
     103           0 :                     return true;
     104           0 :                 };
     105             : 
     106           0 :                 response = client->Get(remote_url, headers, content_receiver, progress_display);
     107             : 
     108             :                 // Ensure the progress display ends cleanly if we showed it
     109           0 :                 if (last_pct >= 0) {
     110           0 :                     fmt::print(stderr, "\n");
     111           0 :                     (void)std::fflush(stderr);
     112             :                 }
     113           0 :             } else {
     114           0 :                 response = client->Get(remote_url, headers);
     115             :             }
     116             : 
     117             :             // Handle if the response is null
     118           0 :             if (!response) {
     119             :                 // Defensive handling: if response is null and the error is unknown,
     120             :                 // treat this as a 304 Not Modified
     121           0 :                 if (response.error() == httplib::Error::Unknown) {
     122           0 :                     return Result{304, "", "", {}};
     123             :                 }
     124           0 :                 throw std::runtime_error(fmt::format("Error downloading '{}': {}", remote_url,
     125           0 :                                                      httplib::to_string(response.error())));
     126             :             }
     127             : 
     128             :             // Parse the response
     129           0 :             Result result;
     130           0 :             if (response->has_header("x-ratelimit-remaining")) {
     131           0 :                 result.rate_limit.remaining =
     132           0 :                     std::stoi(response->get_header_value("x-ratelimit-remaining"));
     133             :             }
     134           0 :             if (response->has_header("x-ratelimit-reset")) {
     135           0 :                 result.rate_limit.reset_time =
     136           0 :                     std::stoi(response->get_header_value("x-ratelimit-reset"));
     137             :             }
     138           0 :             if (response->has_header("last-modified")) {
     139           0 :                 result.last_modified = response->get_header_value("last-modified");
     140             :             }
     141           0 :             result.body = use_octet_stream ? std::move(streamed_body) : response->body;
     142           0 :             result.status_code = response->status;
     143             : 
     144           0 :             SPDLOG_DEBUG("Response status: {}", response->status);
     145           0 :             return result;
     146           0 :         });
     147           0 : }
     148             : 
     149           1 : GitHubDownloader::RateLimit GitHubDownloader::get_rate_limit() const {
     150             :     // This call now either returns valid rate limit data or throws an exception on error
     151           1 :     Result result = download("/rate_limit", "", false).get();
     152           1 :     if (result.status_code != 200) {
     153           0 :         throw std::runtime_error(
     154           0 :             fmt::format("Failed obtaining the rate limit: status code {}.", result.status_code));
     155             :     }
     156           1 :     return result.rate_limit;
     157           1 : }
     158             : 
     159           1 : std::string GitHubDownloader::get_host() const { return "https://" + host; }
     160             : 
     161             : } // namespace pairinteraction

Generated by: LCOV version 1.16