Line data Source code
1 : // SPDX-FileCopyrightText: 2025 PairInteraction Developers
2 : // SPDX-License-Identifier: LGPL-3.0-or-later
3 :
#include "pairinteraction/database/GitHubDownloader.hpp"

#include "pairinteraction/utils/paths.hpp"

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fmt/core.h>
#include <fstream>
#include <future>
#include <httplib.h>
#include <spdlog/spdlog.h>
#include <stdexcept>
#include <string>
15 :
16 : namespace pairinteraction {
17 :
18 0 : void log(const httplib::Request &req, const httplib::Response &res) {
19 0 : if (!spdlog::default_logger()->should_log(spdlog::level::debug)) {
20 0 : return;
21 : }
22 :
23 0 : SPDLOG_DEBUG("[httplib] {} {}", req.method, req.path);
24 0 : for (const auto &[k, v] : req.headers) {
25 0 : SPDLOG_DEBUG("[httplib] {}: {}\n", k, v);
26 : }
27 :
28 0 : SPDLOG_DEBUG("[httplib] Response with status {}", res.status);
29 0 : for (const auto &[k, v] : res.headers) {
30 0 : SPDLOG_DEBUG("[httplib] {}: {}\n", k, v);
31 : }
32 :
33 0 : if (res.body.empty()) {
34 0 : return;
35 : }
36 :
37 0 : if (res.body.size() > 1024) {
38 0 : SPDLOG_DEBUG("[httplib] Body ({} bytes, first 1024 bytes):", res.body.size());
39 : } else {
40 0 : SPDLOG_DEBUG("[httplib] Body ({} bytes):", res.body.size());
41 : }
42 0 : SPDLOG_DEBUG("[httplib] {}", res.body.substr(0, 1024));
43 : }
44 :
45 5 : GitHubDownloader::GitHubDownloader() : client(std::make_unique<httplib::SSLClient>(host)) {
46 5 : client->set_follow_location(true);
47 5 : client->set_connection_timeout(5, 0); // seconds
48 5 : client->set_read_timeout(60, 0); // seconds
49 5 : client->set_write_timeout(1, 0); // seconds
50 5 : client->set_logger(log);
51 5 : client->load_ca_cert_store(cert.data(), cert.size());
52 5 : }
53 :
// Defaulted destructor, defined out of line in this translation unit. The owned `client`
// is released by its smart pointer.
// NOTE(review): presumably defined here rather than in the header so that the httplib
// client type only has to be complete in this file - confirm against the header.
54 5 : GitHubDownloader::~GitHubDownloader() = default;
55 :
56 : std::future<GitHubDownloader::Result>
57 0 : GitHubDownloader::download(const std::string &remote_url, const std::string &if_modified_since,
58 : bool use_octet_stream) const {
59 : return std::async(
60 0 : std::launch::async, [this, remote_url, if_modified_since, use_octet_stream]() -> Result {
61 0 : SPDLOG_DEBUG("Downloading from GitHub: {}", remote_url);
62 :
63 : // Prepare headers
64 : httplib::Headers headers{
65 : {"User-Agent", "pairinteraction"},
66 : {"X-GitHub-Api-Version", "2022-11-28"},
67 : {"Accept",
68 0 : use_octet_stream ? "application/octet-stream" : "application/vnd.github+json"}};
69 :
70 0 : if (!if_modified_since.empty()) {
71 0 : headers.emplace("if-modified-since", if_modified_since);
72 : }
73 :
74 : // Use the GitHub token if available; otherwise, if we have a conditional request,
75 : // insert a dummy authorization header to avoid increasing rate limits
76 0 : if (auto *token = std::getenv("GITHUB_TOKEN"); token) {
77 0 : headers.emplace("Authorization", fmt::format("Bearer {}", token));
78 0 : } else if (!if_modified_since.empty()) {
79 0 : headers.emplace("Authorization",
80 : "avoids-an-increase-in-ratelimits-used-if-304-is-returned");
81 : }
82 :
83 : // If we're fetching binary, stream with a progress callback; otherwise use a simple get
84 0 : httplib::Result response;
85 0 : std::string streamed_body;
86 0 : if (use_octet_stream) {
87 0 : auto content_receiver = [&](const char *data, size_t len) {
88 0 : streamed_body.append(data, len);
89 0 : return true;
90 0 : };
91 :
92 : // Progress display
93 0 : int last_pct = -1;
94 0 : auto progress_display = [&last_pct, remote_url](uint64_t cur, uint64_t total) {
95 0 : if (total == 0) {
96 0 : fmt::print(stderr, "\rDownloading {}...", remote_url);
97 0 : (void)std::fflush(stderr);
98 0 : } else if (int pct = static_cast<int>((cur * 100) / total); pct != last_pct) {
99 0 : last_pct = pct;
100 0 : fmt::print(stderr, "\rDownloading {}... {:3d}%", remote_url, pct);
101 0 : (void)std::fflush(stderr);
102 : }
103 0 : return true;
104 0 : };
105 :
106 0 : response = client->Get(remote_url, headers, content_receiver, progress_display);
107 :
108 : // Ensure the progress display ends cleanly if we showed it
109 0 : if (last_pct >= 0) {
110 0 : fmt::print(stderr, "\n");
111 0 : (void)std::fflush(stderr);
112 : }
113 0 : } else {
114 0 : response = client->Get(remote_url, headers);
115 : }
116 :
117 : // Handle if the response is null
118 0 : if (!response) {
119 : // Defensive handling: if response is null and the error is unknown,
120 : // treat this as a 304 Not Modified
121 0 : if (response.error() == httplib::Error::Unknown) {
122 0 : return Result{304, "", "", {}};
123 : }
124 0 : throw std::runtime_error(fmt::format("Error downloading '{}': {}", remote_url,
125 0 : httplib::to_string(response.error())));
126 : }
127 :
128 : // Parse the response
129 0 : Result result;
130 0 : if (response->has_header("x-ratelimit-remaining")) {
131 0 : result.rate_limit.remaining =
132 0 : std::stoi(response->get_header_value("x-ratelimit-remaining"));
133 : }
134 0 : if (response->has_header("x-ratelimit-reset")) {
135 0 : result.rate_limit.reset_time =
136 0 : std::stoi(response->get_header_value("x-ratelimit-reset"));
137 : }
138 0 : if (response->has_header("last-modified")) {
139 0 : result.last_modified = response->get_header_value("last-modified");
140 : }
141 0 : result.body = use_octet_stream ? std::move(streamed_body) : response->body;
142 0 : result.status_code = response->status;
143 :
144 0 : SPDLOG_DEBUG("Response status: {}", response->status);
145 0 : return result;
146 0 : });
147 0 : }
148 :
149 1 : GitHubDownloader::RateLimit GitHubDownloader::get_rate_limit() const {
150 : // This call now either returns valid rate limit data or throws an exception on error
151 1 : Result result = download("/rate_limit", "", false).get();
152 1 : if (result.status_code != 200) {
153 0 : throw std::runtime_error(
154 0 : fmt::format("Failed obtaining the rate limit: status code {}.", result.status_code));
155 : }
156 1 : return result.rate_limit;
157 1 : }
158 :
159 1 : std::string GitHubDownloader::get_host() const { return "https://" + host; }
160 :
161 : } // namespace pairinteraction
|