Line data Source code
1 : // SPDX-FileCopyrightText: 2025 PairInteraction Developers
2 : // SPDX-License-Identifier: LGPL-3.0-or-later
3 :
#include "pairinteraction/database/GitHubDownloader.hpp"

#include "pairinteraction/utils/paths.hpp"

#include <algorithm>
#include <cctype>
#include <filesystem>
#include <fmt/core.h>
#include <fstream>
#include <future>
#include <httplib.h>
#include <spdlog/spdlog.h>
#include <stdexcept>
#include <string_view>
#include <system_error>
15 :
16 : namespace pairinteraction {
17 : namespace {
18 :
/// Accessor for the process-wide CA bundle path.
///
/// The path lives in a function-local static, so it is default-constructed
/// (empty) on first use and every call yields a reference to the same object.
std::filesystem::path &ca_bundle_path() {
    static std::filesystem::path configured_bundle{};
    return configured_bundle;
}
23 :
24 5 : bool load_ca_bundle_from_path(httplib::SSLClient &client, const std::filesystem::path &path) {
25 5 : if (path.empty() || !std::filesystem::is_regular_file(path)) {
26 0 : return false;
27 : }
28 :
29 5 : std::ifstream in(path, std::ios::binary);
30 5 : std::string pem((std::istreambuf_iterator<char>(in)), {});
31 :
32 5 : if (!in || pem.empty()) {
33 0 : SPDLOG_WARN("Failed to read CA bundle from {}.", path.string());
34 0 : return false;
35 : }
36 :
37 5 : client.load_ca_cert_store(pem.data(), pem.size());
38 5 : SPDLOG_INFO("Using CA bundle from {}.", path.string());
39 5 : return true;
40 5 : }
41 :
42 0 : void log(const httplib::Request &req, const httplib::Response &res) {
43 0 : if (!spdlog::default_logger()->should_log(spdlog::level::debug)) {
44 0 : return;
45 : }
46 :
47 0 : SPDLOG_DEBUG("[httplib] {} {}", req.method, req.path);
48 0 : for (const auto &[k, v] : req.headers) {
49 0 : SPDLOG_DEBUG("[httplib] {}: {}\n", k, v);
50 : }
51 :
52 0 : SPDLOG_DEBUG("[httplib] Response with status {}", res.status);
53 0 : for (const auto &[k, v] : res.headers) {
54 0 : SPDLOG_DEBUG("[httplib] {}: {}\n", k, v);
55 : }
56 :
57 0 : if (res.body.empty()) {
58 0 : return;
59 : }
60 :
61 0 : if (res.body.size() > 1024) {
62 0 : SPDLOG_DEBUG("[httplib] Body ({} bytes, first 1024 bytes):", res.body.size());
63 : } else {
64 0 : SPDLOG_DEBUG("[httplib] Body ({} bytes):", res.body.size());
65 : }
66 0 : SPDLOG_DEBUG("[httplib] {}", res.body.substr(0, 1024));
67 : }
68 :
69 : } // namespace
70 :
71 1 : void set_ca_bundle_path(std::filesystem::path path) { ca_bundle_path() = std::move(path); }
72 :
73 5 : std::filesystem::path get_ca_bundle_path() { return ca_bundle_path(); }
74 :
75 5 : GitHubDownloader::GitHubDownloader() : client(std::make_unique<httplib::SSLClient>(host)) {
76 5 : client->set_follow_location(true);
77 5 : client->set_connection_timeout(5, 0); // seconds
78 5 : client->set_read_timeout(60, 0); // seconds
79 5 : client->set_write_timeout(1, 0); // seconds
80 5 : client->set_logger(log);
81 :
82 5 : if (const auto configured_path = get_ca_bundle_path(); !configured_path.empty()) {
83 5 : load_ca_bundle_from_path(*client, configured_path);
84 5 : }
85 5 : }
86 :
87 5 : GitHubDownloader::~GitHubDownloader() = default;
88 :
89 : std::future<GitHubDownloader::Result>
90 0 : GitHubDownloader::download(const std::string &remote_url, const std::string &if_modified_since,
91 : bool use_octet_stream) const {
92 : return std::async(
93 0 : std::launch::async, [this, remote_url, if_modified_since, use_octet_stream]() -> Result {
94 0 : SPDLOG_DEBUG("Downloading from GitHub: {}", remote_url);
95 :
96 : // Prepare headers
97 : httplib::Headers headers{
98 : {"User-Agent", "pairinteraction"},
99 : {"X-GitHub-Api-Version", "2022-11-28"},
100 : {"Accept",
101 0 : use_octet_stream ? "application/octet-stream" : "application/vnd.github+json"}};
102 :
103 0 : if (!if_modified_since.empty()) {
104 0 : headers.emplace("if-modified-since", if_modified_since);
105 : }
106 :
107 : // Use the GitHub token if available; otherwise, if we have a conditional request,
108 : // insert a dummy authorization header to avoid increasing rate limits
109 0 : if (auto *token = std::getenv("GITHUB_TOKEN"); token) {
110 0 : headers.emplace("Authorization", fmt::format("Bearer {}", token));
111 0 : } else if (!if_modified_since.empty()) {
112 0 : headers.emplace("Authorization",
113 : "avoids-an-increase-in-ratelimits-used-if-304-is-returned");
114 : }
115 :
116 : // If we're fetching binary, stream with a progress callback; otherwise use a simple get
117 0 : httplib::Result response;
118 0 : std::string streamed_body;
119 0 : if (use_octet_stream) {
120 0 : auto content_receiver = [&](const char *data, size_t len) {
121 0 : streamed_body.append(data, len);
122 0 : return true;
123 0 : };
124 :
125 : // Progress display
126 0 : int last_pct = -1;
127 0 : auto progress_display = [&last_pct, remote_url](uint64_t cur, uint64_t total) {
128 0 : if (total == 0) {
129 0 : fmt::print(stderr, "\rDownloading {}...", remote_url);
130 0 : (void)std::fflush(stderr);
131 0 : } else if (int pct = static_cast<int>((cur * 100) / total); pct != last_pct) {
132 0 : last_pct = pct;
133 0 : fmt::print(stderr, "\rDownloading {}... {:3d}%", remote_url, pct);
134 0 : (void)std::fflush(stderr);
135 : }
136 0 : return true;
137 0 : };
138 :
139 0 : response = client->Get(remote_url, headers, content_receiver, progress_display);
140 :
141 : // Ensure the progress display ends cleanly if we showed it
142 0 : if (last_pct >= 0) {
143 0 : fmt::print(stderr, "\n");
144 0 : (void)std::fflush(stderr);
145 : }
146 0 : } else {
147 0 : response = client->Get(remote_url, headers);
148 : }
149 :
150 : // Handle if the response is null
151 0 : if (!response) {
152 : // Defensive handling: if response is null and the error is unknown,
153 : // treat this as a 304 Not Modified
154 0 : if (response.error() == httplib::Error::Unknown) {
155 0 : return Result{304, "", "", {}};
156 : }
157 0 : throw std::runtime_error(fmt::format("Error downloading '{}': {}", remote_url,
158 0 : httplib::to_string(response.error())));
159 : }
160 :
161 : // Parse the response
162 0 : Result result;
163 0 : if (response->has_header("x-ratelimit-remaining")) {
164 0 : result.rate_limit.remaining =
165 0 : std::stoi(response->get_header_value("x-ratelimit-remaining"));
166 : }
167 0 : if (response->has_header("x-ratelimit-reset")) {
168 0 : result.rate_limit.reset_time =
169 0 : std::stoi(response->get_header_value("x-ratelimit-reset"));
170 : }
171 0 : if (response->has_header("last-modified")) {
172 0 : result.last_modified = response->get_header_value("last-modified");
173 : }
174 0 : result.body = use_octet_stream ? std::move(streamed_body) : response->body;
175 0 : result.status_code = response->status;
176 :
177 0 : SPDLOG_DEBUG("Response status: {}", response->status);
178 0 : return result;
179 0 : });
180 0 : }
181 :
182 1 : std::string GitHubDownloader::get_host() const { return "https://" + host; }
183 :
184 : } // namespace pairinteraction
|