Line data Source code
1 : // SPDX-FileCopyrightText: 2025 PairInteraction Developers
2 : // SPDX-License-Identifier: LGPL-3.0-or-later
3 :
#include "pairinteraction/database/GitHubDownloader.hpp"

#include "pairinteraction/utils/paths.hpp"

#include <filesystem>
#include <fmt/core.h>
#include <fmt/format.h>
#include <fstream>
#include <future>
#include <httplib.h>
#include <spdlog/spdlog.h>
#include <stdexcept>
#include <system_error>
16 :
17 : namespace pairinteraction {
18 : namespace {
19 :
/**
 * @brief Access the process-wide storage for the configured CA bundle path.
 *
 * The path lives in a function-local static, so it is default-constructed
 * (empty) on first use and the same object is returned on every call.
 *
 * @return Mutable reference to the stored path.
 */
std::filesystem::path &ca_bundle_path() {
    static std::filesystem::path configured_bundle{};
    return configured_bundle;
}
24 :
25 6 : bool load_ca_bundle_from_path(httplib::SSLClient &client, const std::filesystem::path &path) {
26 6 : if (path.empty() || !std::filesystem::is_regular_file(path)) {
27 0 : return false;
28 : }
29 :
30 6 : std::ifstream in(path, std::ios::binary);
31 6 : std::string pem((std::istreambuf_iterator<char>(in)), {});
32 :
33 6 : if (!in || pem.empty()) {
34 0 : SPDLOG_WARN("Failed to read CA bundle from {}.", path.string());
35 0 : return false;
36 : }
37 :
38 6 : client.load_ca_cert_store(pem.data(), pem.size());
39 6 : SPDLOG_INFO("Using CA bundle from {}.", path.string());
40 6 : return true;
41 6 : }
42 :
43 0 : void log(const httplib::Request &req, const httplib::Response &res) {
44 0 : if (!spdlog::default_logger()->should_log(spdlog::level::debug)) {
45 0 : return;
46 : }
47 :
48 0 : SPDLOG_DEBUG("[httplib] {} {}", req.method, req.path);
49 0 : for (const auto &[k, v] : req.headers) {
50 0 : SPDLOG_DEBUG("[httplib] {}: {}\n", k, v);
51 : }
52 :
53 0 : SPDLOG_DEBUG("[httplib] Response with status {}", res.status);
54 0 : for (const auto &[k, v] : res.headers) {
55 0 : SPDLOG_DEBUG("[httplib] {}: {}\n", k, v);
56 : }
57 :
58 0 : if (res.body.empty()) {
59 0 : return;
60 : }
61 :
62 0 : if (res.body.size() > 1024) {
63 0 : SPDLOG_DEBUG("[httplib] Body ({} bytes, first 1024 bytes):", res.body.size());
64 : } else {
65 0 : SPDLOG_DEBUG("[httplib] Body ({} bytes):", res.body.size());
66 : }
67 0 : SPDLOG_DEBUG("[httplib] {}", res.body.substr(0, 1024));
68 : }
69 :
70 : } // namespace
71 :
72 1 : void set_ca_bundle_path(std::filesystem::path path) { ca_bundle_path() = std::move(path); }
73 :
74 6 : std::filesystem::path get_ca_bundle_path() { return ca_bundle_path(); }
75 :
76 6 : GitHubDownloader::GitHubDownloader() : client(std::make_unique<httplib::SSLClient>(host)) {
77 6 : client->set_follow_location(true);
78 6 : client->set_connection_timeout(5, 0); // seconds
79 6 : client->set_read_timeout(60, 0); // seconds
80 6 : client->set_write_timeout(1, 0); // seconds
81 6 : client->set_logger(log);
82 :
83 6 : if (const auto configured_path = get_ca_bundle_path(); !configured_path.empty()) {
84 6 : load_ca_bundle_from_path(*client, configured_path);
85 6 : }
86 6 : }
87 :
88 7 : GitHubDownloader::~GitHubDownloader() = default;
89 :
90 : std::future<GitHubDownloader::Result>
91 0 : GitHubDownloader::download(const std::string &remote_url, const std::string &if_modified_since,
92 : bool use_octet_stream) const {
93 : return std::async(
94 0 : std::launch::async, [this, remote_url, if_modified_since, use_octet_stream]() -> Result {
95 0 : SPDLOG_DEBUG("Downloading from GitHub: {}", remote_url);
96 :
97 : // Prepare headers
98 : httplib::Headers headers{
99 : {"User-Agent", "pairinteraction"},
100 : {"X-GitHub-Api-Version", "2022-11-28"},
101 : {"Accept",
102 0 : use_octet_stream ? "application/octet-stream" : "application/vnd.github+json"}};
103 :
104 0 : if (!if_modified_since.empty()) {
105 0 : headers.emplace("if-modified-since", if_modified_since);
106 : }
107 :
108 : // Use the GitHub token if available; otherwise, if we have a conditional request,
109 : // insert a dummy authorization header to avoid increasing rate limits
110 0 : if (auto *token = std::getenv("GITHUB_TOKEN"); token) {
111 0 : headers.emplace("Authorization", fmt::format("Bearer {}", token));
112 0 : } else if (!if_modified_since.empty()) {
113 0 : headers.emplace("Authorization",
114 : "avoids-an-increase-in-ratelimits-used-if-304-is-returned");
115 : }
116 :
117 : // If we're fetching binary, stream with a progress callback; otherwise use a simple get
118 0 : httplib::Result response;
119 0 : std::string streamed_body;
120 0 : if (use_octet_stream) {
121 0 : auto content_receiver = [&](const char *data, size_t len) {
122 0 : streamed_body.append(data, len);
123 0 : return true;
124 0 : };
125 :
126 : // Progress display
127 0 : int last_pct = -1;
128 0 : auto progress_display = [&last_pct, remote_url](uint64_t cur, uint64_t total) {
129 0 : if (total == 0) {
130 0 : fmt::print(stderr, "\rDownloading {}...", remote_url);
131 0 : (void)std::fflush(stderr);
132 0 : } else if (int pct = static_cast<int>((cur * 100) / total); pct != last_pct) {
133 0 : last_pct = pct;
134 0 : fmt::print(stderr, "\rDownloading {}... {:3d}%", remote_url, pct);
135 0 : (void)std::fflush(stderr);
136 : }
137 0 : return true;
138 0 : };
139 :
140 0 : response = client->Get(remote_url, headers, content_receiver, progress_display);
141 :
142 : // Ensure the progress display ends cleanly if we showed it
143 0 : if (last_pct >= 0) {
144 0 : fmt::print(stderr, "\n");
145 0 : (void)std::fflush(stderr);
146 : }
147 0 : } else {
148 0 : response = client->Get(remote_url, headers);
149 : }
150 :
151 : // Handle if the response is null
152 0 : if (!response) {
153 : // Defensive handling: if response is null and the error is unknown,
154 : // treat this as a 304 Not Modified
155 0 : if (response.error() == httplib::Error::Unknown) {
156 0 : return Result{304, "", "", {}};
157 : }
158 0 : throw std::runtime_error(fmt::format("Error downloading '{}': {}", remote_url,
159 0 : httplib::to_string(response.error())));
160 : }
161 :
162 : // Parse the response
163 0 : Result result;
164 0 : if (response->has_header("x-ratelimit-remaining")) {
165 0 : result.rate_limit.remaining =
166 0 : std::stoi(response->get_header_value("x-ratelimit-remaining"));
167 : }
168 0 : if (response->has_header("x-ratelimit-reset")) {
169 0 : result.rate_limit.reset_time =
170 0 : std::stoi(response->get_header_value("x-ratelimit-reset"));
171 : }
172 0 : if (response->has_header("last-modified")) {
173 0 : result.last_modified = response->get_header_value("last-modified");
174 : }
175 0 : result.body = use_octet_stream ? std::move(streamed_body) : response->body;
176 0 : result.status_code = response->status;
177 :
178 0 : SPDLOG_DEBUG("Response status: {}", response->status);
179 0 : return result;
180 0 : });
181 0 : }
182 :
183 1 : std::string GitHubDownloader::get_host() const { return "https://" + host; }
184 :
185 : } // namespace pairinteraction
|