Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bundle httpfs by default #105

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"targets": [
{
"target_name": "<(module_name)",
"variables": {
"include_httpfs": "<!(echo ${DUCKDB_INCLUDE_HTTPFS})"
},
"sources": [
"src/duckdb_node.cpp",
"src/database.cpp",
Expand Down Expand Up @@ -397,6 +400,25 @@
"bcrypt.lib"
]
}
],
[
"include_httpfs=='true'",
{
"sources": [
"src/duckdb/extension/httpfs/create_secret_functions.cpp",
"src/duckdb/extension/httpfs/crypto.cpp",
"src/duckdb/extension/httpfs/hffs.cpp",
"src/duckdb/extension/httpfs/httpfs.cpp",
"src/duckdb/extension/httpfs/httpfs_extension.cpp",
"src/duckdb/extension/httpfs/s3fs.cpp"
],
"include_dirs": [
"src/duckdb/extension/httpfs/include"
],
"defines": [
"DUCKDB_EXTENSION_HTTPFS_LINKED"
]
}
]
],
"libraries": []
Expand Down
5 changes: 4 additions & 1 deletion binding.gyp.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"targets": [
{
"target_name": "<(module_name)",
"variables" : {
},
"sources": [
"src/duckdb_node.cpp",
"src/database.cpp",
Expand Down Expand Up @@ -67,7 +69,8 @@
"rstrtmgr.lib", "bcrypt.lib"
]
}
]
],
"${OPTIONAL_EXTENSIONS}"
],
"libraries": [
"${LIBRARY_FILES}"
Expand Down
250 changes: 250 additions & 0 deletions src/duckdb/extension/httpfs/create_secret_functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#include "create_secret_functions.hpp"
#include "s3fs.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/common/local_file_system.hpp"

namespace duckdb {

//! Registers the secret type and its create-secret functions for each of the
//! S3-style secret types handled here: "s3", "r2" and "gcs" (in that order).
void CreateS3SecretFunctions::Register(DatabaseInstance &instance) {
	static const char *const SECRET_TYPE_NAMES[] = {"s3", "r2", "gcs"};
	for (const char *type_name : SECRET_TYPE_NAMES) {
		RegisterCreateSecretFunction(instance, type_name);
	}
}

//! Creates a secret of type "s3", "r2" or "gcs" from `params` after applying the options given in `input`.
//!
//! - For type "r2", an "account_id" option sets the endpoint to "<account_id>.r2.cloudflarestorage.com".
//!   Options are applied afterwards, so an explicit "endpoint" option still overrides that value.
//! - Option names are matched after lower-casing.
//! - When `input.scope` is empty, a default scope is selected based on the secret type.
//!
//! `params` is taken by value: it is the caller-provided baseline that gets modified locally.
//! `context` is not used by this function.
//!
//! Throws InvalidInputException when "use_ssl" or "url_compatibility_mode" is not of type BOOLEAN.
//! Throws InternalException for an unknown option name, or (when no scope was provided) an unknown secret type.
unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateSecretFunctionInternal(ClientContext &context,
                                                                             CreateSecretInput &input,
                                                                             S3AuthParams params) {
	// for r2 we can set the endpoint using the account id
	if (input.type == "r2") {
		// reuse the iterator instead of looking the key up a second time through operator[]
		auto account_id_entry = input.options.find("account_id");
		if (account_id_entry != input.options.end()) {
			params.endpoint = account_id_entry->second.ToString() + ".r2.cloudflarestorage.com";
		}
	}

	// BOOLEAN options are validated explicitly so that a wrongly typed value produces a clear error
	auto read_boolean_option = [](const string &option_name, const Value &option_value) -> bool {
		if (option_value.type() != LogicalType::BOOLEAN) {
			throw InvalidInputException(
			    "Invalid type passed to secret option: '%s', found '%s', expected: 'BOOLEAN'", option_name,
			    option_value.type().ToString());
		}
		return option_value.GetValue<bool>();
	};

	// apply any overridden settings
	for (const auto &named_param : input.options) {
		auto lower_name = StringUtil::Lower(named_param.first);

		if (lower_name == "key_id") {
			params.access_key_id = named_param.second.ToString();
		} else if (lower_name == "secret") {
			params.secret_access_key = named_param.second.ToString();
		} else if (lower_name == "region") {
			params.region = named_param.second.ToString();
		} else if (lower_name == "session_token") {
			params.session_token = named_param.second.ToString();
		} else if (lower_name == "endpoint") {
			params.endpoint = named_param.second.ToString();
		} else if (lower_name == "url_style") {
			params.url_style = named_param.second.ToString();
		} else if (lower_name == "use_ssl") {
			params.use_ssl = read_boolean_option(lower_name, named_param.second);
		} else if (lower_name == "url_compatibility_mode") {
			params.s3_url_compatibility_mode = read_boolean_option(lower_name, named_param.second);
		} else if (lower_name == "account_id") {
			continue; // handled already
		} else {
			throw InternalException("Unknown named parameter passed to CreateSecretFunctionInternal: " + lower_name);
		}
	}

	// Set scope to user provided scope or the default
	auto scope = input.scope;
	if (scope.empty()) {
		if (input.type == "s3") {
			scope.push_back("s3://");
			scope.push_back("s3n://");
			scope.push_back("s3a://");
		} else if (input.type == "r2") {
			scope.push_back("r2://");
		} else if (input.type == "gcs") {
			scope.push_back("gcs://");
			scope.push_back("gs://");
		} else {
			throw InternalException("Unknown secret type found in httpfs extension: '%s'", input.type);
		}
	}

	return S3SecretHelper::CreateSecret(scope, input.type, input.provider, input.name, params);
}

//! Provider that seeds the secret with the S3 auth parameters read through the client's file opener
//! (S3AuthParams::ReadFrom), then applies the options in `input` on top of them.
unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateS3SecretFromSettings(ClientContext &context,
                                                                           CreateSecretInput &input) {
	FileOpenerInfo opener_info;
	auto settings_params = S3AuthParams::ReadFrom(context.client_data->file_opener.get(), opener_info);
	return CreateSecretFunctionInternal(context, input, settings_params);
}

//! Provider that seeds the secret with fixed defaults (ignoring any current settings), then applies
//! the options in `input` on top of them.
//!
//! Defaults: SSL enabled, URL compatibility mode disabled, region "us-east-1", endpoint
//! "s3.amazonaws.com" ("storage.googleapis.com" for gcs). For gcs and r2 the URL style is "path".
unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateS3SecretFromConfig(ClientContext &context,
                                                                         CreateSecretInput &input) {
	const bool is_gcs = input.type == "gcs";
	const bool is_r2 = input.type == "r2";

	S3AuthParams defaults;
	defaults.region = "us-east-1";
	defaults.endpoint = is_gcs ? "storage.googleapis.com" : "s3.amazonaws.com";
	defaults.use_ssl = true;
	defaults.s3_url_compatibility_mode = false;
	if (is_gcs || is_r2) {
		defaults.url_style = "path";
	}

	return CreateSecretFunctionInternal(context, input, defaults);
}

//! Declares the named parameters accepted by an S3-style create-secret function.
//! All types get the shared VARCHAR and BOOLEAN options; type "r2" additionally accepts "account_id".
void CreateS3SecretFunctions::SetBaseNamedParams(CreateSecretFunction &function, string &type) {
	static const char *const VARCHAR_OPTIONS[] = {"key_id",        "secret",   "region",
	                                              "session_token", "endpoint", "url_style"};
	static const char *const BOOLEAN_OPTIONS[] = {"use_ssl", "url_compatibility_mode"};

	for (const char *option_name : VARCHAR_OPTIONS) {
		function.named_parameters[option_name] = LogicalType::VARCHAR;
	}
	for (const char *option_name : BOOLEAN_OPTIONS) {
		function.named_parameters[option_name] = LogicalType::BOOLEAN;
	}

	// only r2 derives its endpoint from an account id
	if (type == "r2") {
		function.named_parameters["account_id"] = LogicalType::VARCHAR;
	}
}

//! Registers the secret type `type` (default provider "config") and its two providers:
//! "config" (fixed defaults) and "duckdb_settings" (seeded from the current settings).
void CreateS3SecretFunctions::RegisterCreateSecretFunction(DatabaseInstance &instance, string type) {
	// Register the new type
	SecretType new_secret_type;
	new_secret_type.default_provider = "config";
	new_secret_type.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
	new_secret_type.name = type;
	ExtensionUtil::RegisterSecretType(instance, new_secret_type);

	// "config" provider
	CreateSecretFunction config_provider = {type, "config", CreateS3SecretFromConfig};
	SetBaseNamedParams(config_provider, type);

	// "duckdb_settings" provider
	CreateSecretFunction settings_provider = {type, "duckdb_settings", CreateS3SecretFromSettings};
	SetBaseNamedParams(settings_provider, type);

	ExtensionUtil::RegisterFunction(instance, config_provider);
	ExtensionUtil::RegisterFunction(instance, settings_provider);
}

//! Registers the token-based secret types and their providers:
//!  - GENERIC_BEARER_TYPE with a "config" provider taking a VARCHAR "token"
//!  - HUGGINGFACE_TYPE with a "config" provider taking a VARCHAR "token", and a
//!    "credential_chain" provider without named parameters
void CreateBearerTokenFunctions::Register(DatabaseInstance &instance) {
	// ---- Generic Bearer: secret type ----
	SecretType bearer_type;
	bearer_type.default_provider = "config";
	bearer_type.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
	bearer_type.name = GENERIC_BEARER_TYPE;
	ExtensionUtil::RegisterSecretType(instance, bearer_type);

	// ---- Generic Bearer: config provider ----
	CreateSecretFunction bearer_from_config = {GENERIC_BEARER_TYPE, "config", CreateBearerSecretFromConfig};
	bearer_from_config.named_parameters["token"] = LogicalType::VARCHAR;
	ExtensionUtil::RegisterFunction(instance, bearer_from_config);

	// ---- HuggingFace: secret type ----
	SecretType huggingface_type;
	huggingface_type.default_provider = "config";
	huggingface_type.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
	huggingface_type.name = HUGGINGFACE_TYPE;
	ExtensionUtil::RegisterSecretType(instance, huggingface_type);

	// ---- HuggingFace: config provider ----
	CreateSecretFunction huggingface_from_config = {HUGGINGFACE_TYPE, "config", CreateBearerSecretFromConfig};
	huggingface_from_config.named_parameters["token"] = LogicalType::VARCHAR;
	ExtensionUtil::RegisterFunction(instance, huggingface_from_config);

	// ---- HuggingFace: credential_chain provider ----
	CreateSecretFunction huggingface_from_chain = {HUGGINGFACE_TYPE, "credential_chain",
	                                               CreateHuggingFaceSecretFromCredentialChain};
	ExtensionUtil::RegisterFunction(instance, huggingface_from_chain);
}

//! Builds a key-value secret holding `token` under the key "token", which is also marked for redaction.
//!
//! The scope is `input.scope` when provided; otherwise it defaults to "" for GENERIC_BEARER_TYPE and
//! "hf://" for HUGGINGFACE_TYPE. Any other type without an explicit scope raises an InternalException.
//! `context` is not used by this function.
unique_ptr<BaseSecret> CreateBearerTokenFunctions::CreateSecretFunctionInternal(ClientContext &context,
                                                                                CreateSecretInput &input,
                                                                                const string &token) {
	// Start from the user provided scope; fall back to the per-type default when none was given
	auto prefix_paths = input.scope;
	if (prefix_paths.empty()) {
		const bool is_generic_bearer = input.type == GENERIC_BEARER_TYPE;
		if (!is_generic_bearer && !(input.type == HUGGINGFACE_TYPE)) {
			throw InternalException("Unknown secret type found in httpfs extension: '%s'", input.type);
		}
		prefix_paths.push_back(is_generic_bearer ? "" : "hf://");
	}

	auto secret = make_uniq<KeyValueSecret>(prefix_paths, input.type, input.provider, input.name);
	//! Store the token and make sure it is redacted
	secret->secret_map["token"] = token;
	secret->redact_keys = {"token"};

	// the move converts the derived unique_ptr into the returned base unique_ptr
	return std::move(secret);
}

//! "config" provider for token-based secrets: takes the token from the "token" option in `input`.
//!
//! Option names are compared after lower-casing. If no "token" option is present, the secret is
//! created with an empty token.
unique_ptr<BaseSecret> CreateBearerTokenFunctions::CreateBearerSecretFromConfig(ClientContext &context,
                                                                                CreateSecretInput &input) {
	string token;

	for (const auto &named_param : input.options) {
		auto lower_name = StringUtil::Lower(named_param.first);
		if (lower_name == "token") {
			token = named_param.second.ToString();
		}
	}

	return CreateSecretFunctionInternal(context, input, token);
}

//! Reads a token from the file at `token_path` and returns the result of ReadLine() on it.
//!
//! \param token_path            path of the token file, opened for reading through LocalFileSystem
//! \param error_source_message  text inserted into the error message right after the quoted path,
//!                              describing where the path came from
//! \param fail_on_exception     when false, any exception while opening/reading yields "" instead
//!
//! Throws IOException (including the original error's raw message) when opening or reading fails
//! and `fail_on_exception` is true.
static string TryReadTokenFile(const string &token_path, const string &error_source_message,
                               bool fail_on_exception = true) {
	try {
		LocalFileSystem fs;
		auto handle = fs.OpenFile(token_path, {FileOpenFlags::FILE_FLAGS_READ});
		return handle->ReadLine();
	} catch (const std::exception &ex) {
		if (!fail_on_exception) {
			// best-effort mode: a missing or unreadable file simply means "no token"
			return "";
		}
		ErrorData error(ex);
		throw IOException("Failed to read token path '%s'%s. (error: %s)", token_path, error_source_message,
		                  error.RawMessage());
	}
}

//! "credential_chain" provider for HuggingFace secrets. The token is taken from the first source
//! that is available, in this order:
//!  1. the HF_TOKEN environment variable (used verbatim as the token)
//!  2. the file named by the HF_TOKEN_PATH environment variable
//!  3. the file "token" inside the directory named by the HF_HOME environment variable
//!  4. the file "~/.cache/huggingface/token"
//!
//! For sources 2 and 3 a failure to read the file raises an IOException. For source 4 a failure is
//! tolerated and results in a secret with an empty token.
unique_ptr<BaseSecret>
CreateBearerTokenFunctions::CreateHuggingFaceSecretFromCredentialChain(ClientContext &context,
                                                                       CreateSecretInput &input) {
	// Step 1: the token itself, straight from the environment
	if (const char *env_token = std::getenv("HF_TOKEN")) {
		return CreateSecretFunctionInternal(context, input, env_token);
	}

	// Step 2: an explicit token file location
	if (const char *env_token_path = std::getenv("HF_TOKEN_PATH")) {
		auto file_token = TryReadTokenFile(env_token_path, " fetched from HF_TOKEN_PATH env variable");
		return CreateSecretFunctionInternal(context, input, file_token);
	}

	// Step 3: the token file inside $HF_HOME
	if (const char *env_home = std::getenv("HF_HOME")) {
		auto home_token_path = LocalFileSystem().JoinPath(env_home, "token");
		auto file_token =
		    TryReadTokenFile(home_token_path, " constructed using the HF_HOME variable: '$HF_HOME/token'");
		return CreateSecretFunctionInternal(context, input, file_token);
	}

	// Step 4: the default location; read failures are ignored here
	auto default_token = TryReadTokenFile("~/.cache/huggingface/token", "", false);
	return CreateSecretFunctionInternal(context, input, default_token);
}
} // namespace duckdb
27 changes: 27 additions & 0 deletions src/duckdb/extension/httpfs/crypto.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "crypto.hpp"
#include "mbedtls_wrapper.hpp"

namespace duckdb {

//! Computes the SHA-256 hash of the `in_len` bytes at `in` and writes it into `out`,
//! delegating to the mbedtls wrapper.
void sha256(const char *in, size_t in_len, hash_bytes &out) {
	char *digest = reinterpret_cast<char *>(out);
	duckdb_mbedtls::MbedTlsWrapper::ComputeSha256Hash(in, in_len, digest);
}

//! Computes the HMAC-SHA256 of `message` keyed with the `secret_len` bytes at `secret` and writes
//! the result into `out`, delegating to the mbedtls wrapper.
void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out) {
	char *mac = reinterpret_cast<char *>(out);
	duckdb_mbedtls::MbedTlsWrapper::Hmac256(secret, secret_len, message.data(), message.size(), mac);
}

void hmac256(std::string message, hash_bytes secret, hash_bytes &out) {
hmac256(message, (char *)secret, sizeof(hash_bytes), out);
}

//! Writes the lowercase hexadecimal representation of `in` into `out`: every input byte produces
//! two output characters (high nibble first). No terminator is written.
void hex256(hash_bytes &in, hash_str &out) {
	static const char HEX_DIGITS[] = "0123456789abcdef";
	for (size_t byte_idx = 0; byte_idx < sizeof(in); byte_idx++) {
		const unsigned char current = in[byte_idx];
		out[2 * byte_idx] = HEX_DIGITS[(current >> 4) & 0xF];
		out[2 * byte_idx + 1] = HEX_DIGITS[current & 0xF];
	}
}
} // namespace duckdb
Loading
Loading