Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
import org.apache.hadoop.fs.azurebfs.oauth2.ClientAssertionProvider;
import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider;
import org.apache.hadoop.fs.azurebfs.oauth2.CustomTokenProviderAdapter;
import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
Expand Down Expand Up @@ -1277,12 +1278,38 @@
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT);
String clientId =
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID);
String tokenFile =
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
tokenProvider = new WorkloadIdentityTokenProvider(
authority, tenantGuid, clientId, tokenFile);
LOG.trace("WorkloadIdentityTokenProvider initialized");

Check failure on line 1281 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1281

blanks: end of line
// Check if a custom ClientAssertionProvider is configured
String clientAssertionProviderClassName =

Check failure on line 1283 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1283

blanks: end of line
getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ASSERTION_PROVIDER_CLASS);

Check failure on line 1285 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1285

blanks: end of line
if (clientAssertionProviderClassName != null && !clientAssertionProviderClassName.trim().isEmpty()) {
// Use custom ClientAssertionProvider
try {
Class<?> providerClass = Class.forName(clientAssertionProviderClassName.trim());
ClientAssertionProvider clientAssertionProvider =

Check failure on line 1290 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1290

blanks: end of line
(ClientAssertionProvider) providerClass.getDeclaredConstructor().newInstance();

Check failure on line 1292 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1292

blanks: end of line
// Initialize the provider with configuration
clientAssertionProvider.initialize(rawConfig, accountName);

Check failure on line 1295 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1295

blanks: end of line
tokenProvider = new WorkloadIdentityTokenProvider(
authority, tenantGuid, clientId, clientAssertionProvider);
LOG.trace("WorkloadIdentityTokenProvider initialized with custom ClientAssertionProvider: {}",

Check failure on line 1298 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java#L1298

blanks: end of line
clientAssertionProviderClassName);
} catch (Exception e) {
throw new TokenAccessProviderException(
"Failed to initialize custom ClientAssertionProvider: " + clientAssertionProviderClassName, e);
}
} else {
// Use file-based approach (backward compatibility)
String tokenFile =
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
tokenProvider = new WorkloadIdentityTokenProvider(
authority, tenantGuid, clientId, tokenFile);
LOG.trace("WorkloadIdentityTokenProvider initialized with file-based token");
}
} else {
throw new IllegalArgumentException("Failed to initialize " + tokenProviderClass);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,9 @@
public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT = "fs.azure.account.oauth2.refresh.token.endpoint";
/** Key for oauth AAD workload identity token file path: {@value}. */
public static final String FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE = "fs.azure.account.oauth2.token.file";

Check failure on line 342 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java#L342

blanks: end of line
/** Key for the custom client assertion provider class used by WorkloadIdentityTokenProvider: {@value}. */
public static final String FS_AZURE_ACCOUNT_OAUTH_CLIENT_ASSERTION_PROVIDER_CLASS = "fs.azure.account.oauth2.client.assertion.provider.class";
/** Key for enabling the tracking of ABFS API latency and sending the latency numbers to the ABFS API service */
public static final String FS_AZURE_ABFS_LATENCY_TRACK = "fs.azure.abfs.latency.track";

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.azurebfs.oauth2;

import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;

/**
 * Interface for providing client assertions for Azure Workload Identity authentication.
 *
 * <p>This interface allows custom implementations to provide JWT tokens through
 * various mechanisms, for example:
 * <ul>
 *   <li>Kubernetes Token Request API</li>
 *   <li>HashiCorp Vault</li>
 *   <li>Custom token services</li>
 *   <li>File-based tokens with custom logic</li>
 * </ul>
 *
 * <p>Implementations should be thread-safe as they may be called concurrently.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public interface ClientAssertionProvider {

/**
 * Initializes the provider with the given configuration.
 * This method is called once after the provider is instantiated via reflection,
 * so implementations loaded by class name need a no-argument constructor.
 *
 * @param configuration Hadoop configuration containing provider-specific settings
 * @param accountName Azure storage account name for account-specific configuration
 * @throws IOException if initialization fails
 */
void initialize(Configuration configuration, String accountName) throws IOException;

/**
 * Retrieves a client assertion (JWT token) for Azure Workload Identity authentication.
 *
 * <p>The returned string should be a valid JWT token that can be used as a client
 * assertion in the OAuth 2.0 client credentials flow with JWT bearer assertion.
 *
 * @return JWT token as a string
 * @throws IOException if token retrieval fails
 */
String getClientAssertion() throws IOException;

/**
 * Optional hook for releasing resources when the provider is no longer needed.
 * The default implementation does nothing.
 *
 * <p>NOTE(review): this interface does not extend {@code java.io.Closeable}, and no
 * caller of this method is visible alongside this declaration; confirm who is
 * responsible for invoking it.
 *
 * @throws IOException if cleanup fails
 */
default void close() throws IOException {
// Intentionally empty: providers without resources to release need not override this.
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@

import java.io.File;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
import org.apache.hadoop.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Provides tokens based on Azure AD Workload Identity.
Expand All @@ -38,11 +38,72 @@
private static final String EMPTY_TOKEN_FILE_ERROR = "Empty token file found at specified path: ";
private static final String TOKEN_FILE_READ_ERROR = "Error reading token file at specified path: ";

/**
 * {@link ClientAssertionProvider} that reads the client assertion from a file.
 * It backs the token-file constructor so that the file-based path keeps working
 * through the same provider abstraction as custom implementations.
 */
private static class FileBasedClientAssertionProvider implements ClientAssertionProvider {
  /** Path of the file holding the client assertion; re-read on every request. */
  private final String tokenFile;

  public FileBasedClientAssertionProvider(String tokenFile) {
    this.tokenFile = tokenFile;
  }

  /**
   * No-op: the token file path is supplied through the constructor, so neither
   * argument is consulted.
   */
  @Override
  public void initialize(Configuration configuration, String accountName) throws IOException {
    // Nothing to set up for the file-based provider.
  }

  /**
   * Reads the whole token file as UTF-8 text and returns it unmodified.
   *
   * @return the contents of the token file
   * @throws IOException if the file cannot be read (any failure is wrapped,
   *         with the original exception as the cause) or if it is empty
   */
  @Override
  public String getClientAssertion() throws IOException {
    String assertion;
    try {
      assertion = FileUtils.readFileToString(new File(tokenFile), "UTF-8");
    } catch (Exception e) {
      // Deliberately broad: every read failure surfaces as an IOException naming the path.
      throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
    }

    final boolean missingContent = assertion == null || assertion.isEmpty();
    if (missingContent) {
      throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
    }
    return assertion;
  }
}

private final String authEndpoint;
private final String clientId;
private final String tokenFile;
private final ClientAssertionProvider clientAssertionProvider;
private long tokenFetchTime = -1;

/**
* Constructor with custom ClientAssertionProvider.
* Use this for custom token retrieval mechanisms like Kubernetes Token Request API.
*

Check failure on line 81 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java#L81

blanks: end of line
* @param authority OAuth authority URL
* @param tenantId Azure AD tenant ID
* @param clientId Azure AD client ID

Check failure on line 84 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java#L84

blanks: end of line
* @param clientAssertionProvider Custom provider for client assertions
*/
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
final String clientId, ClientAssertionProvider clientAssertionProvider) {
Preconditions.checkNotNull(authority, "authority");
Preconditions.checkNotNull(tenantId, "tenantId");
Preconditions.checkNotNull(clientId, "clientId");
Preconditions.checkNotNull(clientAssertionProvider, "clientAssertionProvider");

this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
this.clientId = clientId;
this.clientAssertionProvider = clientAssertionProvider;
}

/**
* Constructor with file-based token reading (backward compatibility).
*

Check failure on line 101 in hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java

View check run for this annotation

ASF Cloudbees Jenkins ci-hadoop / Apache Yetus

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java#L101

blanks: end of line
* @param authority OAuth authority URL
* @param tenantId Azure AD tenant ID
* @param clientId Azure AD client ID
* @param tokenFile Path to file containing the JWT token
*/
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
final String clientId, final String tokenFile) {
Preconditions.checkNotNull(authority, "authority");
Expand All @@ -52,13 +113,13 @@

this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
this.clientId = clientId;
this.tokenFile = tokenFile;
this.clientAssertionProvider = new FileBasedClientAssertionProvider(tokenFile);
}

@Override
protected AzureADToken refreshToken() throws IOException {
LOG.debug("AADToken: refreshing token from JWT Assertion");
String clientAssertion = getClientAssertion();
String clientAssertion = clientAssertionProvider.getClientAssertion();
AzureADToken token = getTokenUsingJWTAssertion(clientAssertion);
tokenFetchTime = System.currentTimeMillis();
return token;
Expand Down Expand Up @@ -90,31 +151,6 @@
return expiring;
}

/**
* Gets the client assertion from the token file.
* The token file should contain the client assertion in JWT format.
* It should be a String containing Base64Url encoded JSON Web Token (JWT).
* See <a href="https://azure.github.io/azure-workload-identity/docs/faq.html#does-workload-identity-work-in-disconnected-environments">
* Azure Workload Identity FAQ</a>.
*
* @return the client assertion.
* @throws IOException if the token file is empty.
*/
private String getClientAssertion()
throws IOException {
String clientAssertion = "";
try {
File file = new File(tokenFile);
clientAssertion = FileUtils.readFileToString(file, "UTF-8");
} catch (Exception e) {
throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
}
if (Strings.isNullOrEmpty(clientAssertion)) {
throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
}
return clientAssertion;
}

/**
* Gets the Azure AD token from a client assertion in JWT format.
* This method exists to make unit testing possible.
Expand Down