Skip to content

Commit dee5a7d

Browse files
committed
Add support for custom ClientAssertionProvider in WorkloadIdentityTokenProvider
1 parent 840fc75 commit dee5a7d

File tree

4 files changed

+175
-37
lines changed

4 files changed

+175
-37
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@
5555
import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;
5656
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;
5757
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
58+
import org.apache.hadoop.fs.azurebfs.oauth2.ClientAssertionProvider;
5859
import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider;
5960
import org.apache.hadoop.fs.azurebfs.oauth2.CustomTokenProviderAdapter;
6061
import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider;
@@ -1277,12 +1278,38 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio
12771278
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT);
12781279
String clientId =
12791280
getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID);
1280-
String tokenFile =
1281-
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
1282-
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
1283-
tokenProvider = new WorkloadIdentityTokenProvider(
1284-
authority, tenantGuid, clientId, tokenFile);
1285-
LOG.trace("WorkloadIdentityTokenProvider initialized");
1281+
1282+
// Check if a custom ClientAssertionProvider is configured
1283+
String clientAssertionProviderClassName =
1284+
getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ASSERTION_PROVIDER_CLASS);
1285+
1286+
if (clientAssertionProviderClassName != null && !clientAssertionProviderClassName.trim().isEmpty()) {
1287+
// Use custom ClientAssertionProvider
1288+
try {
1289+
Class<?> providerClass = Class.forName(clientAssertionProviderClassName.trim());
1290+
ClientAssertionProvider clientAssertionProvider =
1291+
(ClientAssertionProvider) providerClass.getDeclaredConstructor().newInstance();
1292+
1293+
// Initialize the provider with configuration
1294+
clientAssertionProvider.initialize(rawConfig, accountName);
1295+
1296+
tokenProvider = new WorkloadIdentityTokenProvider(
1297+
authority, tenantGuid, clientId, clientAssertionProvider);
1298+
LOG.trace("WorkloadIdentityTokenProvider initialized with custom ClientAssertionProvider: {}",
1299+
clientAssertionProviderClassName);
1300+
} catch (Exception e) {
1301+
throw new TokenAccessProviderException(
1302+
"Failed to initialize custom ClientAssertionProvider: " + clientAssertionProviderClassName, e);
1303+
}
1304+
} else {
1305+
// Use file-based approach (backward compatibility)
1306+
String tokenFile =
1307+
getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE,
1308+
AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE);
1309+
tokenProvider = new WorkloadIdentityTokenProvider(
1310+
authority, tenantGuid, clientId, tokenFile);
1311+
LOG.trace("WorkloadIdentityTokenProvider initialized with file-based token");
1312+
}
12861313
} else {
12871314
throw new IllegalArgumentException("Failed to initialize " + tokenProviderClass);
12881315
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -339,6 +339,9 @@ public final class ConfigurationKeys {
339339
public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT = "fs.azure.account.oauth2.refresh.token.endpoint";
340340
/** Key for oauth AAD workload identity token file path: {@value}. */
341341
public static final String FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE = "fs.azure.account.oauth2.token.file";
342+
343+
/** Key for custom client assertion provider class for WorkloadIdentityTokenProvider: {@value}. */
344+
public static final String FS_AZURE_ACCOUNT_OAUTH_CLIENT_ASSERTION_PROVIDER_CLASS = "fs.azure.account.oauth2.client.assertion.provider.class";
342345
/** Key for enabling the tracking of ABFS API latency and sending the latency numbers to the ABFS API service */
343346
public static final String FS_AZURE_ABFS_LATENCY_TRACK = "fs.azure.abfs.latency.track";
344347

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.fs.azurebfs.oauth2;
20+
21+
import java.io.IOException;
22+
23+
import org.apache.hadoop.classification.InterfaceAudience;
24+
import org.apache.hadoop.classification.InterfaceStability;
25+
import org.apache.hadoop.conf.Configuration;
26+
27+
/**
28+
* Interface for providing client assertions for Azure Workload Identity authentication.
29+
*
30+
* <p>This interface allows custom implementations to provide JWT tokens through various mechanisms:
31+
* <ul>
* <li>Kubernetes Token Request API</li>
32+
* <li>HashiCorp Vault</li>
33+
* <li>Custom token services</li>
34+
* <li>File-based tokens with custom logic</li>
* </ul>
35+
*
* <p>A custom implementation is selected with the
* {@code fs.azure.account.oauth2.client.assertion.provider.class} setting. It is
* instantiated reflectively through its no-argument constructor, so it must declare
* an accessible one, and {@code initialize} is then called on it before it is handed
* to {@code WorkloadIdentityTokenProvider}.
*
36+
* <p>Implementations should be thread-safe as they may be called concurrently.
37+
*/
38+
@InterfaceAudience.Public
39+
@InterfaceStability.Evolving
40+
public interface ClientAssertionProvider {
41+
42+
/**
43+
* Initializes the provider with the given configuration.
44+
* This method is called once after the provider is instantiated via reflection.
45+
*
46+
* @param configuration Hadoop configuration containing provider-specific settings
47+
* @param accountName Azure storage account name for account-specific configuration
48+
* @throws IOException if initialization fails
49+
*/
50+
void initialize(Configuration configuration, String accountName) throws IOException;
51+
52+
/**
53+
* Retrieves a client assertion (JWT token) for Azure Workload Identity authentication.
54+
*
55+
* <p>The returned string should be a valid JWT token that can be used as a client assertion
56+
* in the OAuth 2.0 client credentials flow with JWT bearer assertion.
* {@code WorkloadIdentityTokenProvider} calls this method each time it refreshes its
* access token.
57+
*
58+
* @return JWT token as a string
59+
* @throws IOException if token retrieval fails
60+
*/
61+
String getClientAssertion() throws IOException;
62+
63+
/**
64+
* Optional: clean up resources when the provider is no longer needed.
65+
* Default implementation does nothing.
* NOTE(review): no caller of this method is visible in this change; confirm which
* component is responsible for invoking it.
66+
*
67+
* @throws IOException if cleanup fails
68+
*/
69+
default void close() throws IOException {
70+
// Default: no-op
71+
}
72+
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java

Lines changed: 67 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,13 @@
2020

2121
import java.io.File;
2222
import java.io.IOException;
23-
24-
import org.slf4j.Logger;
25-
import org.slf4j.LoggerFactory;
2623
import org.apache.commons.io.FileUtils;
2724
import org.apache.hadoop.classification.VisibleForTesting;
25+
import org.apache.hadoop.conf.Configuration;
2826
import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
2927
import org.apache.hadoop.util.Preconditions;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
3030

3131
/**
3232
* Provides tokens based on Azure AD Workload Identity.
@@ -38,11 +38,72 @@ public class WorkloadIdentityTokenProvider extends AccessTokenProvider {
3838
private static final String EMPTY_TOKEN_FILE_ERROR = "Empty token file found at specified path: ";
3939
private static final String TOKEN_FILE_READ_ERROR = "Error reading token file at specified path: ";
4040

41+
/**
42+
* Internal implementation of ClientAssertionProvider for file-based token reading.
43+
* This provides backward compatibility for the file-based constructor.
* The token file is expected to contain the client assertion in JWT format.
44+
*/
45+
private static class FileBasedClientAssertionProvider implements ClientAssertionProvider {
46+
private final String tokenFile;
47+
/**
* Creates a provider that reads the client assertion from the given file.
*
* @param tokenFile path of the file holding the client assertion; it is stored
* as given and only opened when {@code getClientAssertion} is called
*/
48+
public FileBasedClientAssertionProvider(String tokenFile) {
49+
this.tokenFile = tokenFile;
50+
}
51+
52+
@Override
53+
public void initialize(Configuration configuration, String accountName) throws IOException {
54+
// No initialization needed for file-based provider
55+
}
56+
57+
/**
* Reads the whole token file as UTF-8 and returns its content as the client assertion.
* The file is read again on every call; nothing is cached.
* NOTE(review): the content is returned untrimmed, and the emptiness check looks
* like it only rejects null or zero-length content; confirm whether a
* whitespace-only file should also be rejected.
*
* @return the content of the token file
* @throws IOException if reading the file fails for any reason (the original
* exception is attached as the cause) or if the file content is empty
*/
@Override
58+
public String getClientAssertion() throws IOException {
59+
String clientAssertion = "";
60+
try {
61+
File file = new File(tokenFile);
62+
clientAssertion = FileUtils.readFileToString(file, "UTF-8");
63+
} catch (Exception e) {
// Any failure while reading, not only IOException, is reported as an IOException naming the file.
64+
throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
65+
}
66+
if (Strings.isNullOrEmpty(clientAssertion)) {
67+
throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
68+
}
69+
return clientAssertion;
70+
}
71+
}
72+
4173
private final String authEndpoint;
4274
private final String clientId;
43-
private final String tokenFile;
75+
private final ClientAssertionProvider clientAssertionProvider;
4476
private long tokenFetchTime = -1;
4577

78+
/**
79+
* Constructor with custom ClientAssertionProvider.
80+
* Use this for custom token retrieval mechanisms like Kubernetes Token Request API.
* Every argument is validated with {@code Preconditions.checkNotNull}.
81+
*
82+
* @param authority OAuth authority URL; it is concatenated directly with the tenant ID,
* so it presumably has to end with a path separator (TODO confirm)
83+
* @param tenantId Azure AD tenant ID
84+
* @param clientId Azure AD client ID
85+
* @param clientAssertionProvider Custom provider for client assertions. This constructor
* does not call {@code initialize} on it; the caller is expected to have done so already.
86+
*/
87+
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
88+
final String clientId, ClientAssertionProvider clientAssertionProvider) {
89+
Preconditions.checkNotNull(authority, "authority");
90+
Preconditions.checkNotNull(tenantId, "tenantId");
91+
Preconditions.checkNotNull(clientId, "clientId");
92+
Preconditions.checkNotNull(clientAssertionProvider, "clientAssertionProvider");
93+
94+
// The token endpoint is a plain concatenation; no separator is inserted between the parts.
this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
95+
this.clientId = clientId;
96+
this.clientAssertionProvider = clientAssertionProvider;
97+
}
98+
99+
/**
100+
* Constructor with file-based token reading (backward compatibility).
101+
*
102+
* @param authority OAuth authority URL
103+
* @param tenantId Azure AD tenant ID
104+
* @param clientId Azure AD client ID
105+
* @param tokenFile Path to file containing the JWT token
106+
*/
46107
public WorkloadIdentityTokenProvider(final String authority, final String tenantId,
47108
final String clientId, final String tokenFile) {
48109
Preconditions.checkNotNull(authority, "authority");
@@ -52,13 +113,13 @@ public WorkloadIdentityTokenProvider(final String authority, final String tenant
52113

53114
this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH;
54115
this.clientId = clientId;
55-
this.tokenFile = tokenFile;
116+
this.clientAssertionProvider = new FileBasedClientAssertionProvider(tokenFile);
56117
}
57118

58119
@Override
59120
protected AzureADToken refreshToken() throws IOException {
60121
LOG.debug("AADToken: refreshing token from JWT Assertion");
61-
String clientAssertion = getClientAssertion();
122+
String clientAssertion = clientAssertionProvider.getClientAssertion();
62123
AzureADToken token = getTokenUsingJWTAssertion(clientAssertion);
63124
tokenFetchTime = System.currentTimeMillis();
64125
return token;
@@ -90,31 +151,6 @@ protected boolean isTokenAboutToExpire() {
90151
return expiring;
91152
}
92153

93-
/**
94-
* Gets the client assertion from the token file.
95-
* The token file should contain the client assertion in JWT format.
96-
* It should be a String containing Base64Url encoded JSON Web Token (JWT).
97-
* See <a href="https://azure.github.io/azure-workload-identity/docs/faq.html#does-workload-identity-work-in-disconnected-environments">
98-
* Azure Workload Identity FAQ</a>.
99-
*
100-
* @return the client assertion.
101-
* @throws IOException if the token file is empty.
102-
*/
103-
private String getClientAssertion()
104-
throws IOException {
105-
String clientAssertion = "";
106-
try {
107-
File file = new File(tokenFile);
108-
clientAssertion = FileUtils.readFileToString(file, "UTF-8");
109-
} catch (Exception e) {
110-
throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e);
111-
}
112-
if (Strings.isNullOrEmpty(clientAssertion)) {
113-
throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile);
114-
}
115-
return clientAssertion;
116-
}
117-
118154
/**
119155
* Gets the Azure AD token from a client assertion in JWT format.
120156
* This method exists to make unit testing possible.

0 commit comments

Comments
 (0)