Skip to content

Allow token bearer authentication through environment variables #799

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 81 additions & 45 deletions src/kaggle/api/kaggle_api_extended.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,11 @@

import csv
from datetime import datetime
from enum import Enum
import io

import json # Needed by mypy.
import logging
import os

import re # Needed by mypy.
Expand All @@ -48,7 +50,7 @@
from google.protobuf import field_mask_pb2

import kaggle
from kagglesdk import KaggleClient, KaggleEnv # type: ignore[attr-defined]
from kagglesdk import get_access_token_from_env, KaggleClient, KaggleEnv # type: ignore[attr-defined]
from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest
from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, ApiStartBlobUploadResponse, ApiBlobType
from kagglesdk.competitions.types.competition_api_service import (
Expand Down Expand Up @@ -150,6 +152,14 @@
T = TypeVar('T')


class AuthMethod(Enum):
    """Enumerates the credential kinds the API client can authenticate with."""

    LEGACY_API_KEY = 0
    ACCESS_TOKEN = 1

    def __str__(self):
        """Render the member as its bare name (e.g. ``ACCESS_TOKEN``)."""
        member_name = self.name
        return member_name


class DirectoryArchive(object):

def __init__(self, fullpath, fmt):
Expand Down Expand Up @@ -365,7 +375,9 @@ class KaggleApi:
CONFIG_NAME_COMPETITION = 'competition'
CONFIG_NAME_PATH = 'path'
CONFIG_NAME_USER = 'username'
CONFIG_NAME_AUTH_METHOD = 'auth_method'
CONFIG_NAME_KEY = 'key'
CONFIG_NAME_TOKEN = 'token'
CONFIG_NAME_SSL_CA_CERT = 'ssl_ca_cert'

HEADER_API_VERSION = 'X-Kaggle-ApiVersion'
Expand Down Expand Up @@ -398,10 +410,12 @@ class KaggleApi:

args: List[str] = []
if os.environ.get('KAGGLE_API_ENVIRONMENT') == 'LOCALHOST':
# Make it verbose when running in the debugger.
args = ['--verbose', '--local']
elif os.environ.get('KAGGLE_API_ENDPOINT') == 'http://localhost':
args = ['--local']
args.append('--local')
verbose = (os.environ.get('VERBOSE') or os.environ.get('VERBOSE_OUTPUT') or "false").lower()
if verbose in ('1', 'true', 'yes'):
args.append('--verbose')
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Kernels valid types
valid_push_kernel_types = ['script', 'notebook']
Expand Down Expand Up @@ -513,39 +527,76 @@ def retriable_func(*args):
## Authentication

def authenticate(self) -> None:
"""Authenticate the user with the Kaggle API.
if self._authenticate_with_access_token():
return
if self._authenticate_with_legacy_apikey():
return
print(
'Could not find {}. Make sure it\'s located in'
' {}. Or use the environment method. See setup'
' instructions at'
' https://github.com/Kaggle/kaggle-api/'.format(self.config_file, self.config_dir)
)
exit(1)

def _authenticate_with_legacy_apikey(self) -> bool:
"""Authenticate the user with the Kaggle API using legacy API key.

This method will generate a configuration, first checking the
environment for credential variables, and falling back to looking
for the .kaggle/kaggle.json configuration file.
"""

config_data: Dict[str, str] = {}
config_values: Dict[str, str] = {}
# Ex: 'datasets list', 'competitions files', 'models instances get', etc.
api_command = ' '.join(sys.argv[1:])

# Step 1: try getting username/password from environment
config_data = self.read_config_environment(config_data)
config_values = self.read_config_environment(config_values)

# Step 2: if credentials were not in env read in configuration file
if self.CONFIG_NAME_USER not in config_data or self.CONFIG_NAME_KEY not in config_data:
if self.CONFIG_NAME_USER not in config_values or self.CONFIG_NAME_KEY not in config_values:
if os.path.exists(self.config):
config_data = self.read_config_file(config_data)
elif self._is_help_or_version_command(api_command) or (
len(sys.argv) > 2 and api_command.startswith(self.command_prefixes_allowing_anonymous_access)
):
config_values = self.read_config_file(config_values)
elif self._command_allows_logged_out(api_command):
# Some API commands should be allowed without authentication.
return
return True
else:
raise IOError(
'Could not find {}. Make sure it\'s located in'
' {}. Or use the environment method. See setup'
' instructions at'
' https://github.com/Kaggle/kaggle-api/'.format(self.config_file, self.config_dir)
)
return False

# Step 3: Validate and save
# Username and password are required.
for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]:
if item not in config_values:
raise ValueError('Error: Missing %s in configuration.' % item)
self.config_values = config_values
self.config_values[self.CONFIG_NAME_AUTH_METHOD] = AuthMethod.LEGACY_API_KEY
self.logger.debug(f'Authenticated with legacy api key in: {self.config}')
return True

def _authenticate_with_access_token(self) -> bool:
(access_token, source) = get_access_token_from_env()
if not access_token:
return False

username = self._introspect_token(access_token)
if not username:
self.logger.debug(f'Ignoring invalid/expired access token in \"{source}\".')
return False

# Step 3: load into configuration!
self._load_config(config_data)
self.config_values: Dict[str, str] = {
self.CONFIG_NAME_TOKEN: access_token,
self.CONFIG_NAME_USER: username,
self.CONFIG_NAME_AUTH_METHOD: AuthMethod.ACCESS_TOKEN,
}
self.logger.debug(f'Authenticated with access token in: {source}')
return True

def _command_allows_logged_out(self, api_command: str) -> bool:
    """Report whether a CLI command may run without authentication.

    A command qualifies when it is a help/version request, or when at
    least two arguments follow the program name and the command starts
    with one of the prefixes that allow anonymous access.

    Parameters
    ==========
    api_command: the space-joined command line arguments
    """
    informational = self._is_help_or_version_command(api_command)
    if informational:
        return informational
    # A bare program name plus a single word is never an anonymous command.
    if len(sys.argv) <= 2:
        return False
    return api_command.startswith(self.command_prefixes_allowing_anonymous_access)

def _is_help_or_version_command(self, api_command: str) -> bool:
"""Determines if the string command passed in is for a help or version
Expand Down Expand Up @@ -582,23 +633,6 @@ def read_config_environment(self, config_data: Optional[Dict[str, str]] = None)

## Configuration

def _load_config(self, config_data: Dict[str, str]) -> None:
    """Validate config_data and store it as the active configuration.

    Parameters
    ==========
    config_data: a dictionary with configuration values (keys) to read
                 into self.config_values

    Raises a ValueError naming the first required entry (username, then
    key) that is absent from config_data.
    """
    required_names = (self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY)
    first_missing = next((name for name in required_names if name not in config_data), None)
    if first_missing is not None:
        raise ValueError('Error: Missing %s in configuration.' % first_missing)

    self.config_values = config_data

def read_config_file(self, config_data: Optional[Dict[str, str]] = None, quiet: bool = False) -> Dict[str, str]:
"""read_config_file is the first effort to get a username and key to
authenticate to the Kaggle API. Since we can get the username and password
Expand Down Expand Up @@ -715,9 +749,7 @@ def get_config_value(self, name: str) -> Optional[str]:
==========
name: the config value key to get
"""
if name in self.config_values:
return self.config_values[name]
return None
return self.config_values.get(name)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

lol, can't believe I didn't notice this!


def get_default_download_dir(self, *subdirs: str) -> str:
"""Get the download path for a file. If not defined, return default from
Expand Down Expand Up @@ -749,7 +781,7 @@ def print_config_value(self, name, prefix='- ', separator=': '):
value_out = 'None'
if name in self.config_values and self.config_values[name] is not None:
value_out = self.config_values[name]
print(prefix + name + separator + value_out)
print(f"{prefix}{name}{separator}{value_out}")

def print_config_values(self, prefix='- '):
"""Print all configuration values.
Expand All @@ -762,6 +794,7 @@ def print_config_values(self, prefix='- '):
return
print('Configuration values from ' + self.config_dir)
self.print_config_value(self.CONFIG_NAME_USER, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_AUTH_METHOD, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_PATH, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_PROXY, prefix=prefix)
self.print_config_value(self.CONFIG_NAME_COMPETITION, prefix=prefix)
Expand All @@ -777,9 +810,12 @@ def build_kaggle_client(self) -> kagglesdk.kaggle_client.KaggleClient:
)
)
verbose = '--verbose' in self.args or '-v' in self.args
# config = self.api_client.configuration
return KaggleClient(
env=env, verbose=verbose, username=self.config_values['username'], password=self.config_values['key']
env=env,
verbose=verbose,
username=self.config_values.get(self.CONFIG_NAME_USER),
password=self.config_values.get(self.CONFIG_NAME_KEY),
api_token=self.config_values.get(self.CONFIG_NAME_TOKEN),
)

def camel_to_snake(self, name: str) -> str:
Expand Down
2 changes: 1 addition & 1 deletion src/kagglesdk/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from kagglesdk.kaggle_client import KaggleClient
from kagglesdk.kaggle_env import KaggleEnv
from kagglesdk.kaggle_env import get_access_token_from_env, KaggleEnv
61 changes: 31 additions & 30 deletions src/kagglesdk/kaggle_client.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,74 @@
from kagglesdk.kernels.services.kernels_api_service import KernelsApiClient
from kagglesdk.blobs.services.blob_api_service import BlobApiClient
from kagglesdk.education.services.education_api_service import EducationApiClient
from kagglesdk.security.services.oauth_service import OAuthClient
from kagglesdk.models.services.model_api_service import ModelApiClient
from kagglesdk.models.services.model_service import ModelClient
from kagglesdk.kernels.services.kernels_api_service import KernelsApiClient
from kagglesdk.competitions.services.competition_api_service import CompetitionApiClient
from kagglesdk.education.services.education_api_service import EducationApiClient
from kagglesdk.datasets.services.dataset_api_service import DatasetApiClient
from kagglesdk.admin.services.inbox_file_service import InboxFileClient
from kagglesdk.security.services.oauth_service import OAuthClient
from kagglesdk.users.services.account_service import AccountClient
from kagglesdk.admin.services.inbox_file_service import InboxFileClient
from kagglesdk.blobs.services.blob_api_service import BlobApiClient
from kagglesdk.kaggle_env import KaggleEnv
from kagglesdk.kaggle_http_client import KaggleHttpClient


class KaggleClient(object):
class Kernels(object):
def __init__(self, http_client: KaggleHttpClient):
self.kernels_api_client = KernelsApiClient(http_client)

class Blobs(object):
def __init__(self, http_client: KaggleHttpClient):
self.blob_api_client = BlobApiClient(http_client)

class Education(object):
class Security(object):
def __init__(self, http_client: KaggleHttpClient):
self.education_api_client = EducationApiClient(http_client)
self.oauth_client = OAuthClient(http_client)

class Models(object):
def __init__(self, http_client: KaggleHttpClient):
self.model_api_client = ModelApiClient(http_client)
self.model_client = ModelClient(http_client)

class Kernels(object):
def __init__(self, http_client: KaggleHttpClient):
self.kernels_api_client = KernelsApiClient(http_client)

class Competitions(object):
def __init__(self, http_client: KaggleHttpClient):
self.competition_api_client = CompetitionApiClient(http_client)

class Education(object):
def __init__(self, http_client: KaggleHttpClient):
self.education_api_client = EducationApiClient(http_client)

class Datasets(object):
def __init__(self, http_client: KaggleHttpClient):
self.dataset_api_client = DatasetApiClient(http_client)

class Admin(object):
class Users(object):
def __init__(self, http_client: KaggleHttpClient):
self.inbox_file_client = InboxFileClient(http_client)
self.account_client = AccountClient(http_client)

class Security(object):
class Admin(object):
def __init__(self, http_client: KaggleHttpClient):
self.oauth_client = OAuthClient(http_client)
self.inbox_file_client = InboxFileClient(http_client)

class Users(object):
class Blobs(object):
def __init__(self, http_client: KaggleHttpClient):
self.account_client = AccountClient(http_client)
self.blob_api_client = BlobApiClient(http_client)

def __init__(self, env: KaggleEnv = None, verbose: bool = False, username: str = None, password: str = None):
self._http_client = http_client = KaggleHttpClient(env, verbose, self._renew_iap_token, username=username, password=password)
self.kernels = KaggleClient.Kernels(http_client)
self.blobs = KaggleClient.Blobs(http_client)
self.education = KaggleClient.Education(http_client)
def __init__(self, env: KaggleEnv = None, verbose: bool = False, username: str = None, password: str = None, api_token: str = None):
self._http_client = http_client = KaggleHttpClient(env, verbose, username=username, password=password, api_token=api_token)
self.security = KaggleClient.Security(http_client)
self.models = KaggleClient.Models(http_client)
self.kernels = KaggleClient.Kernels(http_client)
self.competitions = KaggleClient.Competitions(http_client)
self.education = KaggleClient.Education(http_client)
self.datasets = KaggleClient.Datasets(http_client)
self.admin = KaggleClient.Admin(http_client)
self.security = KaggleClient.Security(http_client)
self.users = KaggleClient.Users(http_client)
self.admin = KaggleClient.Admin(http_client)
self.blobs = KaggleClient.Blobs(http_client)
self.username = username
self.password = password
self.api_token = api_token

def http_client(self):
def http_client(self) -> str:
return self._http_client

def _renew_iap_token(self):
def _renew_iap_token(self) -> str:
return self.admin.admin_client.renew_iap_token()

def __enter__(self):
Expand Down
34 changes: 32 additions & 2 deletions src/kagglesdk/kaggle_env.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import logging
import os
from enum import Enum
from pathlib import Path

KAGGLE_NOTEBOOK_ENV_VAR_NAME = "KAGGLE_KERNEL_RUN_TYPE"
KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME = "KAGGLE_DATA_PROXY_URL"
KAGGLE_API_V1_TOKEN_PATH = "KAGGLE_API_V1_TOKEN"

logger = logging.getLogger(__name__)
def get_logger():
    """Look up and return the logger registered under this module's name."""
    module_logger = logging.getLogger(__name__)
    return module_logger

class KaggleEnv(Enum):
LOCAL = 0 # localhost
Expand Down Expand Up @@ -50,10 +52,38 @@ def is_in_kaggle_notebook() -> bool:
if os.getenv(KAGGLE_NOTEBOOK_ENV_VAR_NAME) is not None:
if os.getenv(KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME) is None:
# Missing endpoint for the Jwt client
logger.warning(
get_logger().warning(
"Can't use the Kaggle Cache. "
f"The '{KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME}' environment variable is not set."
)
return False
return True
return False

def _get_access_token_from_file(path):
    """Read an access token from the file at ``path``.

    Returns a ``(token, path)`` tuple when the file exists and holds
    non-whitespace text, and ``(None, None)`` when ``path`` is falsy, the
    file does not exist, or its stripped contents are empty.
    """
    not_found = (None, None)
    if not path:
        return not_found

    candidate = Path(path)
    if not candidate.exists():
        return not_found

    stripped = candidate.read_text().strip()
    if stripped:
        get_logger().debug(f"Using access token from file: \"{path}\"")
        return (stripped, path)
    return not_found

def get_access_token_from_env():
    """Locate an access token supplied through the environment.

    Lookup order:
      1. Inside a Kaggle notebook, the file whose path is stored in the
         environment variable named by ``KAGGLE_API_V1_TOKEN_PATH``.
      2. The ``KAGGLE_API_TOKEN`` environment variable.

    Returns a ``(token, source)`` tuple, where ``source`` is the name of
    the environment variable the token came from, or ``(None, None)``
    when no token is found.
    """
    if is_in_kaggle_notebook():
        # The helper always returns a 2-tuple, and even (None, None) is
        # truthy, so unpack it and test the token value itself.
        token, _ = _get_access_token_from_file(os.environ.get(KAGGLE_API_V1_TOKEN_PATH))
        if token:
            return (token, KAGGLE_API_V1_TOKEN_PATH)

    access_token = os.environ.get('KAGGLE_API_TOKEN')
    if access_token is not None:
        get_logger().debug("Using access token from KAGGLE_API_TOKEN environment variable")
        return (access_token, 'KAGGLE_API_TOKEN')

    return (None, None)
Loading