Skip to content

Commit 092602e

Browse files
authored
Allow token bearer authentication through environment variables (#799)
1 parent 6f128a7 commit 092602e

File tree

8 files changed

+158
-96
lines changed

8 files changed

+158
-96
lines changed

src/kaggle/api/kaggle_api_extended.py

Lines changed: 81 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,11 @@
1919

2020
import csv
2121
from datetime import datetime
22+
from enum import Enum
2223
import io
2324

2425
import json # Needed by mypy.
26+
import logging
2527
import os
2628

2729
import re # Needed by mypy.
@@ -48,7 +50,7 @@
4850
from google.protobuf import field_mask_pb2
4951

5052
import kaggle
51-
from kagglesdk import KaggleClient, KaggleEnv # type: ignore[attr-defined]
53+
from kagglesdk import get_access_token_from_env, KaggleClient, KaggleEnv # type: ignore[attr-defined]
5254
from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest
5355
from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, ApiStartBlobUploadResponse, ApiBlobType
5456
from kagglesdk.competitions.types.competition_api_service import (
@@ -150,6 +152,14 @@
150152
T = TypeVar('T')
151153

152154

155+
class AuthMethod(Enum):
156+
LEGACY_API_KEY = 0
157+
ACCESS_TOKEN = 1
158+
159+
def __str__(self):
160+
return self.name
161+
162+
153163
class DirectoryArchive(object):
154164

155165
def __init__(self, fullpath, fmt):
@@ -365,7 +375,9 @@ class KaggleApi:
365375
CONFIG_NAME_COMPETITION = 'competition'
366376
CONFIG_NAME_PATH = 'path'
367377
CONFIG_NAME_USER = 'username'
378+
CONFIG_NAME_AUTH_METHOD = 'auth_method'
368379
CONFIG_NAME_KEY = 'key'
380+
CONFIG_NAME_TOKEN = 'token'
369381
CONFIG_NAME_SSL_CA_CERT = 'ssl_ca_cert'
370382

371383
HEADER_API_VERSION = 'X-Kaggle-ApiVersion'
@@ -398,10 +410,12 @@ class KaggleApi:
398410

399411
args: List[str] = []
400412
if os.environ.get('KAGGLE_API_ENVIRONMENT') == 'LOCALHOST':
401-
# Make it verbose when running in the debugger.
402-
args = ['--verbose', '--local']
403-
elif os.environ.get('KAGGLE_API_ENDPOINT') == 'http://localhost':
404-
args = ['--local']
413+
args.append('--local')
414+
verbose = (os.environ.get('VERBOSE') or os.environ.get('VERBOSE_OUTPUT') or "false").lower()
415+
if verbose in ('1', 'true', 'yes'):
416+
args.append('--verbose')
417+
logging.basicConfig(level=logging.DEBUG)
418+
logger = logging.getLogger(__name__)
405419

406420
# Kernels valid types
407421
valid_push_kernel_types = ['script', 'notebook']
@@ -513,39 +527,76 @@ def retriable_func(*args):
513527
## Authentication
514528

515529
def authenticate(self) -> None:
    """Authenticate the user with the Kaggle API.

    An access token is tried first (``_authenticate_with_access_token``),
    then the legacy API key (``_authenticate_with_legacy_apikey``). If
    neither succeeds, a setup hint is printed and the process exits.

    Raises:
        SystemExit: with code 1 when no credentials could be found.
    """
    if self._authenticate_with_access_token():
        return
    if self._authenticate_with_legacy_apikey():
        return
    print(
        'Could not find {}. Make sure it\'s located in'
        ' {}. Or use the environment method. See setup'
        ' instructions at'
        ' https://github.com/Kaggle/kaggle-api/'.format(self.config_file, self.config_dir)
    )
    # sys.exit rather than the bare exit(): the latter is injected by the
    # `site` module for interactive use and is not available under
    # `python -S` or in frozen/embedded interpreters.
    sys.exit(1)
541+
542+
def _authenticate_with_legacy_apikey(self) -> bool:
543+
"""Authenticate the user with the Kaggle API using legacy API key.
517544
518545
This method will generate a configuration, first checking the
519546
environment for credential variables, and falling back to looking
520547
for the .kaggle/kaggle.json configuration file.
521548
"""
522549

523-
config_data: Dict[str, str] = {}
550+
config_values: Dict[str, str] = {}
524551
# Ex: 'datasets list', 'competitions files', 'models instances get', etc.
525552
api_command = ' '.join(sys.argv[1:])
526553

527554
# Step 1: try getting username/password from environment
528-
config_data = self.read_config_environment(config_data)
555+
config_values = self.read_config_environment(config_values)
529556

530557
# Step 2: if credentials were not in env read in configuration file
531-
if self.CONFIG_NAME_USER not in config_data or self.CONFIG_NAME_KEY not in config_data:
558+
if self.CONFIG_NAME_USER not in config_values or self.CONFIG_NAME_KEY not in config_values:
532559
if os.path.exists(self.config):
533-
config_data = self.read_config_file(config_data)
534-
elif self._is_help_or_version_command(api_command) or (
535-
len(sys.argv) > 2 and api_command.startswith(self.command_prefixes_allowing_anonymous_access)
536-
):
560+
config_values = self.read_config_file(config_values)
561+
elif self._command_allows_logged_out(api_command):
537562
# Some API commands should be allowed without authentication.
538-
return
563+
return True
539564
else:
540-
raise IOError(
541-
'Could not find {}. Make sure it\'s located in'
542-
' {}. Or use the environment method. See setup'
543-
' instructions at'
544-
' https://github.com/Kaggle/kaggle-api/'.format(self.config_file, self.config_dir)
545-
)
565+
return False
566+
567+
# Step 3: Validate and save
568+
# Username and password are required.
569+
for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]:
570+
if item not in config_values:
571+
raise ValueError('Error: Missing %s in configuration.' % item)
572+
self.config_values = config_values
573+
self.config_values[self.CONFIG_NAME_AUTH_METHOD] = AuthMethod.LEGACY_API_KEY
574+
self.logger.debug(f'Authenticated with legacy api key in: {self.config}')
575+
return True
576+
577+
def _authenticate_with_access_token(self) -> bool:
578+
(access_token, source) = get_access_token_from_env()
579+
if not access_token:
580+
return False
581+
582+
username = self._introspect_token(access_token)
583+
if not username:
584+
self.logger.debug(f'Ignoring invalid/expired access token in \"{source}\".')
585+
return False
546586

547-
# Step 3: load into configuration!
548-
self._load_config(config_data)
587+
self.config_values: Dict[str, str] = {
588+
self.CONFIG_NAME_TOKEN: access_token,
589+
self.CONFIG_NAME_USER: username,
590+
self.CONFIG_NAME_AUTH_METHOD: AuthMethod.ACCESS_TOKEN,
591+
}
592+
self.logger.debug(f'Authenticated with access token in: {source}')
593+
return True
594+
595+
def _command_allows_logged_out(self, api_command: str) -> bool:
596+
# Some API commands do not require authentication.
597+
return self._is_help_or_version_command(api_command) or (
598+
len(sys.argv) > 2 and api_command.startswith(self.command_prefixes_allowing_anonymous_access)
599+
)
549600

550601
def _is_help_or_version_command(self, api_command: str) -> bool:
551602
"""Determines if the string command passed in is for a help or version
@@ -582,23 +633,6 @@ def read_config_environment(self, config_data: Optional[Dict[str, str]] = None)
582633

583634
## Configuration
584635

585-
def _load_config(self, config_data: Dict[str, str]) -> None:
586-
"""The final step of the authenticate steps, where we load the values from
587-
config_data into the Configuration object.
588-
589-
Parameters
590-
==========
591-
config_data: a dictionary with configuration values (keys) to read
592-
into self.config_values
593-
"""
594-
# Username and password are required.
595-
596-
for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]:
597-
if item not in config_data:
598-
raise ValueError('Error: Missing %s in configuration.' % item)
599-
600-
self.config_values = config_data
601-
602636
def read_config_file(self, config_data: Optional[Dict[str, str]] = None, quiet: bool = False) -> Dict[str, str]:
603637
"""read_config_file is the first effort to get a username and key to
604638
authenticate to the Kaggle API. Since we can get the username and password
@@ -715,9 +749,7 @@ def get_config_value(self, name: str) -> Optional[str]:
715749
==========
716750
name: the config value key to get
717751
"""
718-
if name in self.config_values:
719-
return self.config_values[name]
720-
return None
752+
return self.config_values.get(name)
721753

722754
def get_default_download_dir(self, *subdirs: str) -> str:
723755
"""Get the download path for a file. If not defined, return default from
@@ -749,7 +781,7 @@ def print_config_value(self, name, prefix='- ', separator=': '):
749781
value_out = 'None'
750782
if name in self.config_values and self.config_values[name] is not None:
751783
value_out = self.config_values[name]
752-
print(prefix + name + separator + value_out)
784+
print(f"{prefix}{name}{separator}{value_out}")
753785

754786
def print_config_values(self, prefix='- '):
755787
"""Print all configuration values.
@@ -762,6 +794,7 @@ def print_config_values(self, prefix='- '):
762794
return
763795
print('Configuration values from ' + self.config_dir)
764796
self.print_config_value(self.CONFIG_NAME_USER, prefix=prefix)
797+
self.print_config_value(self.CONFIG_NAME_AUTH_METHOD, prefix=prefix)
765798
self.print_config_value(self.CONFIG_NAME_PATH, prefix=prefix)
766799
self.print_config_value(self.CONFIG_NAME_PROXY, prefix=prefix)
767800
self.print_config_value(self.CONFIG_NAME_COMPETITION, prefix=prefix)
@@ -777,9 +810,12 @@ def build_kaggle_client(self) -> kagglesdk.kaggle_client.KaggleClient:
777810
)
778811
)
779812
verbose = '--verbose' in self.args or '-v' in self.args
780-
# config = self.api_client.configuration
781813
return KaggleClient(
782-
env=env, verbose=verbose, username=self.config_values['username'], password=self.config_values['key']
814+
env=env,
815+
verbose=verbose,
816+
username=self.config_values.get(self.CONFIG_NAME_USER),
817+
password=self.config_values.get(self.CONFIG_NAME_KEY),
818+
api_token=self.config_values.get(self.CONFIG_NAME_TOKEN),
783819
)
784820

785821
def camel_to_snake(self, name: str) -> str:

src/kagglesdk/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
from kagglesdk.kaggle_client import KaggleClient
2-
from kagglesdk.kaggle_env import KaggleEnv
2+
from kagglesdk.kaggle_env import get_access_token_from_env, KaggleEnv

src/kagglesdk/kaggle_client.py

Lines changed: 31 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,74 @@
1-
from kagglesdk.kernels.services.kernels_api_service import KernelsApiClient
2-
from kagglesdk.blobs.services.blob_api_service import BlobApiClient
3-
from kagglesdk.education.services.education_api_service import EducationApiClient
1+
from kagglesdk.security.services.oauth_service import OAuthClient
42
from kagglesdk.models.services.model_api_service import ModelApiClient
53
from kagglesdk.models.services.model_service import ModelClient
4+
from kagglesdk.kernels.services.kernels_api_service import KernelsApiClient
65
from kagglesdk.competitions.services.competition_api_service import CompetitionApiClient
6+
from kagglesdk.education.services.education_api_service import EducationApiClient
77
from kagglesdk.datasets.services.dataset_api_service import DatasetApiClient
8-
from kagglesdk.admin.services.inbox_file_service import InboxFileClient
9-
from kagglesdk.security.services.oauth_service import OAuthClient
108
from kagglesdk.users.services.account_service import AccountClient
9+
from kagglesdk.admin.services.inbox_file_service import InboxFileClient
10+
from kagglesdk.blobs.services.blob_api_service import BlobApiClient
1111
from kagglesdk.kaggle_env import KaggleEnv
1212
from kagglesdk.kaggle_http_client import KaggleHttpClient
1313

1414

1515
class KaggleClient(object):
16-
class Kernels(object):
17-
def __init__(self, http_client: KaggleHttpClient):
18-
self.kernels_api_client = KernelsApiClient(http_client)
19-
20-
class Blobs(object):
21-
def __init__(self, http_client: KaggleHttpClient):
22-
self.blob_api_client = BlobApiClient(http_client)
23-
24-
class Education(object):
16+
class Security(object):
2517
def __init__(self, http_client: KaggleHttpClient):
26-
self.education_api_client = EducationApiClient(http_client)
18+
self.oauth_client = OAuthClient(http_client)
2719

2820
class Models(object):
2921
def __init__(self, http_client: KaggleHttpClient):
3022
self.model_api_client = ModelApiClient(http_client)
3123
self.model_client = ModelClient(http_client)
3224

25+
class Kernels(object):
26+
def __init__(self, http_client: KaggleHttpClient):
27+
self.kernels_api_client = KernelsApiClient(http_client)
28+
3329
class Competitions(object):
3430
def __init__(self, http_client: KaggleHttpClient):
3531
self.competition_api_client = CompetitionApiClient(http_client)
3632

33+
class Education(object):
34+
def __init__(self, http_client: KaggleHttpClient):
35+
self.education_api_client = EducationApiClient(http_client)
36+
3737
class Datasets(object):
3838
def __init__(self, http_client: KaggleHttpClient):
3939
self.dataset_api_client = DatasetApiClient(http_client)
4040

41-
class Admin(object):
41+
class Users(object):
4242
def __init__(self, http_client: KaggleHttpClient):
43-
self.inbox_file_client = InboxFileClient(http_client)
43+
self.account_client = AccountClient(http_client)
4444

45-
class Security(object):
45+
class Admin(object):
4646
def __init__(self, http_client: KaggleHttpClient):
47-
self.oauth_client = OAuthClient(http_client)
47+
self.inbox_file_client = InboxFileClient(http_client)
4848

49-
class Users(object):
49+
class Blobs(object):
5050
def __init__(self, http_client: KaggleHttpClient):
51-
self.account_client = AccountClient(http_client)
51+
self.blob_api_client = BlobApiClient(http_client)
5252

53-
def __init__(self, env: KaggleEnv = None, verbose: bool = False, username: str = None, password: str = None):
54-
self._http_client = http_client = KaggleHttpClient(env, verbose, self._renew_iap_token, username=username, password=password)
55-
self.kernels = KaggleClient.Kernels(http_client)
56-
self.blobs = KaggleClient.Blobs(http_client)
57-
self.education = KaggleClient.Education(http_client)
53+
def __init__(self, env: KaggleEnv = None, verbose: bool = False, username: str = None, password: str = None, api_token: str = None):
54+
self._http_client = http_client = KaggleHttpClient(env, verbose, username=username, password=password, api_token=api_token)
55+
self.security = KaggleClient.Security(http_client)
5856
self.models = KaggleClient.Models(http_client)
57+
self.kernels = KaggleClient.Kernels(http_client)
5958
self.competitions = KaggleClient.Competitions(http_client)
59+
self.education = KaggleClient.Education(http_client)
6060
self.datasets = KaggleClient.Datasets(http_client)
61-
self.admin = KaggleClient.Admin(http_client)
62-
self.security = KaggleClient.Security(http_client)
6361
self.users = KaggleClient.Users(http_client)
62+
self.admin = KaggleClient.Admin(http_client)
63+
self.blobs = KaggleClient.Blobs(http_client)
6464
self.username = username
6565
self.password = password
66+
self.api_token = api_token
6667

67-
def http_client(self):
68+
def http_client(self) -> "KaggleHttpClient":
    """Return the ``KaggleHttpClient`` instance created in ``__init__``."""
    return self._http_client
6970

70-
def _renew_iap_token(self):
71+
def _renew_iap_token(self) -> str:
7172
return self.admin.admin_client.renew_iap_token()
7273

7374
def __enter__(self):

src/kagglesdk/kaggle_env.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
import logging
22
import os
33
from enum import Enum
4+
from pathlib import Path
45

56
KAGGLE_NOTEBOOK_ENV_VAR_NAME = "KAGGLE_KERNEL_RUN_TYPE"
67
KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME = "KAGGLE_DATA_PROXY_URL"
78
KAGGLE_API_V1_TOKEN_PATH = "KAGGLE_API_V1_TOKEN"
89

9-
logger = logging.getLogger(__name__)
10+
def get_logger():
11+
return logging.getLogger(__name__)
1012

1113
class KaggleEnv(Enum):
1214
LOCAL = 0 # localhost
@@ -50,10 +52,38 @@ def is_in_kaggle_notebook() -> bool:
5052
if os.getenv(KAGGLE_NOTEBOOK_ENV_VAR_NAME) is not None:
5153
if os.getenv(KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME) is None:
5254
# Missing endpoint for the Jwt client
53-
logger.warning(
55+
get_logger().warning(
5456
"Can't use the Kaggle Cache. "
5557
f"The '{KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME}' environment variable is not set."
5658
)
5759
return False
5860
return True
5961
return False
62+
63+
def _get_access_token_from_file(path):
    """Read an access token from the file at *path*.

    Args:
        path: Filesystem path of the token file; may be ``None`` or empty.

    Returns:
        ``(token, path)`` where ``token`` is the file's content with
        surrounding whitespace stripped, or ``(None, None)`` when *path* is
        unset, does not point to a regular file, or the file is blank.
    """
    if not path:
        return (None, None)

    token_path = Path(path)
    # is_file() rather than exists(): a directory at this path would make
    # read_text() raise instead of letting the caller fall through to its
    # other token sources.
    if not token_path.is_file():
        return (None, None)

    token_value = token_path.read_text().strip()
    if not token_value:
        return (None, None)

    get_logger().debug(f"Using access token from file: \"{path}\"")
    return (token_value, path)
77+
78+
def get_access_token_from_env():
    """Look up a Kaggle access token from the process environment.

    Lookup order:
      1. When ``is_in_kaggle_notebook()`` is true, the file whose path is
         stored in the environment variable named by
         ``KAGGLE_API_V1_TOKEN_PATH``.
      2. The ``KAGGLE_API_TOKEN`` environment variable.

    Returns:
        ``(token, source)`` where ``source`` is the name of the environment
        variable the token was resolved through, or ``(None, None)`` when no
        token is configured.
    """
    if is_in_kaggle_notebook():
        # The helper returns a (token, path) pair. A tuple is truthy even
        # when it is (None, None), so unpack it and test the token itself;
        # otherwise the KAGGLE_API_TOKEN fallback below is never reached.
        token, _ = _get_access_token_from_file(os.environ.get(KAGGLE_API_V1_TOKEN_PATH))
        if token:
            return (token, KAGGLE_API_V1_TOKEN_PATH)

    access_token = os.environ.get('KAGGLE_API_TOKEN')
    if access_token is not None:
        get_logger().debug("Using access token from KAGGLE_API_TOKEN environment variable")
        return (access_token, 'KAGGLE_API_TOKEN')

    return (None, None)

0 commit comments

Comments
 (0)