diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py index 5c5d449..edd95ea 100644 --- a/src/kaggle/api/kaggle_api_extended.py +++ b/src/kaggle/api/kaggle_api_extended.py @@ -19,9 +19,11 @@ import csv from datetime import datetime +from enum import Enum import io import json # Needed by mypy. +import logging import os import re # Needed by mypy. @@ -48,7 +50,7 @@ from google.protobuf import field_mask_pb2 import kaggle -from kagglesdk import KaggleClient, KaggleEnv # type: ignore[attr-defined] +from kagglesdk import get_access_token_from_env, KaggleClient, KaggleEnv # type: ignore[attr-defined] from kagglesdk.admin.types.inbox_file_service import CreateInboxFileRequest from kagglesdk.blobs.types.blob_api_service import ApiStartBlobUploadRequest, ApiStartBlobUploadResponse, ApiBlobType from kagglesdk.competitions.types.competition_api_service import ( @@ -150,6 +152,14 @@ T = TypeVar('T') +class AuthMethod(Enum): + LEGACY_API_KEY = 0 + ACCESS_TOKEN = 1 + + def __str__(self): + return self.name + + class DirectoryArchive(object): def __init__(self, fullpath, fmt): @@ -365,7 +375,9 @@ class KaggleApi: CONFIG_NAME_COMPETITION = 'competition' CONFIG_NAME_PATH = 'path' CONFIG_NAME_USER = 'username' + CONFIG_NAME_AUTH_METHOD = 'auth_method' CONFIG_NAME_KEY = 'key' + CONFIG_NAME_TOKEN = 'token' CONFIG_NAME_SSL_CA_CERT = 'ssl_ca_cert' HEADER_API_VERSION = 'X-Kaggle-ApiVersion' @@ -398,10 +410,12 @@ class KaggleApi: args: List[str] = [] if os.environ.get('KAGGLE_API_ENVIRONMENT') == 'LOCALHOST': - # Make it verbose when running in the debugger. 
- args = ['--verbose', '--local'] - elif os.environ.get('KAGGLE_API_ENDPOINT') == 'http://localhost': - args = ['--local'] + args.append('--local') + verbose = (os.environ.get('VERBOSE') or os.environ.get('VERBOSE_OUTPUT') or "false").lower() + if verbose in ('1', 'true', 'yes'): + args.append('--verbose') + logging.basicConfig(level=logging.DEBUG) + logger = logging.getLogger(__name__) # Kernels valid types valid_push_kernel_types = ['script', 'notebook'] @@ -513,39 +527,76 @@ def retriable_func(*args): ## Authentication def authenticate(self) -> None: - """Authenticate the user with the Kaggle API. + if self._authenticate_with_access_token(): + return + if self._authenticate_with_legacy_apikey(): + return + print( + 'Could not find {}. Make sure it\'s located in' + ' {}. Or use the environment method. See setup' + ' instructions at' + ' https://github.com/Kaggle/kaggle-api/'.format(self.config_file, self.config_dir) + ) + exit(1) + + def _authenticate_with_legacy_apikey(self) -> bool: + """Authenticate the user with the Kaggle API using legacy API key. This method will generate a configuration, first checking the environment for credential variables, and falling back to looking for the .kaggle/kaggle.json configuration file. """ - config_data: Dict[str, str] = {} + config_values: Dict[str, str] = {} # Ex: 'datasets list', 'competitions files', 'models instances get', etc. 
api_command = ' '.join(sys.argv[1:]) # Step 1: try getting username/password from environment - config_data = self.read_config_environment(config_data) + config_values = self.read_config_environment(config_values) # Step 2: if credentials were not in env read in configuration file - if self.CONFIG_NAME_USER not in config_data or self.CONFIG_NAME_KEY not in config_data: + if self.CONFIG_NAME_USER not in config_values or self.CONFIG_NAME_KEY not in config_values: if os.path.exists(self.config): - config_data = self.read_config_file(config_data) - elif self._is_help_or_version_command(api_command) or ( - len(sys.argv) > 2 and api_command.startswith(self.command_prefixes_allowing_anonymous_access) - ): + config_values = self.read_config_file(config_values) + elif self._command_allows_logged_out(api_command): # Some API commands should be allowed without authentication. - return + return True else: - raise IOError( - 'Could not find {}. Make sure it\'s located in' - ' {}. Or use the environment method. See setup' - ' instructions at' - ' https://github.com/Kaggle/kaggle-api/'.format(self.config_file, self.config_dir) - ) + return False + + # Step 3: Validate and save + # Username and password are required. + for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]: + if item not in config_values: + raise ValueError('Error: Missing %s in configuration.' % item) + self.config_values = config_values + self.config_values[self.CONFIG_NAME_AUTH_METHOD] = AuthMethod.LEGACY_API_KEY + self.logger.debug(f'Authenticated with legacy api key in: {self.config}') + return True + + def _authenticate_with_access_token(self) -> bool: + (access_token, source) = get_access_token_from_env() + if not access_token: + return False + + username = self._introspect_token(access_token) + if not username: + self.logger.debug(f'Ignoring invalid/expired access token in \"{source}\".') + return False - # Step 3: load into configuration! 
- self._load_config(config_data) + self.config_values: Dict[str, str] = { + self.CONFIG_NAME_TOKEN: access_token, + self.CONFIG_NAME_USER: username, + self.CONFIG_NAME_AUTH_METHOD: AuthMethod.ACCESS_TOKEN, + } + self.logger.debug(f'Authenticated with access token in: {source}') + return True + + def _command_allows_logged_out(self, api_command: str) -> bool: + # Some API commands do not require authentication. + return self._is_help_or_version_command(api_command) or ( + len(sys.argv) > 2 and api_command.startswith(self.command_prefixes_allowing_anonymous_access) + ) def _is_help_or_version_command(self, api_command: str) -> bool: """Determines if the string command passed in is for a help or version @@ -582,23 +633,6 @@ def read_config_environment(self, config_data: Optional[Dict[str, str]] = None) ## Configuration - def _load_config(self, config_data: Dict[str, str]) -> None: - """The final step of the authenticate steps, where we load the values from - config_data into the Configuration object. - - Parameters - ========== - config_data: a dictionary with configuration values (keys) to read - into self.config_values - """ - # Username and password are required. - - for item in [self.CONFIG_NAME_USER, self.CONFIG_NAME_KEY]: - if item not in config_data: - raise ValueError('Error: Missing %s in configuration.' % item) - - self.config_values = config_data - def read_config_file(self, config_data: Optional[Dict[str, str]] = None, quiet: bool = False) -> Dict[str, str]: """read_config_file is the first effort to get a username and key to authenticate to the Kaggle API. Since we can get the username and password @@ -715,9 +749,7 @@ def get_config_value(self, name: str) -> Optional[str]: ========== name: the config value key to get """ - if name in self.config_values: - return self.config_values[name] - return None + return self.config_values.get(name) def get_default_download_dir(self, *subdirs: str) -> str: """Get the download path for a file. 
If not defined, return default from @@ -749,7 +781,7 @@ def print_config_value(self, name, prefix='- ', separator=': '): value_out = 'None' if name in self.config_values and self.config_values[name] is not None: value_out = self.config_values[name] - print(prefix + name + separator + value_out) + print(f"{prefix}{name}{separator}{value_out}") def print_config_values(self, prefix='- '): """Print all configuration values. @@ -762,6 +794,7 @@ def print_config_values(self, prefix='- '): return print('Configuration values from ' + self.config_dir) self.print_config_value(self.CONFIG_NAME_USER, prefix=prefix) + self.print_config_value(self.CONFIG_NAME_AUTH_METHOD, prefix=prefix) self.print_config_value(self.CONFIG_NAME_PATH, prefix=prefix) self.print_config_value(self.CONFIG_NAME_PROXY, prefix=prefix) self.print_config_value(self.CONFIG_NAME_COMPETITION, prefix=prefix) @@ -777,9 +810,12 @@ def build_kaggle_client(self) -> kagglesdk.kaggle_client.KaggleClient: ) ) verbose = '--verbose' in self.args or '-v' in self.args - # config = self.api_client.configuration return KaggleClient( - env=env, verbose=verbose, username=self.config_values['username'], password=self.config_values['key'] + env=env, + verbose=verbose, + username=self.config_values.get(self.CONFIG_NAME_USER), + password=self.config_values.get(self.CONFIG_NAME_KEY), + api_token=self.config_values.get(self.CONFIG_NAME_TOKEN), ) def camel_to_snake(self, name: str) -> str: diff --git a/src/kagglesdk/__init__.py b/src/kagglesdk/__init__.py index a14cc66..1d6768f 100644 --- a/src/kagglesdk/__init__.py +++ b/src/kagglesdk/__init__.py @@ -1,2 +1,2 @@ from kagglesdk.kaggle_client import KaggleClient -from kagglesdk.kaggle_env import KaggleEnv +from kagglesdk.kaggle_env import get_access_token_from_env, KaggleEnv diff --git a/src/kagglesdk/kaggle_client.py b/src/kagglesdk/kaggle_client.py index b375b6e..6cdb6ae 100644 --- a/src/kagglesdk/kaggle_client.py +++ b/src/kagglesdk/kaggle_client.py @@ -1,73 +1,74 @@ -from 
kagglesdk.kernels.services.kernels_api_service import KernelsApiClient -from kagglesdk.blobs.services.blob_api_service import BlobApiClient -from kagglesdk.education.services.education_api_service import EducationApiClient +from kagglesdk.security.services.oauth_service import OAuthClient from kagglesdk.models.services.model_api_service import ModelApiClient from kagglesdk.models.services.model_service import ModelClient +from kagglesdk.kernels.services.kernels_api_service import KernelsApiClient from kagglesdk.competitions.services.competition_api_service import CompetitionApiClient +from kagglesdk.education.services.education_api_service import EducationApiClient from kagglesdk.datasets.services.dataset_api_service import DatasetApiClient -from kagglesdk.admin.services.inbox_file_service import InboxFileClient -from kagglesdk.security.services.oauth_service import OAuthClient from kagglesdk.users.services.account_service import AccountClient +from kagglesdk.admin.services.inbox_file_service import InboxFileClient +from kagglesdk.blobs.services.blob_api_service import BlobApiClient from kagglesdk.kaggle_env import KaggleEnv from kagglesdk.kaggle_http_client import KaggleHttpClient class KaggleClient(object): - class Kernels(object): - def __init__(self, http_client: KaggleHttpClient): - self.kernels_api_client = KernelsApiClient(http_client) - - class Blobs(object): - def __init__(self, http_client: KaggleHttpClient): - self.blob_api_client = BlobApiClient(http_client) - - class Education(object): + class Security(object): def __init__(self, http_client: KaggleHttpClient): - self.education_api_client = EducationApiClient(http_client) + self.oauth_client = OAuthClient(http_client) class Models(object): def __init__(self, http_client: KaggleHttpClient): self.model_api_client = ModelApiClient(http_client) self.model_client = ModelClient(http_client) + class Kernels(object): + def __init__(self, http_client: KaggleHttpClient): + self.kernels_api_client = 
KernelsApiClient(http_client) + class Competitions(object): def __init__(self, http_client: KaggleHttpClient): self.competition_api_client = CompetitionApiClient(http_client) + class Education(object): + def __init__(self, http_client: KaggleHttpClient): + self.education_api_client = EducationApiClient(http_client) + class Datasets(object): def __init__(self, http_client: KaggleHttpClient): self.dataset_api_client = DatasetApiClient(http_client) - class Admin(object): + class Users(object): def __init__(self, http_client: KaggleHttpClient): - self.inbox_file_client = InboxFileClient(http_client) + self.account_client = AccountClient(http_client) - class Security(object): + class Admin(object): def __init__(self, http_client: KaggleHttpClient): - self.oauth_client = OAuthClient(http_client) + self.inbox_file_client = InboxFileClient(http_client) - class Users(object): + class Blobs(object): def __init__(self, http_client: KaggleHttpClient): - self.account_client = AccountClient(http_client) + self.blob_api_client = BlobApiClient(http_client) - def __init__(self, env: KaggleEnv = None, verbose: bool = False, username: str = None, password: str = None): - self._http_client = http_client = KaggleHttpClient(env, verbose, self._renew_iap_token, username=username, password=password) - self.kernels = KaggleClient.Kernels(http_client) - self.blobs = KaggleClient.Blobs(http_client) - self.education = KaggleClient.Education(http_client) + def __init__(self, env: KaggleEnv = None, verbose: bool = False, username: str = None, password: str = None, api_token: str = None): + self._http_client = http_client = KaggleHttpClient(env, verbose, username=username, password=password, api_token=api_token) + self.security = KaggleClient.Security(http_client) self.models = KaggleClient.Models(http_client) + self.kernels = KaggleClient.Kernels(http_client) self.competitions = KaggleClient.Competitions(http_client) + self.education = KaggleClient.Education(http_client) self.datasets = 
KaggleClient.Datasets(http_client) - self.admin = KaggleClient.Admin(http_client) - self.security = KaggleClient.Security(http_client) self.users = KaggleClient.Users(http_client) + self.admin = KaggleClient.Admin(http_client) + self.blobs = KaggleClient.Blobs(http_client) self.username = username self.password = password + self.api_token = api_token - def http_client(self): + def http_client(self) -> KaggleHttpClient: return self._http_client - def _renew_iap_token(self): + def _renew_iap_token(self) -> str: return self.admin.admin_client.renew_iap_token() def __enter__(self): diff --git a/src/kagglesdk/kaggle_env.py b/src/kagglesdk/kaggle_env.py index ebcf4c2..732a416 100644 --- a/src/kagglesdk/kaggle_env.py +++ b/src/kagglesdk/kaggle_env.py @@ -1,12 +1,14 @@ import logging import os from enum import Enum +from pathlib import Path KAGGLE_NOTEBOOK_ENV_VAR_NAME = "KAGGLE_KERNEL_RUN_TYPE" KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME = "KAGGLE_DATA_PROXY_URL" KAGGLE_API_V1_TOKEN_PATH = "KAGGLE_API_V1_TOKEN" -logger = logging.getLogger(__name__) +def get_logger(): + return logging.getLogger(__name__) class KaggleEnv(Enum): LOCAL = 0 # localhost @@ -50,10 +52,38 @@ def is_in_kaggle_notebook() -> bool: if os.getenv(KAGGLE_NOTEBOOK_ENV_VAR_NAME) is not None: if os.getenv(KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME) is None: # Missing endpoint for the Jwt client - logger.warning( + get_logger().warning( "Can't use the Kaggle Cache. " f"The '{KAGGLE_DATA_PROXY_URL_ENV_VAR_NAME}' environment variable is not set." 
) return False return True return False + +def _get_access_token_from_file(path): + if not path: + return (None, None) + + token_path = Path(path) + if not token_path.exists(): + return (None, None) + + token_value = token_path.read_text().strip() + if not token_value: + return (None, None) + + get_logger().debug(f"Using access token from file: \"{path}\"") + return (token_value, path) + +def get_access_token_from_env(): + if is_in_kaggle_notebook(): + (token, _) = _get_access_token_from_file(os.environ.get(KAGGLE_API_V1_TOKEN_PATH))  # helper returns a (token, path) pair; unpack so the check below tests the token, not the always-truthy tuple + if token: + return (token, KAGGLE_API_V1_TOKEN_PATH) + + access_token = os.environ.get('KAGGLE_API_TOKEN') + if access_token is not None: + get_logger().debug("Using access token from KAGGLE_API_TOKEN environment variable") + return (access_token, 'KAGGLE_API_TOKEN') + + return (None, None) diff --git a/src/kagglesdk/kaggle_http_client.py b/src/kagglesdk/kaggle_http_client.py index 2b22820..6dd0e40 100644 --- a/src/kagglesdk/kaggle_http_client.py +++ b/src/kagglesdk/kaggle_http_client.py @@ -9,7 +9,7 @@ import requests from urllib3.fields import RequestField -from kagglesdk.kaggle_env import get_endpoint, get_env, is_in_kaggle_notebook, KAGGLE_API_V1_TOKEN_PATH, KaggleEnv +from kagglesdk.kaggle_env import get_endpoint, get_env, is_in_kaggle_notebook, get_access_token_from_env, KaggleEnv from kagglesdk.kaggle_object import KaggleObject from typing import Type @@ -94,9 +94,9 @@ def __init__( self, env: KaggleEnv = None, verbose: bool = False, - renew_iap_token=None, - username=None, - password=None, + username: str = None, + password: str = None, + api_token: str = None ): self._env = env or get_env() self._signed_in = None @@ -105,6 +105,7 @@ def __init__( self._session = None self._username = username self._password = password + self._api_token = api_token def call( self, @@ -295,7 +296,7 @@ def _try_fill_auth(self): if self._signed_in is not None: return - api_token = os.getenv('KAGGLE_API_TOKEN') + api_token = self._api_token or 
os.getenv('KAGGLE_API_TOKEN') if api_token is not None: self._session.auth = KaggleHttpClient.BearerAuth(api_token) self._signed_in = True @@ -310,16 +311,11 @@ def _try_fill_auth(self): self._signed_in = True return - if is_in_kaggle_notebook(): - token_file_path_str = os.environ.get(KAGGLE_API_V1_TOKEN_PATH) - if token_file_path_str: - token_path = Path(token_file_path_str) - if token_path.exists(): - token_value = token_path.read_text().strip() - if token_value: - self._session.auth = KaggleHttpClient.BearerAuth(token_value) - self._signed_in = True - return + (api_token, _) = get_access_token_from_env() + if api_token is not None: + self._session.auth = KaggleHttpClient.BearerAuth(api_token) + self._signed_in = True + return self._signed_in = False diff --git a/tests/unit_tests.py b/tests/unit_tests.py index 9992d16..2dea37d 100644 --- a/tests/unit_tests.py +++ b/tests/unit_tests.py @@ -28,7 +28,6 @@ # Give it the module: unit_tests.TestKaggleApi # Set the working directory to: kaggle-api/tests # Define some envars: -# KAGGLE_API_ENDPOINT=http://localhost # KAGGLE_API_ENVIRONMENT=LOCALHOST # KAGGLE_CONFIG_DIR=/home/kaggle/.config/kaggle/dev # KAGGLE_KEY=local_api_token diff --git a/tools/use-localhost.sh b/tools/use-localhost.sh index a62190b..a5413ce 100755 --- a/tools/use-localhost.sh +++ b/tools/use-localhost.sh @@ -7,7 +7,7 @@ if [[ "$0" == "$BASH_SOURCE" ]]; then exit 1 fi -export KAGGLE_API_ENDPOINT=http://localhost +export KAGGLE_API_ENVIRONMENT=LOCALHOST export KAGGLE_CONFIG_DIR=$(realpath "${XDG_CONFIG_HOME:-$HOME/.config}/kaggle/dev") KAGGLE_CONFIG_FILE="$KAGGLE_CONFIG_DIR/kaggle.json" diff --git a/tools/use-prod.sh b/tools/use-prod.sh index fb0fc86..03e132a 100755 --- a/tools/use-prod.sh +++ b/tools/use-prod.sh @@ -7,7 +7,7 @@ if [[ "$0" == "$BASH_SOURCE" ]]; then exit 1 fi -unset KAGGLE_API_ENDPOINT +unset KAGGLE_API_ENVIRONMENT unset KAGGLE_CONFIG_DIR if ! [[ -f "${XDG_CONFIG_HOME:-$HOME/.config}/kaggle/kaggle.json" ]]; then