diff --git a/documentation/tutorials/kaggle_intermediate_example_classification.ipynb b/documentation/tutorials/kaggle_intermediate_example_classification.ipynb
new file mode 100644
index 00000000..0de33a31
--- /dev/null
+++ b/documentation/tutorials/kaggle_intermediate_example_classification.ipynb
@@ -0,0 +1,8616 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "##### Copyright 2022 The TensorFlow Authors."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+ "# you may not use this file except in compliance with the License.\n",
+ "# You may obtain a copy of the License at\n",
+ "#\n",
+ "# https://www.apache.org/licenses/LICENSE-2.0\n",
+ "#\n",
+ "# Unless required by applicable law or agreed to in writing, software\n",
+ "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+ "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+ "# See the License for the specific language governing permissions and\n",
+ "# limitations under the License."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "hdIzhk1eKt5k"
+ },
+ "source": [
+ "# Intermediate classification with Kaggle data using TF-DF\n",
+ "
\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "TKaV2aZOT8Q0"
+ },
+ "source": [
+ "## Configuration"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4v1x0sj6k7nL"
+ },
+ "source": [
+ "To run this notebook, you need to have a Kaggle account.\n",
+ "\n",
+ "If you do not have an account, create one here: [Kaggle Register](https://www.kaggle.com/account/login?phase=startRegisterTab&returnUrl=%2F) \n",
+ "\n",
+ "Read through the [Authentication Section](https://www.kaggle.com/docs/api#authentication) of the Kaggle API documentation to get a key for the following section."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {
+ "cellView": "form",
+ "id": "MY7To2jpMj_x"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Enter your Kaggle token in order to fetch the dataset\n",
+ "\n",
+ "username = '' #@param {type:\"string\"}\n",
+ "key = '' #@param {type: \"string\"}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "W3qcScNeT1G2"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Configure Kaggle\n",
+ "try:\n",
+ " from google.colab import files, drive\n",
+ "\n",
+ " # Install and Configure Kaggle\n",
+ " import json\n",
+ "\n",
+ " token = {\n",
+ " \"username\":username,\n",
+ " \"key\":key\n",
+ " }\n",
+ "\n",
+ " # Installing kaggle\n",
+ " !pip install kaggle &> /dev/null\n",
+ "\n",
+ " # Creating .kaggle if necessary\n",
+ " !if [ -d .kaggle ]; then echo \".kaggle exists\"; else echo \".kaggle does not exist ... Creating it\"; mkdir .kaggle; if [ -d .kaggle ]; then echo \"Successfully created\"; else echo \"Error creating .kaggle\"; fi; fi\n",
+ "\n",
+ " with open('/content/.kaggle/kaggle.json', 'w') as file:\n",
+ " json.dump(token, file)\n",
+ "\n",
+ " # Creating .kaggle if necessary\n",
+ " !if [ -d ~/.kaggle ]; then echo \" ~/.kaggle exists\"; else echo \" ~/.kaggle does not exist ... Creating it\"; mkdir ~/.kaggle; if [ -d ~/.kaggle ]; then echo \"Successfully created\"; else echo \"Error creating ~/.kaggle\"; fi; fi\n",
+ " !cp /content/.kaggle/kaggle.json ~/.kaggle/kaggle.json\n",
+ "\n",
+ " # kaggle configuration\n",
+ " !kaggle config set -n path -v{/content}\n",
+ "\n",
+ " # Changing mode\n",
+ " !chmod 600 /root/.kaggle/kaggle.json\n",
+ "except Exception:\n",
+ " pass"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "haNXIxSBUEXI"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Download Dataset\n",
+ "import os\n",
+ "\n",
+ "DOWNLOAD_LOCATION = \"/root/Downloads/\"\n",
+ "\n",
+ "if os.path.exists(DOWNLOAD_LOCATION):\n",
+ " if os.path.isdir(DOWNLOAD_LOCATION):\n",
+ " print(\"{} exists and is a directory\".format(DOWNLOAD_LOCATION))\n",
+ " else:\n",
+ " print(\"{} exists but is not a directory!!!\".format(DOWNLOAD_LOCATION))\n",
+ "else:\n",
+ " print(\"{} does not exist ... Creating it\".format(DOWNLOAD_LOCATION))\n",
+ " os.makedirs(DOWNLOAD_LOCATION)\n",
+ "\n",
+ "# Downloading\n",
+ "!kaggle competitions download -c tabular-playground-series-sep-2021 -p {DOWNLOAD_LOCATION}\n",
+ "\n",
+ "# Extracting archives\n",
+ "!cd {DOWNLOAD_LOCATION}; unzip -qq \\*.zip; rm -f *.zip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "cellView": "form",
+ "id": "X728nqEoHk8U"
+ },
+ "outputs": [],
+ "source": [
+ "#@title Install TensorFlow Decision Forests\n",
+ "!pip install tensorflow_decision_forests -U -q"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {
+ "cellView": "form",
+ "id": "givUtNyHSVe_"
+ },
+ "outputs": [],
+ "source": [
+ "#@title User Input Configuration\n",
+ "\n",
+ "rnd_seed = 42#@param {type:\"number\"}\n",
+ "validation_ratio = 0.1 #@param {type:\"slider\", min:0, max:1, step:0.05}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jACDpm0GUyXQ"
+ },
+ "source": [
+ "### Import the libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {
+ "cellView": "form",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Bx3jmgamUgS3",
+ "outputId": "1b823264-e90e-468b-d5f5-bb523b6a7687"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TensorFlow Version: 2.9.1\n",
+ "TensorFlow Decision Forests: 0.2.7\n"
+ ]
+ }
+ ],
+ "source": [
+ "#@title\n",
+ "import os\n",
+ "import sys\n",
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import tensorflow as tf\n",
+ "import tensorflow_decision_forests as tfdf\n",
+ "\n",
+ "from tensorflow import keras\n",
+ "\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "from IPython.core.magic import register_line_magic\n",
+ "from IPython.display import Javascript\n",
+ "\n",
+ "print(\"TensorFlow Version: {}\".format(tf.__version__))\n",
+ "print(\"TensorFlow Decision Forests: {}\".format(tfdf.__version__))\n",
+ "\n",
+ "# Some of the model training logs can cover the full\n",
+ "# screen if not compressed to a smaller viewport.\n",
+ "# This magic allows setting a max height for a cell.\n",
+ "@register_line_magic\n",
+ "def set_cell_height(size):\n",
+ " display(\n",
+ " Javascript(\"google.colab.output.setIframeHeight(0, true, {maxHeight: \" +\n",
+ " str(size) + \"})\"))\n",
+ "\n",
+ "np.random.seed(rnd_seed)\n",
+ "tf.random.set_seed(rnd_seed)\n",
+ "\n",
+ "VALID_RATIO = validation_ratio"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Wsqy7G9hU1zT"
+ },
+ "source": [
+ "## Load the dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "IyuhaIxQUuFk",
+ "outputId": "41a5705b-b47a-4b47-e4ba-58dcdc992020"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Full train dataset shape is (957919, 120)\n"
+ ]
+ }
+ ],
+ "source": [
+ "train_file_path = os.path.join(DOWNLOAD_LOCATION, \"train.csv\")\n",
+ "train_full_data = pd.read_csv(train_file_path)\n",
+ "print(\"Full train dataset shape is {}\".format(train_full_data.shape))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 300
+ },
+ "id": "1o4jEv_LI5B8",
+ "outputId": "9693cfb0-7bb1-43a2-ed42-70bc150e1378"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " id \n",
+ " f1 \n",
+ " f2 \n",
+ " f3 \n",
+ " f4 \n",
+ " f5 \n",
+ " f6 \n",
+ " f7 \n",
+ " f8 \n",
+ " f9 \n",
+ " ... \n",
+ " f110 \n",
+ " f111 \n",
+ " f112 \n",
+ " f113 \n",
+ " f114 \n",
+ " f115 \n",
+ " f116 \n",
+ " f117 \n",
+ " f118 \n",
+ " claim \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 \n",
+ " 0 \n",
+ " 0.10859 \n",
+ " 0.004314 \n",
+ " -37.566 \n",
+ " 0.017364 \n",
+ " 0.28915 \n",
+ " -10.25100 \n",
+ " 135.12 \n",
+ " 168900.0 \n",
+ " 3.992400e+14 \n",
+ " ... \n",
+ " -12.2280 \n",
+ " 1.7482 \n",
+ " 1.90960 \n",
+ " -7.11570 \n",
+ " 4378.80 \n",
+ " 1.2096 \n",
+ " 8.613400e+14 \n",
+ " 140.1 \n",
+ " 1.01770 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 1 \n",
+ " 1 \n",
+ " 0.10090 \n",
+ " 0.299610 \n",
+ " 11822.000 \n",
+ " 0.276500 \n",
+ " 0.45970 \n",
+ " -0.83733 \n",
+ " 1721.90 \n",
+ " 119810.0 \n",
+ " 3.874100e+15 \n",
+ " ... \n",
+ " -56.7580 \n",
+ " 4.1684 \n",
+ " 0.34808 \n",
+ " 4.14200 \n",
+ " 913.23 \n",
+ " 1.2464 \n",
+ " 7.575100e+15 \n",
+ " 1861.0 \n",
+ " 0.28359 \n",
+ " 0 \n",
+ " \n",
+ " \n",
+ " 2 \n",
+ " 2 \n",
+ " 0.17803 \n",
+ " -0.006980 \n",
+ " 907.270 \n",
+ " 0.272140 \n",
+ " 0.45948 \n",
+ " 0.17327 \n",
+ " 2298.00 \n",
+ " 360650.0 \n",
+ " 1.224500e+13 \n",
+ " ... \n",
+ " -5.7688 \n",
+ " 1.2042 \n",
+ " 0.26290 \n",
+ " 8.13120 \n",
+ " 45119.00 \n",
+ " 1.1764 \n",
+ " 3.218100e+14 \n",
+ " 3838.2 \n",
+ " 0.40690 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 3 \n",
+ " 3 \n",
+ " 0.15236 \n",
+ " 0.007259 \n",
+ " 780.100 \n",
+ " 0.025179 \n",
+ " 0.51947 \n",
+ " 7.49140 \n",
+ " 112.51 \n",
+ " 259490.0 \n",
+ " 7.781400e+13 \n",
+ " ... \n",
+ " -34.8580 \n",
+ " 2.0694 \n",
+ " 0.79631 \n",
+ " -16.33600 \n",
+ " 4952.40 \n",
+ " 1.1784 \n",
+ " 4.533000e+12 \n",
+ " 4889.1 \n",
+ " 0.51486 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ " 4 \n",
+ " 4 \n",
+ " 0.11623 \n",
+ " 0.502900 \n",
+ " -109.150 \n",
+ " 0.297910 \n",
+ " 0.34490 \n",
+ " -0.40932 \n",
+ " 2538.90 \n",
+ " 65332.0 \n",
+ " 1.907200e+15 \n",
+ " ... \n",
+ " -13.6410 \n",
+ " 1.5298 \n",
+ " 1.14640 \n",
+ " -0.43124 \n",
+ " 3856.50 \n",
+ " 1.4830 \n",
+ " -8.991300e+12 \n",
+ " NaN \n",
+ " 0.23049 \n",
+ " 1 \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
5 rows × 120 columns
\n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " "
+ ],
+ "text/plain": [
+ " id f1 f2 f3 f4 f5 f6 f7 \\\n",
+ "0 0 0.10859 0.004314 -37.566 0.017364 0.28915 -10.25100 135.12 \n",
+ "1 1 0.10090 0.299610 11822.000 0.276500 0.45970 -0.83733 1721.90 \n",
+ "2 2 0.17803 -0.006980 907.270 0.272140 0.45948 0.17327 2298.00 \n",
+ "3 3 0.15236 0.007259 780.100 0.025179 0.51947 7.49140 112.51 \n",
+ "4 4 0.11623 0.502900 -109.150 0.297910 0.34490 -0.40932 2538.90 \n",
+ "\n",
+ " f8 f9 ... f110 f111 f112 f113 f114 \\\n",
+ "0 168900.0 3.992400e+14 ... -12.2280 1.7482 1.90960 -7.11570 4378.80 \n",
+ "1 119810.0 3.874100e+15 ... -56.7580 4.1684 0.34808 4.14200 913.23 \n",
+ "2 360650.0 1.224500e+13 ... -5.7688 1.2042 0.26290 8.13120 45119.00 \n",
+ "3 259490.0 7.781400e+13 ... -34.8580 2.0694 0.79631 -16.33600 4952.40 \n",
+ "4 65332.0 1.907200e+15 ... -13.6410 1.5298 1.14640 -0.43124 3856.50 \n",
+ "\n",
+ " f115 f116 f117 f118 claim \n",
+ "0 1.2096 8.613400e+14 140.1 1.01770 1 \n",
+ "1 1.2464 7.575100e+15 1861.0 0.28359 0 \n",
+ "2 1.1764 3.218100e+14 3838.2 0.40690 1 \n",
+ "3 1.1784 4.533000e+12 4889.1 0.51486 1 \n",
+ "4 1.4830 -8.991300e+12 NaN 0.23049 1 \n",
+ "\n",
+ "[5 rows x 120 columns]"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_full_data.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "8iTmy5KIU__x"
+ },
+ "source": [
+ "The data is composed of 120 columns all of which are numerical:\n",
+ "* 118 feature columns named `f1, f2, ... f118`\n",
+ "* label column named `claim`\n",
+ "* An `id` column that we will drop"
+ ]
+ },
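+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Quick sanity check: count the columns of each dtype. All 120 columns\n",
+ "# should be numeric (typically float64 features, int64 for `id` and `claim`).\n",
+ "train_full_data.dtypes.value_counts()"
+ ]
+ },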
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {
+ "id": "vkY2UJ3qi5y-"
+ },
+ "outputs": [],
+ "source": [
+ "train_full_data = train_full_data.drop('id', axis=1)\n",
+ "features = [f'f{i}' for i in range(1, 119)]\n",
+ "label = 'claim'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SDi4a-YSVNs5"
+ },
+ "source": [
+ "Let's check if we have missing data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AaFgKdIEVTUi",
+ "outputId": "ef2a9c21-a2d8-4da3-c28f-806d7d374fc9"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "f1 15247\n",
+ "f2 15190\n",
+ "f3 15491\n",
+ "f4 15560\n",
+ "f5 15405\n",
+ " ... \n",
+ "f114 15438\n",
+ "f115 15559\n",
+ "f116 15589\n",
+ "f117 15407\n",
+ "f118 15212\n",
+ "Length: 118, dtype: int64"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "train_full_data[features].isna().sum()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Yu3APw9xVX3a"
+ },
+ "source": [
+ "We can see that the data contains a lot of missing values. Approximately 15000 for each feature. That's around 1.5%\n",
+ "\n",
+ "The approach we will take in this notebook is to keep missing values, but add 3 additional features:\n",
+ "* `Number of missing values` in each sample\n",
+ " * For each sample out of the 957919, we will see how many values are missing across all features and then include this as a new feature. \n",
+ " * If there were n features missing, we will record the number n. \n",
+ "* `Standard deviation` over axis=1\n",
+ " * Standard deviation for each sample.\n",
+ "* `Unbiased Variance` over axis=1\n",
+ " * Variance for each sample.\n",
+ "\n",
+ "This preprocessing, and feature addition, will be implemented through the 2 methods mentioned previously. \n",
+ "1. Preprocessing using pandas\n",
+ "2. Tensorflow based Preprocessing\n",
+ "\n",
+ "Let's start with the first one."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "id": "yygwiPJQJUlV"
+ },
+ "outputs": [],
+ "source": [
+ "def split_dataset(dataset, test_ratio=0.1):\n",
+ " test_indices = np.random.rand(len(dataset)) < test_ratio\n",
+ " return dataset[~test_indices], dataset[test_indices]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KfpWBa-JVffi"
+ },
+ "source": [
+ "## First Approach: Preprocessing using pandas"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "k8ommX7yVm6y"
+ },
+ "source": [
+ "### Preprocessing\n",
+ "\n",
+ "In the approach that we will use in this notebook, we will keep the missing values but will add 3 additional features:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "id": "QUyVn3nrVrFV"
+ },
+ "outputs": [],
+ "source": [
+ "train_full_data['nan'] = train_full_data[features].isnull().sum(axis=1)\n",
+ "train_full_data['std'] = train_full_data[features].std(axis=1)\n",
+ "train_full_data['var'] = train_full_data[features].var(axis=1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4kBJd8s-Vxwx"
+ },
+ "source": [
+ "### Datasets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "7OaM2nb61m7G"
+ },
+ "source": [
+ "Split the dataframe into training and validation sets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "1qpdOQ0KKNoL",
+ "outputId": "7067b30d-2f58-4965-b577-71caa1a7bba4"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "862229 samples in training and 95690 in validation\n"
+ ]
+ }
+ ],
+ "source": [
+ "train_ds_pd, valid_ds_pd = split_dataset(train_full_data, test_ratio=VALID_RATIO)\n",
+ "print(\"{} samples in training and {} in validation\".format(train_ds_pd.shape[0], valid_ds_pd.shape[0]))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "_5IDInwd1wV6"
+ },
+ "source": [
+ "Create the training and validation datasets using TensorFlow Decision Forests `pd_dataframe_to_tf_dataset`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "EuwlnDUJKnuK"
+ },
+ "outputs": [],
+ "source": [
+ "train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(train_ds_pd, label=label)\n",
+ "valid_ds = tfdf.keras.pd_dataframe_to_tf_dataset(valid_ds_pd, label=label)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "nL_GBdCBm4fA"
+ },
+ "source": [
+ "### GradientBoostedTreesModel Training"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "xLF4aRvkWKLY"
+ },
+ "source": [
+ "For hyperparameter tuning, we did the following:\n",
+ "* Tried the predefined hyperparameters which did not give good results \n",
+ " * Especially `benchmark_rank1` which gave very bad results not to mention that it takes longer time to fit the data. \n",
+ " * `better_default` on the other hand gave acceptable results.\n",
+ "* Used Keras tuner in order to search for hyperparameters that maximise `AUC`. \n",
+ " * If you want to check how to do this, check out the following Kaggle [notebook](https://www.kaggle.com/ekaterinadranitsyna/kerastuner-tf-decision-forest?linkId=133421702) by Ekaterina Dranitsyna.\n",
+ "\n",
+ "Finally the hyperparameters that gave the best results where the below which are `better_default` with `l1_regularization`"
+ ]
+ },
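+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "The following cell is a minimal Keras Tuner sketch of this search. It assumes the `keras-tuner` package is installed, and the search space shown is illustrative rather than the exact one used for this tutorial; the search itself is commented out because it is slow."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# A minimal sketch, assuming keras-tuner is installed:\n",
+ "# !pip install keras-tuner -q\n",
+ "import keras_tuner as kt\n",
+ "\n",
+ "def build_model(hp):\n",
+ " # Illustrative search space; more hyperparameters can be added as needed.\n",
+ " model = tfdf.keras.GradientBoostedTreesModel(\n",
+ " growing_strategy=hp.Choice('growing_strategy', ['LOCAL', 'BEST_FIRST_GLOBAL']),\n",
+ " l1_regularization=hp.Float('l1_regularization', 0.0, 1.0))\n",
+ " # Name the metric so the tuner can track it as 'val_auc'.\n",
+ " model.compile(metrics=[keras.metrics.AUC(name='auc')])\n",
+ " return model\n",
+ "\n",
+ "tuner = kt.RandomSearch(\n",
+ " build_model,\n",
+ " objective=kt.Objective('val_auc', direction='max'),\n",
+ " max_trials=5,\n",
+ " overwrite=True)\n",
+ "\n",
+ "# Uncomment to run the search (slow):\n",
+ "# tuner.search(train_ds, validation_data=valid_ds)"
+ ]
+ },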
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "0odOG59vm4fI"
+ },
+ "outputs": [],
+ "source": [
+ "model_1 = tfdf.keras.GradientBoostedTreesModel(\n",
+ " growing_strategy = 'BEST_FIRST_GLOBAL',\n",
+ " l1_regularization = 0.8\n",
+ ")\n",
+ "\n",
+ "model_1.compile(metrics=[keras.metrics.AUC()])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zTNqlB-Y2geP"
+ },
+ "source": [
+ "The next cell will take some time to get executed.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 300
+ },
+ "id": "QEZlOOnUN_4q",
+ "outputId": "966db7ee-7920-4033-a239-36b2cb786238"
+ },
+ "outputs": [
+ {
+ "data": {
+ "application/javascript": [
+ "google.colab.output.setIframeHeight(0, true, {maxHeight: 300})"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Reading training dataset...\n",
+ "Training tensor examples:\n",
+ "Features: {'f1': , 'f2': , 'f3': , 'f4': , 'f5': , 'f6': , 'f7': , 'f8': , 'f9': , 'f10': , 'f11': , 'f12': , 'f13': , 'f14': , 'f15': , 'f16': , 'f17': , 'f18': , 'f19': , 'f20': , 'f21': , 'f22': , 'f23': , 'f24': , 'f25': , 'f26': , 'f27': , 'f28': , 'f29': , 'f30': , 'f31': , 'f32': , 'f33': , 'f34': , 'f35': , 'f36': , 'f37': , 'f38': , 'f39': , 'f40': , 'f41': , 'f42': , 'f43': , 'f44': , 'f45': , 'f46': , 'f47': , 'f48': , 'f49': , 'f50': , 'f51': , 'f52': , 'f53': , 'f54': , 'f55': , 'f56': , 'f57': , 'f58': , 'f59': , 'f60': , 'f61': , 'f62': , 'f63': , 'f64': , 'f65': , 'f66': , 'f67': , 'f68': , 'f69': , 'f70': , 'f71': , 'f72': , 'f73': , 'f74': , 'f75': , 'f76': , 'f77': , 'f78': , 'f79': , 'f80': , 'f81': , 'f82': , 'f83': , 'f84': , 'f85': , 'f86': , 'f87': , 'f88': , 'f89': , 'f90': , 'f91': , 'f92': , 'f93': , 'f94': , 'f95': , 'f96': , 'f97': , 'f98': , 'f99': , 'f100': , 'f101': , 'f102': , 'f103': , 'f104': , 'f105': , 'f106': , 'f107': , 'f108': , 'f109': , 'f110': , 'f111': , 'f112': , 'f113': , 'f114': , 'f115': , 'f116': , 'f117': , 'f118': , 'nan': , 'std': , 'var': }\n",
+ "Label: Tensor(\"data_121:0\", shape=(None,), dtype=int64)\n",
+ "Weights: None\n",
+ "Normalized tensor features:\n",
+ " {'f1': SemanticTensor(semantic=, tensor=), 'f2': SemanticTensor(semantic=, tensor=), 'f3': SemanticTensor(semantic=, tensor=), 'f4': SemanticTensor(semantic=, tensor=), 'f5': SemanticTensor(semantic=, tensor=), 'f6': SemanticTensor(semantic=, tensor=), 'f7': SemanticTensor(semantic=, tensor=), 'f8': SemanticTensor(semantic=, tensor=), 'f9': SemanticTensor(semantic=, tensor=), 'f10': SemanticTensor(semantic=, tensor=), 'f11': SemanticTensor(semantic=, tensor=), 'f12': SemanticTensor(semantic=, tensor=), 'f13': SemanticTensor(semantic=, tensor=), 'f14': SemanticTensor(semantic=, tensor=), 'f15': SemanticTensor(semantic=, tensor=), 'f16': SemanticTensor(semantic=, tensor=), 'f17': SemanticTensor(semantic=, tensor=), 'f18': SemanticTensor(semantic=, tensor=), 'f19': SemanticTensor(semantic=, tensor=), 'f20': SemanticTensor(semantic=, tensor=), 'f21': SemanticTensor(semantic=, tensor=), 'f22': SemanticTensor(semantic=, tensor=), 'f23': SemanticTensor(semantic=, tensor=), 'f24': SemanticTensor(semantic=, tensor=), 'f25': SemanticTensor(semantic=, tensor=), 'f26': SemanticTensor(semantic=, tensor=), 'f27': SemanticTensor(semantic=, tensor=), 'f28': SemanticTensor(semantic=, tensor=), 'f29': SemanticTensor(semantic=, tensor=), 'f30': SemanticTensor(semantic=, tensor=), 'f31': SemanticTensor(semantic=, tensor=), 'f32': SemanticTensor(semantic=, tensor=), 'f33': SemanticTensor(semantic=, tensor=), 'f34': SemanticTensor(semantic=, tensor=), 'f35': SemanticTensor(semantic=, tensor=), 'f36': SemanticTensor(semantic=, tensor=), 'f37': SemanticTensor(semantic=, tensor=), 'f38': SemanticTensor(semantic=, tensor=), 'f39': SemanticTensor(semantic=, tensor=), 'f40': SemanticTensor(semantic=, tensor=), 'f41': SemanticTensor(semantic=, tensor=), 'f42': SemanticTensor(semantic=, tensor=), 'f43': SemanticTensor(semantic=, tensor=), 'f44': SemanticTensor(semantic=, tensor=), 'f45': SemanticTensor(semantic=, tensor=), 'f46': SemanticTensor(semantic=, tensor=), 'f47': SemanticTensor(semantic=, tensor=), 'f48': SemanticTensor(semantic=, tensor=), 'f49': SemanticTensor(semantic=, tensor=), 'f50': SemanticTensor(semantic=, tensor=), 'f51': SemanticTensor(semantic=, tensor=), 'f52': SemanticTensor(semantic=, tensor=), 'f53': SemanticTensor(semantic=, tensor=), 'f54': SemanticTensor(semantic=, tensor=), 'f55': SemanticTensor(semantic=, tensor=), 'f56': SemanticTensor(semantic=, tensor=), 'f57': SemanticTensor(semantic=, tensor=), 'f58': SemanticTensor(semantic=, tensor=), 'f59': SemanticTensor(semantic=, tensor=), 'f60': SemanticTensor(semantic=, tensor=), 'f61': SemanticTensor(semantic=, tensor=), 'f62': SemanticTensor(semantic=, tensor=), 'f63': SemanticTensor(semantic=, tensor=), 'f64': SemanticTensor(semantic=, tensor=), 'f65': SemanticTensor(semantic=, tensor=), 'f66': SemanticTensor(semantic=, tensor=