diff --git a/transformers_doc/en/hpo_transformers_hpo_examples_en.ipynb b/transformers_doc/en/hpo_transformers_hpo_examples_en.ipynb
new file mode 100644
index 00000000..88f7936c
--- /dev/null
+++ b/transformers_doc/en/hpo_transformers_hpo_examples_en.ipynb
@@ -0,0 +1,272 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a5a74135",
+   "metadata": {},
+   "source": [
+    "# Hyperparameter Search Examples with 🤗 Transformers\n",
+    "This notebook demonstrates how to use various hyperparameter optimization backends (Optuna, Ray Tune, SigOpt, W&B) with the 🤗 Transformers `Trainer`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2d6a084f",
+   "metadata": {},
+   "source": [
+    "## Setup\n",
+    "Before running the examples below, make sure to install all required packages (`hyperopt` backs the `HyperOptSearch` algorithm used in the Ray Tune example):\n",
+    "```bash\n",
+    "pip install transformers optuna \"ray[tune]\" hyperopt sigopt wandb datasets\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4cf648e4",
+   "metadata": {},
+   "source": [
+    "## Data & Model Initialization"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c8f40952",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer\n",
+    "\n",
+    "# Load a small subset of SST-2\n",
+    "dataset = load_dataset(\"glue\", \"sst2\", split=\"train[:200]\")\n",
+    "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")\n",
+    "\n",
+    "def preprocess(examples):\n",
+    "    return tokenizer(examples[\"sentence\"], truncation=True, padding=\"max_length\")\n",
+    "\n",
+    "dataset = dataset.map(preprocess, batched=True).train_test_split(test_size=0.2)\n",
+    "\n",
+    "# Trainer calls model_init() to build a fresh model for every trial.\n",
+    "def model_init():\n",
+    "    return AutoModelForSequenceClassification.from_pretrained(\"distilbert-base-uncased\", num_labels=2)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a04ce98b",
+   "metadata": {},
+   "source": [
+    "## Common Objective Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "56a66516",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Single-objective: minimize eval_loss (reported automatically by Trainer.evaluate)\n",
+    "def compute_objective(metrics):\n",
+    "    return metrics[\"eval_loss\"]\n"
+   ]
+  },
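+  {
+   "cell_type": "markdown",
+   "id": "e3f1a2b7",
+   "metadata": {},
+   "source": [
+    "`compute_objective` is optional: if you omit it, `Trainer` falls back to `transformers.trainer_utils.default_compute_objective`, which (in the versions current at the time of writing) returns `eval_loss` when no other task metrics are present and otherwise the sum of the remaining metric values. The short sketch below only inspects that default; nothing later in the notebook depends on it."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f4c2d8e9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: inspect the built-in default objective used when\n",
+    "# compute_objective is not supplied.\n",
+    "from transformers.trainer_utils import default_compute_objective\n",
+    "\n",
+    "print(default_compute_objective({\"eval_loss\": 0.42}))  # -> 0.42\n"
+   ]
+  },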
Optuna Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d08078c0", + "metadata": {}, + "outputs": [], + "source": [ + "import optuna\n", + "from transformers.integrations import EarlyStoppingCallback\n", + "\n", + "def optuna_hp_space(trial):\n", + " return {\n", + " \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-6, 1e-4, log=True),\n", + " \"weight_decay\": trial.suggest_float(\"weight_decay\", 0.0, 0.3),\n", + " \"num_train_epochs\": trial.suggest_int(\"num_train_epochs\", 1, 3),\n", + " \"per_device_train_batch_size\": trial.suggest_categorical(\"per_device_train_batch_size\", [16, 32, 64]),\n", + " \"warmup_steps\": trial.suggest_int(\"warmup_steps\", 0, 100),\n", + " }\n", + "\n", + "training_args = TrainingArguments(\"optuna-hpo\", evaluation_strategy=\"epoch\", logging_steps=10)\n", + "\n", + "trainer = Trainer(\n", + " args=training_args,\n", + " train_dataset=dataset[\"train\"],\n", + " eval_dataset=dataset[\"test\"],\n", + " tokenizer=tokenizer,\n", + " model_init=model_init,\n", + " compute_metrics=lambda p: {\"eval_loss\": p.loss},\n", + " callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],\n", + ")\n", + "\n", + "best_trial = trainer.hyperparameter_search(\n", + " direction=\"minimize\",\n", + " backend=\"optuna\",\n", + " hp_space=optuna_hp_space,\n", + " n_trials=5,\n", + " compute_objective=compute_objective,\n", + ")\n", + "\n", + "print(\"Best Optuna trial:\", best_trial)\n" + ] + }, + { + "cell_type": "markdown", + "id": "a8ff17dd", + "metadata": {}, + "source": [ + "## 2. Ray Tune Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1c10b3b", + "metadata": {}, + "outputs": [], + "source": [ + "from ray import tune\n", + "from ray.tune.schedulers import ASHAScheduler\n", + "from ray.tune.search.hyperopt import HyperOptSearch\n", + "\n", + "def ray_hp_space(trial_config):\n", + " return {\n", + " \"learning_rate\": trial_config[\"learning_rate\"],\n", + " \"per_device_train_batch_size\": trial_config[\"per_device_train_batch_size\"],\n", + " \"num_train_epochs\": trial_config[\"num_train_epochs\"],\n", + " }\n", + "\n", + "ray_search_space = {\n", + " \"learning_rate\": tune.loguniform(1e-5, 1e-3),\n", + " \"per_device_train_batch_size\": tune.choice([16, 32, 64]),\n", + " \"num_train_epochs\": tune.choice([2, 3, 4]),\n", + "}\n", + "\n", + "training_args = TrainingArguments(\"ray-hpo\", evaluation_strategy=\"epoch\", logging_steps=10)\n", + "\n", + "trainer = Trainer(\n", + " args=training_args,\n", + " train_dataset=dataset[\"train\"],\n", + " eval_dataset=dataset[\"test\"],\n", + " tokenizer=tokenizer,\n", + " model_init=model_init,\n", + " compute_metrics=lambda p: {\n", + " \"eval_loss\": p.loss,\n", + " \"eval_accuracy\": (p.predictions.argmax(-1) == p.label_ids).mean()\n", + " },\n", + ")\n", + "\n", + "best_run = trainer.hyperparameter_search(\n", + " direction=\"maximize\",\n", + " backend=\"ray\",\n", + " hp_space=ray_hp_space,\n", + " n_trials=5,\n", + " search_alg=HyperOptSearch(metric=\"eval_accuracy\", mode=\"max\"),\n", + " scheduler=ASHAScheduler(metric=\"eval_accuracy\", mode=\"max\", max_t=3),\n", + " resources_per_trial={\"cpu\": 1, \"gpu\": 0},\n", + " compute_objective=lambda metrics: metrics[\"eval_accuracy\"],\n", + ")\n", + "\n", + "print(\"Best Ray Tune run:\", best_run)\n" + ] + }, + { + "cell_type": "markdown", + "id": "1236b11b", + "metadata": {}, + "source": [ + "## 3. 
SigOpt Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfd6cf35", + "metadata": {}, + "outputs": [], + "source": [ + "def sigopt_hp_space(trial):\n", + " return [\n", + " {\"bounds\": {\"min\": 1e-6, \"max\": 1e-4}, \"name\": \"learning_rate\", \"type\": \"double\"},\n", + " {\"bounds\": {\"min\": 0.0, \"max\": 0.3}, \"name\": \"weight_decay\", \"type\": \"double\"},\n", + " {\"categorical_values\": [\"16\", \"32\", \"64\"], \"name\": \"per_device_train_batch_size\", \"type\": \"categorical\"},\n", + " {\"bounds\": {\"min\": 1, \"max\": 3}, \"name\": \"num_train_epochs\", \"type\": \"int\"},\n", + " ]\n", + "\n", + "best_trials = trainer.hyperparameter_search(\n", + " direction=[\"minimize\", \"maximize\"],\n", + " backend=\"sigopt\",\n", + " hp_space=sigopt_hp_space,\n", + " n_trials=5,\n", + " compute_objective=lambda m: (m[\"eval_loss\"], m[\"eval_accuracy\"])\n", + ")\n", + "\n", + "print(\"Best SigOpt trials:\", best_trials)\n" + ] + }, + { + "cell_type": "markdown", + "id": "b1cb72e0", + "metadata": {}, + "source": [ + "## 4. Weights & Biases (W&B) Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd86a741", + "metadata": {}, + "outputs": [], + "source": [ + "import wandb\n", + "\n", + "def wandb_hp_space(trial):\n", + " return {\n", + " \"method\": \"random\",\n", + " \"metric\": {\"name\": \"eval_loss\", \"goal\": \"minimize\"},\n", + " \"parameters\": {\n", + " \"learning_rate\": {\"distribution\": \"uniform\", \"min\": 1e-6, \"max\": 1e-4},\n", + " \"per_device_train_batch_size\": {\"values\": [16, 32, 64]},\n", + " \"num_train_epochs\": {\"values\": [1, 2, 3]},\n", + " },\n", + " }\n", + "\n", + "best_runs = trainer.hyperparameter_search(\n", + " direction=\"minimize\",\n", + " backend=\"wandb\",\n", + " hp_space=wandb_hp_space,\n", + " n_trials=5,\n", + " compute_objective=compute_objective,\n", + ")\n", + "\n", + "print(\"Best W&B runs:\", best_runs)\n" + ] + }, + { + "cell_type": "markdown", + "id": "3ab87274", + "metadata": {}, + "source": [ + "**End of examples.**\n", + "\n", + "You can adjust `n_trials`, early stopping, objective functions, and other settings to suit your specific task." + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/transformers_doc/hpo_transformers_hpo_examples_en.ipynb b/transformers_doc/hpo_transformers_hpo_examples_en.ipynb new file mode 100644 index 00000000..593a369d --- /dev/null +++ b/transformers_doc/hpo_transformers_hpo_examples_en.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a5a74135", + "metadata": {}, + "source": [ + "# Hyperparameter Search Examples with 🤗 Transformers\n", + "This notebook demonstrates how to use various hyperparameter optimization backends (Optuna, Ray Tune, SigOpt, W&B) with 🤗 Transformers' `Trainer`." 
+  {
+   "cell_type": "markdown",
+   "id": "c79d4e0f",
+   "metadata": {},
+   "source": [
+    "## 1. Optuna Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d08078c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import optuna\n",
+    "from transformers import EarlyStoppingCallback\n",
+    "\n",
+    "def optuna_hp_space(trial):\n",
+    "    return {\n",
+    "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 1e-6, 1e-4, log=True),\n",
+    "        \"weight_decay\": trial.suggest_float(\"weight_decay\", 0.0, 0.3),\n",
+    "        \"num_train_epochs\": trial.suggest_int(\"num_train_epochs\", 1, 3),\n",
+    "        \"per_device_train_batch_size\": trial.suggest_categorical(\"per_device_train_batch_size\", [16, 32, 64]),\n",
+    "        \"warmup_steps\": trial.suggest_int(\"warmup_steps\", 0, 100),\n",
+    "    }\n",
+    "\n",
+    "# EarlyStoppingCallback needs load_best_model_at_end=True, a\n",
+    "# metric_for_best_model, and matching evaluation/save strategies.\n",
+    "training_args = TrainingArguments(\n",
+    "    \"optuna-hpo\",\n",
+    "    evaluation_strategy=\"epoch\",\n",
+    "    save_strategy=\"epoch\",\n",
+    "    load_best_model_at_end=True,\n",
+    "    metric_for_best_model=\"eval_loss\",\n",
+    "    greater_is_better=False,\n",
+    "    logging_steps=10,\n",
+    ")\n",
+    "\n",
+    "# No compute_metrics is needed here: Trainer reports eval_loss on its own.\n",
+    "trainer = Trainer(\n",
+    "    args=training_args,\n",
+    "    train_dataset=dataset[\"train\"],\n",
+    "    eval_dataset=dataset[\"test\"],\n",
+    "    tokenizer=tokenizer,\n",
+    "    model_init=model_init,\n",
+    "    callbacks=[EarlyStoppingCallback(early_stopping_patience=1)],\n",
+    ")\n",
+    "\n",
+    "best_trial = trainer.hyperparameter_search(\n",
+    "    direction=\"minimize\",\n",
+    "    backend=\"optuna\",\n",
+    "    hp_space=optuna_hp_space,\n",
+    "    n_trials=5,\n",
+    "    compute_objective=compute_objective,\n",
+    ")\n",
+    "\n",
+    "print(\"Best Optuna trial:\", best_trial)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a8ff17dd",
+   "metadata": {},
+   "source": [
+    "## 2. Ray Tune Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1c10b3b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from ray import tune\n",
+    "from ray.tune.schedulers import ASHAScheduler\n",
+    "from ray.tune.search.hyperopt import HyperOptSearch\n",
+    "\n",
+    "# With the Ray backend, hp_space returns the Ray Tune search space\n",
+    "# itself; the trial argument is unused.\n",
+    "def ray_hp_space(trial):\n",
+    "    return {\n",
+    "        \"learning_rate\": tune.loguniform(1e-5, 1e-3),\n",
+    "        \"per_device_train_batch_size\": tune.choice([16, 32, 64]),\n",
+    "        \"num_train_epochs\": tune.choice([2, 3, 4]),\n",
+    "    }\n",
+    "\n",
+    "training_args = TrainingArguments(\"ray-hpo\", evaluation_strategy=\"epoch\", logging_steps=10)\n",
+    "\n",
+    "trainer = Trainer(\n",
+    "    args=training_args,\n",
+    "    train_dataset=dataset[\"train\"],\n",
+    "    eval_dataset=dataset[\"test\"],\n",
+    "    tokenizer=tokenizer,\n",
+    "    model_init=model_init,\n",
+    "    # Metric keys get an eval_ prefix during evaluation, so this is\n",
+    "    # reported as eval_accuracy (eval_loss is reported automatically).\n",
+    "    compute_metrics=lambda p: {\"accuracy\": (p.predictions.argmax(-1) == p.label_ids).mean()},\n",
+    ")\n",
+    "\n",
+    "best_run = trainer.hyperparameter_search(\n",
+    "    direction=\"maximize\",\n",
+    "    backend=\"ray\",\n",
+    "    hp_space=ray_hp_space,\n",
+    "    n_trials=5,\n",
+    "    # Additional keyword arguments are forwarded to ray.tune.run().\n",
+    "    search_alg=HyperOptSearch(metric=\"eval_accuracy\", mode=\"max\"),\n",
+    "    scheduler=ASHAScheduler(metric=\"eval_accuracy\", mode=\"max\", max_t=3),\n",
+    "    resources_per_trial={\"cpu\": 1, \"gpu\": 0},\n",
+    "    compute_objective=lambda metrics: metrics[\"eval_accuracy\"],\n",
+    ")\n",
+    "\n",
+    "print(\"Best Ray Tune run:\", best_run)\n"
+   ]
+  },
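+  {
+   "cell_type": "markdown",
+   "id": "9a7b3c5d",
+   "metadata": {},
+   "source": [
+    "`hyperparameter_search` returns a `BestRun` carrying the run id, the objective value, and the winning hyperparameters. The cell below is a minimal sketch of one common follow-up: copy the winning values onto the trainer's `TrainingArguments` and train once more. It assumes every key in `best_run.hyperparameters` matches a `TrainingArguments` attribute, which holds for the search spaces in this notebook; since `model_init` is set, `train()` starts from a freshly initialized model."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8d6e2f4a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: re-train once with the winning Ray Tune configuration.\n",
+    "# Assumes the hyperparameter names match TrainingArguments attributes.\n",
+    "for name, value in best_run.hyperparameters.items():\n",
+    "    setattr(trainer.args, name, value)\n",
+    "\n",
+    "trainer.train()\n"
+   ]
+  },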
Ray Tune Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1c10b3b", + "metadata": {}, + "outputs": [], + "source": [ + "from ray import tune\n", + "from ray.tune.schedulers import ASHAScheduler\n", + "from ray.tune.search.hyperopt import HyperOptSearch\n", + "\n", + "def ray_hp_space(trial_config):\n", + " return {\n", + " \"learning_rate\": trial_config[\"learning_rate\"],\n", + " \"per_device_train_batch_size\": trial_config[\"per_device_train_batch_size\"],\n", + " \"num_train_epochs\": trial_config[\"num_train_epochs\"],\n", + " }\n", + "\n", + "ray_search_space = {\n", + " \"learning_rate\": tune.loguniform(1e-5, 1e-3),\n", + " \"per_device_train_batch_size\": tune.choice([16, 32, 64]),\n", + " \"num_train_epochs\": tune.choice([2, 3, 4]),\n", + "}\n", + "\n", + "training_args = TrainingArguments(\"ray-hpo\", evaluation_strategy=\"epoch\", logging_steps=10)\n", + "\n", + "trainer = Trainer(\n", + " args=training_args,\n", + " train_dataset=dataset[\"train\"],\n", + " eval_dataset=dataset[\"test\"],\n", + " tokenizer=tokenizer,\n", + " model_init=model_init,\n", + " compute_metrics=lambda p: {\n", + " \"eval_loss\": p.loss,\n", + " \"eval_accuracy\": (p.predictions.argmax(-1) == p.label_ids).mean()\n", + " },\n", + ")\n", + "\n", + "best_run = trainer.hyperparameter_search(\n", + " direction=\"maximize\",\n", + " backend=\"ray\",\n", + " hp_space=ray_hp_space,\n", + " n_trials=5,\n", + " search_alg=HyperOptSearch(metric=\"eval_accuracy\", mode=\"max\"),\n", + " scheduler=ASHAScheduler(metric=\"eval_accuracy\", mode=\"max\", max_t=3),\n", + " resources_per_trial={\"cpu\": 1, \"gpu\": 0},\n", + " compute_objective=lambda metrics: metrics[\"eval_accuracy\"],\n", + ")\n", + "\n", + "print(\"Best Ray Tune run:\", best_run)\n" + ] + }, + { + "cell_type": "markdown", + "id": "1236b11b", + "metadata": {}, + "source": [ + "## 3. SigOpt Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfd6cf35", + "metadata": {}, + "outputs": [], + "source": [ + "def sigopt_hp_space(trial):\n", + " return [\n", + " {\"bounds\": {\"min\": 1e-6, \"max\": 1e-4}, \"name\": \"learning_rate\", \"type\": \"double\"},\n", + " {\"bounds\": {\"min\": 0.0, \"max\": 0.3}, \"name\": \"weight_decay\", \"type\": \"double\"},\n", + " {\"categorical_values\": [\"16\", \"32\", \"64\"], \"name\": \"per_device_train_batch_size\", \"type\": \"categorical\"},\n", + " {\"bounds\": {\"min\": 1, \"max\": 3}, \"name\": \"num_train_epochs\", \"type\": \"int\"},\n", + " ]\n", + "\n", + "best_trials = trainer.hyperparameter_search(\n", + " direction=[\"minimize\", \"maximize\"],\n", + " backend=\"sigopt\",\n", + " hp_space=sigopt_hp_space,\n", + " n_trials=5,\n", + " compute_objective=lambda m: (m[\"eval_loss\"], m[\"eval_accuracy\"])\n", + ")\n", + "\n", + "print(\"Best SigOpt trials:\", best_trials)\n" + ] + }, + { + "cell_type": "markdown", + "id": "b1cb72e0", + "metadata": {}, + "source": [ + "## 4. 
Weights & Biases (W&B) Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd86a741", + "metadata": {}, + "outputs": [], + "source": [ + "import wandb\n", + "\n", + "def wandb_hp_space(trial):\n", + " return {\n", + " \"method\": \"random\",\n", + " \"metric\": {\"name\": \"eval_loss\", \"goal\": \"minimize\"},\n", + " \"parameters\": {\n", + " \"learning_rate\": {\"distribution\": \"uniform\", \"min\": 1e-6, \"max\": 1e-4},\n", + " \"per_device_train_batch_size\": {\"values\": [16, 32, 64]},\n", + " \"num_train_epochs\": {\"values\": [1, 2, 3]},\n", + " },\n", + " }\n", + "\n", + "best_runs = trainer.hyperparameter_search(\n", + " direction=\"minimize\",\n", + " backend=\"wandb\",\n", + " hp_space=wandb_hp_space,\n", + " n_trials=5,\n", + " compute_objective=compute_objective,\n", + ")\n", + "\n", + "print(\"Best W&B runs:\", best_runs)\n" + ] + }, + { + "cell_type": "markdown", + "id": "3ab87274", + "metadata": {}, + "source": [ + "**End of examples.**\n", + "\n", + "You can adjust `n_trials`, early stopping, objective functions, and other settings to suit your specific task." + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}