diff --git a/DeepSeek-R1-sagemaker-inferenceComponent.ipynb b/DeepSeek-R1-sagemaker-inferenceComponent.ipynb new file mode 100644 index 0000000..7f8d969 --- /dev/null +++ b/DeepSeek-R1-sagemaker-inferenceComponent.ipynb @@ -0,0 +1,772 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "71a329f0", + "metadata": {}, + "source": [ + "# SageMaker inference components for DeepSeek\n", + "Use the new SageMaker inference component feature to deploy multiple DeepSeek R1 distilled models on a single endpoint\n", + "- use the SageMaker DLC LMI image\n", + "- split the instance's GPUs across copies of the DeepSeek R1 distilled models (32B + 1.5B)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ec9ac353", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml\n", + "sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml\n" + ] + } + ], + "source": [ + "import boto3\n", + "import sagemaker\n", + "from sagemaker import Model, image_uris, serializers, deserializers\n", + "\n", + "role = sagemaker.get_execution_role() # execution role for the endpoint\n", + "sess = sagemaker.session.Session() # sagemaker session for interacting with different AWS APIs\n", + "region = sess._region_name # region name of the current SageMaker Studio environment\n", + "account_id = sess.account_id() # account_id of the current SageMaker Studio environment\n", + "bucket = sess.default_bucket()\n", + "s3_client = boto3.client(\"s3\")\n", + "sm_client = boto3.client(\"sagemaker\")\n", + "smr_client = boto3.client(\"sagemaker-runtime\")" + ] + }, + { + "cell_type": "markdown", + "id": "64191efa-d0fe-4b2d-a182-85c1c3d9a20f", + "metadata": {}, + "source": [ + "## endpoint setup & deploy" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "f880fe8e-1bbf-421c-9bc8-e8be268f2b9a", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'EndpointConfigArn': 'arn:aws:sagemaker:us-west-2:687912291502:endpoint-config/Sagemaker-inference-componet3',\n", + " 'ResponseMetadata': {'RequestId': '0850d666-ebb2-45e8-87d4-2556fb63d0ad',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '0850d666-ebb2-45e8-87d4-2556fb63d0ad',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '110',\n", + " 'date': 'Fri, 21 Feb 2025 13:41:15 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import boto3\n", + "import sagemaker\n", + "\n", + "role = sagemaker.get_execution_role()\n", + "sm_client = boto3.client(service_name=\"sagemaker\")\n", + "endpoint_config_name=\"Sagemaker-inference-componet3\"\n", + "endpoint_name = \"Sagemaker-inference-componet-mme3\"\n", + "\n", + "!aws sagemaker delete-endpoint-config --endpoint-config-name Sagemaker-inference-componet3\n", + "\n", + "\n", + "sm_client.create_endpoint_config(\n", + " EndpointConfigName=endpoint_config_name,\n", + " ExecutionRoleArn=role,\n", + " ProductionVariants=[{\n", + " \"VariantName\": \"AllTraffic\",\n", + " \"InstanceType\": \"ml.g5.48xlarge\",\n", + " \"InitialInstanceCount\": 1,\n", + "\t\t\"RoutingConfig\": {\n", + " \"RoutingStrategy\": \"LEAST_OUTSTANDING_REQUESTS\"\n", + " }\n", + " }]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b45783ca-57c0-4eed-83c5-e94827d2b28b", + "metadata": 
{ + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'EndpointArn': 'arn:aws:sagemaker:us-west-2:687912291502:endpoint/Sagemaker-inference-componet-mme3',\n", + " 'ResponseMetadata': {'RequestId': '0166bfbd-0da5-4ef2-8719-aa9d7ed45ce5',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '0166bfbd-0da5-4ef2-8719-aa9d7ed45ce5',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '101',\n", + " 'date': 'Fri, 21 Feb 2025 13:41:15 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sm_client.create_endpoint(\n", + " EndpointName=endpoint_name,\n", + " EndpointConfigName=endpoint_config_name,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "076f0319-367d-4297-920b-24f9e5f8d41f", + "metadata": {}, + "source": [ + "## dummy model data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8435bcc9-c8a5-4dce-b19b-ab03bbe6d7ad", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "code/\n" + ] + } + ], + "source": [ + "%%sh\n", + "mkdir code\n", + "tar czvf source_code.tar.gz code/\n", + "rm -rf code" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "df8c4ecb-59f0-4ebd-8a82-1e7224dfbe7d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "S3 Code tar ball uploaded to --- > s3://sagemaker-us-west-2-687912291502/deepseek/source_code/source_code.tar.gz\n" + ] + } + ], + "source": [ + "s3_code_prefix = \"deepseek/source_code\"\n", + "source_data = sess.upload_data(\"source_code.tar.gz\", bucket, s3_code_prefix)\n", + "print(f\"S3 Code tar ball uploaded to --- > {source_data}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "be7f5cbc-d55b-48cd-a650-637e3f616881", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mymodel/\n" + ] + } + ], + "source": [ + "%%sh\n", + "mkdir mymodel\n", + "tar czvf mymodel.tar.gz mymodel/\n", + "rm -rf mymodel" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "60a1f735-fb10-4693-9ad9-49520f23a2cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "S3 Model tar ball uploaded to --- > s3://sagemaker-us-west-2-687912291502/deepseek/model/mymodel.tar.gz\n" + ] + } + ], + "source": [ + "s3_code_prefix = \"deepseek/model\"\n", + "model_data = sess.upload_data(\"mymodel.tar.gz\", bucket, s3_code_prefix)\n", + "print(f\"S3 Model tar ball uploaded to --- > {model_data}\")" + ] + }, + { + "cell_type": "markdown", + "id": "64e4c0fa-caa4-4bf1-bfaa-a2a076a9d6ba", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## ds qwen 1.5b model setup & deploy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ec178cd-c8e8-4f05-a105-91a9deae7990", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Image going to be used is ---- > 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\n", + "Model created: arn:aws:sagemaker:us-west-2:687912291502:model/deepseek-r1-distill-qwen-1-5b-2025-02-22-03-56-13\n" + ] + } + ], + "source": [ + "import sagemaker\n", + "from sagemaker import Model, image_uris, serializers, deserializers\n", + "from sagemaker import get_execution_role\n", + "from 
sagemaker.pytorch import PyTorchModel\n", + "import time\n", + "\n", + "hf_model_id=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B\"\n", + "# get the LMI inference container image\n", + "image_uri = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\"\n", + "print(f\"Image going to be used is ---- > {image_uri}\")\n", + "\n", + "\n", + "model_name= \"deepseek-r1-distill-qwen-1-5b-\"+time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", + "endpoint_name = \"Sagemaker-inference-componet-mme3\"\n", + "\n", + "vllm_config = {\n", + " \"HF_MODEL_ID\": hf_model_id,\n", + " \"OPTION_TENSOR_PARALLEL_DEGREE\": \"max\",\n", + " \"HF_TOKEN\": \"\",\n", + " \"OPTION_ROLLING_BATCH\": \"vllm\",\n", + " \"OPTION_OUTPUT_FORMATTER\": \"json\",\n", + " \"OPTION_MAX_ROLLING_BATCH_SIZE\": \"16\",\n", + " \"OPTION_MODEL_LOADING_TIMEOUT\": \"1600\",\n", + "}\n", + "\n", + "container_config = {\n", + " 'Image': image_uri,\n", + " 'ModelDataUrl': source_data,\n", + " 'Environment': vllm_config\n", + "}\n", + "\n", + "response = sm_client.create_model(\n", + " ModelName=model_name,\n", + " ExecutionRoleArn=role,\n", + " PrimaryContainer=container_config\n", + ")\n", + "\n", + "print(f\"Model created: {response['ModelArn']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "1feb42f6-1b9c-4b17-b470-6ddc0b6a0e06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:687912291502:inference-component/IC-deepseek-r1-distill-qwen-1-5b-2025-02-22-03-56-14',\n", + " 'ResponseMetadata': {'RequestId': '094ba7a1-4b7a-4d0a-9536-c81d896e8445',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '094ba7a1-4b7a-4d0a-9536-c81d896e8445',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '141',\n", + " 'date': 'Sat, 22 Feb 2025 03:56:14 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sm_client.create_inference_component(\n", + " InferenceComponentName=\"IC-deepseek-r1-distill-qwen-1-5b-\"+time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime()),\n", + " EndpointName=endpoint_name,\n", + " VariantName=\"AllTraffic\",\n", + " Specification={\n", + " \"ModelName\": model_name,\n", + " \"ComputeResourceRequirements\": {\n", + "\t\t \"NumberOfAcceleratorDevicesRequired\": 1, \n", + "\t\t\t#\"NumberOfCpuCoresRequired\": 2, \n", + "\t\t\t\"MinMemoryRequiredInMb\": 4096\n", + "\t }\n", + " },\n", + " RuntimeConfig={\"CopyCount\": 1},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "77a202cc-901a-4441-98cd-73ecda1e236d", + "metadata": { + "tags": [] + }, + "source": [ + "## ds qwen 32b model setup & deploy" + ] + }, + { + "cell_type": "markdown", + "id": "c02bee56-d075-4b22-873e-745181fbe356", + "metadata": { + "tags": [] + }, + "source": [ + "#### boto3 api" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e9c4719-1f14-4476-a6f0-21eb85d053f8", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Image going to be used is ---- > 763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\n", + "Model created: arn:aws:sagemaker:us-west-2:687912291502:model/deepseek-r1-distill-qwen-32b-2025-02-22-02-57-08\n" + ] + } + ], + "source": [ + "import sagemaker\n", + "from sagemaker import Model, image_uris, serializers, deserializers\n", + "from sagemaker import get_execution_role\n", + 
"from sagemaker.pytorch import PyTorchModel\n", + "import time\n", + "\n", + "hf_model_id=\"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B\"\n", + "# 获取 lim 推理容器\n", + "image_uri = \"763104351884.dkr.ecr.us-west-2.amazonaws.com/djl-inference:0.31.0-lmi13.0.0-cu124\"\n", + "print(f\"Image going to be used is ---- > {image_uri}\")\n", + "\n", + "\n", + "model_name= \"deepseek-r1-distill-qwen-32b-\"+time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime())\n", + "endpoint_name = \"Sagemaker-inference-componet-mme3\"\n", + "\n", + "vllm_config = {\n", + " \"HF_MODEL_ID\": hf_model_id,\n", + " \"OPTION_TENSOR_PARALLEL_DEGREE\": \"4\",\n", + " \"HF_TOKEN\": \"\",\n", + " \"OPTION_ROLLING_BATCH\": \"vllm\",\n", + " \"OPTION_OUTPUT_FORMATTER\": \"json\",\n", + " \"OPTION_MAX_ROLLING_BATCH_SIZE\": \"10\",\n", + " \"OPTION_MAX_MODEL_LEN\":\"8092\",\n", + " \"OPTION_MODEL_LOADING_TIMEOUT\": \"3600\",\n", + "}\n", + "container_config = {\n", + " 'Image': image_uri,\n", + " 'ModelDataUrl': source_data,\n", + " 'Environment': vllm_config\n", + "}\n", + "\n", + "response = sm_client.create_model(\n", + " ModelName=model_name,\n", + " ExecutionRoleArn=role,\n", + " PrimaryContainer=container_config\n", + ")\n", + "print(f\"Model created: {response['ModelArn']}\")\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e1a82827-774c-40d6-8159-5addadfa7e64", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:687912291502:inference-component/IC-deepseek-r1-distill-qwen-32b-2025-02-22-02-57-09',\n", + " 'ResponseMetadata': {'RequestId': '99c16a61-bffc-4939-b763-dc75f86d05c0',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '99c16a61-bffc-4939-b763-dc75f86d05c0',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '140',\n", + " 'date': 'Sat, 22 Feb 2025 02:57:09 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sm_client.create_inference_component(\n", + " InferenceComponentName=\"IC-deepseek-r1-distill-qwen-32b-\"+time.strftime(\"%Y-%m-%d-%H-%M-%S\", time.gmtime()),\n", + " EndpointName=endpoint_name,\n", + " VariantName=\"AllTraffic\",\n", + " Specification={\n", + " \"ModelName\": model_name,\n", + " \"ComputeResourceRequirements\": {\n", + "\t\t \"NumberOfAcceleratorDevicesRequired\": 4, \n", + "\t\t\t#\"NumberOfCpuCoresRequired\": 2, \n", + "\t\t\t\"MinMemoryRequiredInMb\": 80024\n", + "\t }\n", + " },\n", + " RuntimeConfig={\"CopyCount\": 1},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a34c3307-7928-479c-a8e9-e9f56763b818", + "metadata": { + "tags": [] + }, + "source": [ + "## inference test" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "db3bc85f-00c2-4225-83df-99d93f3ff8a5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "IC-deepseek-r1-distill-qwen-1-5b-2025-02-22-03-56-14\n", + "IC-deepseek-r1-distill-qwen-32b-2025-02-22-02-57-09\n", + "{'generated_text': \"Sure! Here's a classic vanilla cake recipe:\\n\\n**Ingredients:**\\n- 1 ½ cups (300g) all-purpose flour\\n- 1 cup (200g) granulated sugar\\n- 1 teaspoon baking powder\\n- ½ teaspoon baking soda\\n- ½ teaspoon salt\\n- ½ cup (120ml) whole milk\\n- ½ cup (120ml) vegetable oil\\n- 2 large eggs\\n- 2 teaspoons pure vanilla extract\\n\\n**Instructions:**\\n1. 
Preheat your oven to 350°F (175°C) and grease two 9-inch round cake pans.\n2. In a large bowl, whisk together the flour, sugar, baking powder, baking soda, and salt.\n3. Add the milk, oil, eggs, and vanilla extract to the dry ingredients. Mix until just combined.\n4. Pour the batter evenly into the prepared pans.\n5. Bake for 25-30 minutes, or until a toothpick inserted into the center comes out clean.\n6. Let the cakes cool in the pans for 10 minutes, then transfer to a wire rack to cool completely before frosting.\n<|eot_id|>\\n\\n<|start_header\"}\n" + ] + } + ], + "source": [ + "from joblib import Parallel, delayed\n", + "import json\n", + "import codecs\n", + "import re\n", + "import datetime\n", + "import random\n", + "\n", + "recipe_food = \"\"\"\n", + "How to make cake?\n", + "\"\"\"\n", + "\n", + "prompt_template = f\"\"\"\n", + "<|begin_of_text|>\n", + "<|start_header_id|>system<|end_header_id|>\n", + "You are a helpful chef assistant who is an expert in creating recipes.\n", + "<|eot_id|>\n", + "\n", + "<|start_header_id|>user<|end_header_id|>\n", + "Create a recipe here.\n", + "\n", + "{recipe_food}\n", + "\n", + "\n", + "Provide the summary directly, without any introduction or preamble. Do not start the response with \"Here is a...\".<|eot_id|>\n", + "\n", + "<|start_header_id|>assistant<|end_header_id|>\n", + "\"\"\"\n", + "\n", + "\n", + "endpoint_name = \"Sagemaker-inference-componet-mme3\"\n", + "IC_s=[]\n", + "InferenceComponents = sm_client.list_inference_components(\n", + " EndpointNameEquals=endpoint_name\n", + ")['InferenceComponents']\n", + "for InferenceComponent in InferenceComponents:\n", + " IC_name = InferenceComponent['InferenceComponentName']\n", + " print(IC_name)\n", + " IC_s.append(IC_name)\n", + "\n", + "import json\n", + "payload = {\n", + " \"inputs\": prompt_template,\n", + " \"parameters\": {\n", + " \"do_sample\":True,\n", + " \"max_new_tokens\":256,\n", + " \"top_p\":0.9,\n", + " \"temperature\":0.6,\n", + " }\n", + " }\n", + "\n", + "\n", + "def invoke_test():\n", + " response = smr_client.invoke_endpoint(\n", + " EndpointName=endpoint_name,\n", + " #InferenceComponentName = \"IC-deepseek-r1-distill-qwen-1-5b-2025-02-22-03-56-14\",\n", + " InferenceComponentName = 'IC-deepseek-r1-distill-qwen-32b-2025-02-22-02-57-09',\n", + " ContentType=\"application/json\",\n", + " Accept=\"application/json\",\n", + " Body=json.dumps(payload),\n", + " )\n", + " \n", + " result = json.loads(response['Body'].read().decode())\n", + " print(result)\n", + "\n", + "\n", + "invoke_test()\n", + " \n", + "## 10 concurrent requests\n", + "#results = Parallel(n_jobs=100, prefer='threads', verbose=1,)(\n", + "# delayed(invoke_test)()\n", + "# for index in range(1,10000)\n", + "#)" + ] + }, + { + "cell_type": "markdown", + "id": "949a516e-9df4-42d7-8ffb-747c7c28e3b3", + "metadata": { + "tags": [] + }, + "source": [ + "## inference component management" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e7ce7583-529c-4aa7-a835-38324c451a9c", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'InferenceComponents': [{'CreationTime': datetime.datetime(2025, 2, 22, 3, 56, 14, 793000, tzinfo=tzlocal()),\n", + " 'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:687912291502:inference-component/IC-deepseek-r1-distill-qwen-1-5b-2025-02-22-03-56-14',\n", + " 'InferenceComponentName': 'IC-deepseek-r1-distill-qwen-1-5b-2025-02-22-03-56-14',\n", + " 'EndpointArn': 
'arn:aws:sagemaker:us-west-2:687912291502:endpoint/sagemaker-inference-componet-mme3',\n", + " 'EndpointName': 'Sagemaker-inference-componet-mme3',\n", + " 'VariantName': 'AllTraffic',\n", + " 'InferenceComponentStatus': 'InService',\n", + " 'LastModifiedTime': datetime.datetime(2025, 2, 22, 3, 57, 37, 793000, tzinfo=tzlocal())},\n", + " {'CreationTime': datetime.datetime(2025, 2, 22, 2, 57, 9, 771000, tzinfo=tzlocal()),\n", + " 'InferenceComponentArn': 'arn:aws:sagemaker:us-west-2:687912291502:inference-component/IC-deepseek-r1-distill-qwen-32b-2025-02-22-02-57-09',\n", + " 'InferenceComponentName': 'IC-deepseek-r1-distill-qwen-32b-2025-02-22-02-57-09',\n", + " 'EndpointArn': 'arn:aws:sagemaker:us-west-2:687912291502:endpoint/sagemaker-inference-componet-mme3',\n", + " 'EndpointName': 'Sagemaker-inference-componet-mme3',\n", + " 'VariantName': 'AllTraffic',\n", + " 'InferenceComponentStatus': 'InService',\n", + " 'LastModifiedTime': datetime.datetime(2025, 2, 22, 11, 48, 48, 32000, tzinfo=tzlocal())}],\n", + " 'ResponseMetadata': {'RequestId': '5f72e5cc-222e-4028-9dbf-0f8d544056cd',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '5f72e5cc-222e-4028-9dbf-0f8d544056cd',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'content-length': '1037',\n", + " 'date': 'Sat, 22 Feb 2025 12:24:31 GMT'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "endpoint_name = \"Sagemaker-inference-componet-mme3\"\n", + "sm_client.list_inference_components(\n", + " EndpointNameEquals=endpoint_name\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "31676df7-5259-41f2-a3d8-db34e6332d3a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "endpoint_name = \"Sagemaker-inference-componet-mme3\"\n", + "response = sm_client.update_inference_component(\n", + " InferenceComponentName='IC-deepseek-r1-distill-qwen-32b-2025-02-22-02-57-09',\n", + " Specification={\n", + " \"ModelName\": \"deepseek-r1-distill-qwen-32b-2025-02-22-02-57-08\",\n", + " 'ComputeResourceRequirements': {\n", + " 'NumberOfAcceleratorDevicesRequired': 4,\n", + " 'MinMemoryRequiredInMb': 86024,\n", + " 'MaxMemoryRequiredInMb': 92400\n", + " }\n", + " },\n", + " RuntimeConfig={\n", + " 'CopyCount': 1\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "b4a7e267-3d68-42be-8398-9f259f6ce078", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ResponseMetadata': {'RequestId': '0b326388-6144-483d-9dd1-5f826b4c2a03',\n", + " 'HTTPStatusCode': 200,\n", + " 'HTTPHeaders': {'x-amzn-requestid': '0b326388-6144-483d-9dd1-5f826b4c2a03',\n", + " 'content-type': 'application/x-amz-json-1.1',\n", + " 'date': 'Fri, 21 Feb 2025 10:29:34 GMT',\n", + " 'content-length': '0'},\n", + " 'RetryAttempts': 0}}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sm_client.delete_inference_component(InferenceComponentName='IC-deepseek-r1-distill-qwen-32b-****')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "39bf3518-ae87-475d-8da0-0176c5089253", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "IC-deepseek-r1-distill-qwen-32b-2025-02-22-01-33-32\n", + "IC-deepseek-r1-distill-qwen-1-5b-2025-02-22-01-33-31\n" + ] + } + ], + "source": [ + "## delete all inference components\n", + "endpoint_name = 
\"Sagemaker-inference-componet-mme3\"\n", + "InferenceComponents = sm_client.list_inference_components(\n", + " EndpointNameEquals=endpoint_name\n", + ")['InferenceComponents']\n", + "for InferenceComponent in InferenceComponents:\n", + " IC_name = InferenceComponent['InferenceComponentName']\n", + " print(IC_name)\n", + " sm_client.delete_inference_component(InferenceComponentName=IC_name)\n", + "#print(response)" + ] + }, + { + "cell_type": "markdown", + "id": "ad53d3f7-c686-4436-989a-485a92c5764e", + "metadata": {}, + "source": [ + "## clear" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3010af8-0740-45c6-b2d4-1f215f812e29", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import time\n", + "sagemaker_client = boto3.client('sagemaker')\n", + "model_list = sagemaker_client.list_models()\n", + "for model in model_list['Models']:\n", + " model_name = model['ModelName']\n", + " model_arn = model['ModelArn']\n", + " creation_time = model['CreationTime']\n", + " print(f\"model name: {model_name}, ARN: {model_arn}, created timestamp: {creation_time}\")\n", + " sagemaker_client.delete_model(ModelName=model_name)\n", + "\n", + " time.sleep(2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d674b41", + "metadata": {}, + "outputs": [], + "source": [ + "sess.delete_endpoint(endpoint_name)\n", + "sess.delete_endpoint_config(endpoint_name)\n", + "model.delete_model()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "conda_python3", + "language": "python", + "name": "conda_python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}