diff --git a/deployments/gpu_pytorch_demo/demo.py b/deployments/gpu_pytorch_demo/demo.py new file mode 100644 index 000000000..c5f0a32b6 --- /dev/null +++ b/deployments/gpu_pytorch_demo/demo.py @@ -0,0 +1,34 @@ +# Copyright 2025 Intel Corporation. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Smoke test: run a trivial tensor computation on an XPU device via PyTorch.""" + +import torch as t + +# Fail fast when PyTorch reports no XPU device. +if not t.xpu.is_available(): + raise RuntimeError("No xpu available") + +# Run a trivial element-wise computation on the XPU. +arr = t.tensor([1.0, 2.0, 3.0, 4.0]).to("xpu") + +arr *= 2 + +print(arr) + +# Expected values, placed on the same device for the comparison. +expected = t.tensor([2.0, 4.0, 6.0, 8.0]).to("xpu") + +if not t.equal(arr, expected): + raise RuntimeError(f"Not valid result: {arr}") diff --git a/deployments/gpu_tensorflow_test/deployment.yaml b/deployments/gpu_pytorch_demo/deployment.yaml similarity index 83% rename from deployments/gpu_tensorflow_test/deployment.yaml rename to deployments/gpu_pytorch_demo/deployment.yaml index 472c85f49..a7843e0e9 100644 --- a/deployments/gpu_tensorflow_test/deployment.yaml +++ b/deployments/gpu_pytorch_demo/deployment.yaml @@ -6,12 +6,12 @@ spec: restartPolicy: Never containers: - name: testcontainer - image: intel/intel-extension-for-tensorflow:latest + image: intel/intel-extension-for-pytorch:xpu imagePullPolicy: IfNotPresent securityContext: allowPrivilegeEscalation: false command: ["/bin/sh", "-c"] - args: ["python /code/training.py"] + args: ["python3 /code/demo.py"] resources: limits: gpu.intel.com/i915: 1 diff --git a/deployments/gpu_tensorflow_test/kustomization.yaml 
b/deployments/gpu_pytorch_demo/kustomization.yaml similarity index 51% rename from deployments/gpu_tensorflow_test/kustomization.yaml rename to deployments/gpu_pytorch_demo/kustomization.yaml index eb361008d..2c03ae2a6 100644 --- a/deployments/gpu_tensorflow_test/kustomization.yaml +++ b/deployments/gpu_pytorch_demo/kustomization.yaml @@ -1,11 +1,11 @@ configMapGenerator: - name: training-code files: - - training.py + - demo.py resources: - deployment.yaml images: - - name: intel/intel-extension-for-tensorflow - newTag: 1.2.0-gpu + - name: intel/intel-extension-for-pytorch + newTag: 2.8.10-xpu diff --git a/deployments/gpu_tensorflow_test/training.py b/deployments/gpu_tensorflow_test/training.py deleted file mode 100644 index 77c284ce2..000000000 --- a/deployments/gpu_tensorflow_test/training.py +++ /dev/null @@ -1,61 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. -# Copyright 2023 Intel Corporation. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# original code from: -# https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l02c01_celsius_to_fahrenheit.ipynb -# this is slightly modified to run explicitly with XPU devices - -import tensorflow as tf -import intel_extension_for_tensorflow as itex -import numpy as np - -print("BACKENDS: ", str(itex.get_backend())) - -devs = tf.config.list_physical_devices('XPU') - -print(devs) - -if not devs: - raise Exception("No devices found") - -with tf.device("/xpu:0"): - celsius_q = np.array([-40, -10, 0, 8, 15, 22, 38], dtype=float) - fahrenheit_a = np.array([-40, 14, 32, 46, 59, 72, 100], dtype=float) - - model = tf.keras.Sequential([ - tf.keras.layers.Dense(units=1, input_shape=[1]) - ]) - - model.compile(loss='mean_squared_error', - optimizer=tf.keras.optimizers.Adam(0.1)) - - history = model.fit(celsius_q, fahrenheit_a, epochs=500, verbose=False) - - print("model trained") - - test = [100.0] - p = model.predict(test) - - if len(p) != 1: - raise Exception("invalid result obj") - - prediction = p[0] - - if prediction >= 211 and prediction <= 213: - print("inference ok: %f" % prediction) - else: - raise Exception("bad prediction %f" % prediction) - - print("SUCCESS") diff --git a/test/e2e/gpu/gpu.go b/test/e2e/gpu/gpu.go index 0d8b6b9c9..0bb567f8e 100644 --- a/test/e2e/gpu/gpu.go +++ b/test/e2e/gpu/gpu.go @@ -41,8 +41,8 @@ const ( healthMgmtYaml = "deployments/gpu_plugin/overlays/health/kustomization.yaml" nfdRulesYaml = "deployments/nfd/overlays/node-feature-rules/kustomization.yaml" containerName = "testcontainer" - tfKustomizationYaml = "deployments/gpu_tensorflow_test/kustomization.yaml" - tfPodName = "training-pod" + ptKustomizationYaml = "deployments/gpu_pytorch_demo/kustomization.yaml" + ptPodName = "training-pod" ) func init() { @@ -195,12 +195,12 @@ func describe() { }) }) - ginkgo.It("run a small workload on the GPU [App:tensorflow]", func(ctx context.Context) { + ginkgo.It("run a small workload on the GPU 
[App:pytorch]", func(ctx context.Context) { createPluginAndVerifyExistence(f, ctx, vanillaPath, "gpu.intel.com/i915") - kustomYaml, err := utils.LocateRepoFile(tfKustomizationYaml) + kustomYaml, err := utils.LocateRepoFile(ptKustomizationYaml) if err != nil { - framework.Failf("unable to locate %q: %v", tfKustomizationYaml, err) + framework.Failf("unable to locate %q: %v", ptKustomizationYaml, err) } ginkgo.By("submitting demo deployment") @@ -209,8 +209,8 @@ func describe() { ginkgo.By("waiting the pod to finish") - err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, tfPodName, f.Namespace.Name, 300*time.Second) - gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, tfPodName, containerName)) + err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, ptPodName, f.Namespace.Name, 300*time.Second) + gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, ptPodName, containerName)) - framework.Logf("tensorflow execution succeeded!") + framework.Logf("pytorch execution succeeded!") })