From e8c87c8c11b463efe078bb99916d687862d6b227 Mon Sep 17 00:00:00 2001
From: eunsang
Date: Fri, 13 Jun 2025 19:19:23 +0900
Subject: [PATCH] fix: use TemporaryDirectory instead of NamedTemporaryFile
 for MLTable.save

---
 .../custom_preprocessing/src/run.py | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/cli/monitoring/components/custom_preprocessing/src/run.py b/cli/monitoring/components/custom_preprocessing/src/run.py
index 0e73565d82..8fe5ad41ee 100644
--- a/cli/monitoring/components/custom_preprocessing/src/run.py
+++ b/cli/monitoring/components/custom_preprocessing/src/run.py
@@ -75,22 +75,22 @@ def preprocess(
     # Data column is a list of objects, convert it into string because spark.read_json cannot read object
     table = table.convert_column_types({"data": mltable.DataType.to_string()})
 
-    # Use NamedTemporaryFile to create a secure temp file
-    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
-        save_path = temp_file.name
-        table.save(save_path)
-
-    # Save preprocessed_data MLTable to temp location
-    des_path = preprocessed_input_data + "temp"
-    fs = AzureMachineLearningFileSystem(des_path)
-    print("MLTable path:", des_path)
-    # TODO: Evaluate if we need to overwrite
-    fs.upload(
-        lpath=save_path,
-        rpath="",
-        **{"overwrite": "MERGE_WITH_OVERWRITE"},
-        recursive=True,
-    )
+    with tempfile.TemporaryDirectory() as temp_dir:
+        table.save(temp_dir)
+        print(f"Temporary MLTable saved at: {temp_dir}")
+
+        # Construct remote destination path
+        des_path = preprocessed_input_data + "temp"
+        fs = AzureMachineLearningFileSystem(des_path)
+
+        print(f"Uploading MLTable to: {des_path}")
+        fs.upload(
+            lpath=temp_dir,  # this is the local directory with the MLTable metadata
+            rpath="",
+            **{"overwrite": "MERGE_WITH_OVERWRITE"},
+            recursive=True,
+        )
+        print("Upload complete.")
 
     # Read mltable from preprocessed_data
     df = read_mltable_in_spark(mltable_path=des_path)
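
-- 
Note (after the signature delimiter, so `git am` ignores it): the hunk above
works because MLTable.save() populates a directory, so the temporary location
must be a directory rather than a file; NamedTemporaryFile handed save() a
file path. Below is a minimal, self-contained sketch of the same
save-then-upload pattern, assuming the `mltable` and `azureml-fsspec`
packages; the datastore URI and local MLTable folder are hypothetical
placeholders, and the upload call mirrors the one in the patch.

    import tempfile

    import mltable
    from azureml.fsspec import AzureMachineLearningFileSystem

    # Hypothetical destination; in the component this is preprocessed_input_data + "temp".
    des_path = "azureml://subscriptions/.../datastores/workspaceblobstore/paths/preprocessed/temp"

    # Load an existing MLTable definition from a local folder (assumed to exist).
    table = mltable.load("./my_mltable")

    # MLTable.save() writes its output into a directory, so use
    # TemporaryDirectory rather than a temporary file.
    with tempfile.TemporaryDirectory() as temp_dir:
        table.save(temp_dir)

        # Upload the whole directory to the path backing des_path.
        fs = AzureMachineLearningFileSystem(des_path)
        fs.upload(
            lpath=temp_dir,
            rpath="",
            recursive=True,
            **{"overwrite": "MERGE_WITH_OVERWRITE"},
        )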