Skip to content

Commit cf9b7e5

Browse files
committed
replace iteration over instances with direct selection using new idc-index function
1 parent d9c8ee2 commit cf9b7e5

File tree

1 file changed

+21
-25
lines changed

1 file changed

+21
-25
lines changed

data/downloading-data/direct-loading.md

Lines changed: 21 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -188,35 +188,31 @@ from idc_index import IDCClient
188188
# Create IDCClient for looking up bucket URLs
189189
idc_client = IDCClient()
190190

191-
# Get the list of file URLs in AWS bucket from SeriesInstanceUID
192-
# In this case we are using a series from the IDC CCDI-MCI collection
193-
file_urls = idc_client.get_series_file_URLs(
194-
seriesInstanceUID="1.3.6.1.4.1.5962.99.1.1900325859.924065538.1719887277027.4.0",
195-
source_bucket_location="gcs"
196-
)
197-
198-
( _, _, bucket_name, folder_name, file_name) = file_urls[0].split("/")
191+
# install additional component of idc-index to resolve SM instances to file URLs
192+
idc_client.fetch_index("sm_instance_index")
193+
194+
# given SeriesInstanceUID of an SM series, find the instance that corresponds to the
195+
# highest resolution base layer of the image pyramid
196+
query = """
197+
SELECT SOPInstanceUID, TotalPixelMatrixColumns
198+
FROM sm_instance_index
199+
WHERE SeriesInstanceUID = '1.3.6.1.4.1.5962.99.1.1900325859.924065538.1719887277027.4.0'
200+
ORDER BY TotalPixelMatrixColumns DESC
201+
LIMIT 1
202+
"""
203+
result = idc_client.sql_query(query)
204+
205+
# get URL corresponding to the base layer instance in the Google Storage bucket
206+
base_layer_file_url = idc_client.get_instance_file_URL(sopInstanceUID=result.iloc[0]["SOPInstanceUID"], source_bucket_location="gcs")
199207

200208
# Create a storage client and use it to access the IDC's public data package
201209
gcs_client = storage.Client.create_anonymous_client()
202-
bucket = gcs_client.bucket(bucket_name)
203210

204-
# Go over series instances to find the base (largest matrix) layer
205-
# based on TotalPixelMatrixColumns value
206-
largest_dimension = 0
207-
base_layer_blob = None
208-
for instance_file_url in file_urls:
209-
(_, _, _, folder_name, file_name) = instance_file_url.split("/")
210-
blob_name = f"{folder_name}/{file_name}"
211-
212-
blob = bucket.blob(blob_name)
213-
214-
with blob.open("rb") as reader:
215-
dcm = dcmread(reader, specific_tags=[keyword_dict['TotalPixelMatrixColumns']])
216-
total_columns = dcm.TotalPixelMatrixColumns
217-
if total_columns > largest_dimension:
218-
largest_dimension = total_columns
219-
base_layer_blob = blob
211+
(_,_, bucket_name, folder_name, file_name) = base_layer_file_url.split("/")
212+
blob_key = f"{folder_name}/{file_name}"
213+
214+
bucket = gcs_client.bucket(bucket_name)
215+
base_layer_blob = bucket.blob(blob_key)
220216

221217
# Read directly from the blob object using lazy frame retrieval
222218
with base_layer_blob.open(mode="rb") as reader:

0 commit comments

Comments
 (0)