@@ -188,35 +188,31 @@ from idc_index import IDCClient
188188# Create IDCClient for looking up bucket URLs
189189idc_client = IDCClient()
190190
191- # Get the list of file URLs in AWS bucket from SeriesInstanceUID
192- # In this case we are using a series from the IDC CCDI-MCI collection
193- file_urls = idc_client.get_series_file_URLs(
194- seriesInstanceUID = " 1.3.6.1.4.1.5962.99.1.1900325859.924065538.1719887277027.4.0" ,
195- source_bucket_location = " gcs"
196- )
197-
198- ( _, _, bucket_name, folder_name, file_name) = file_urls[0 ].split(" /" )
191+ # fetch the additional sm_instance_index table of idc-index, needed to resolve SM instances to file URLs
192+ idc_client.fetch_index(" sm_instance_index" )
193+
194+ # given the SeriesInstanceUID of an SM series, find the instance with the largest
195+ # TotalPixelMatrixColumns, i.e. the highest-resolution base layer of the image pyramid
196+ query = """
197+ SELECT SOPInstanceUID, TotalPixelMatrixColumns
198+ FROM sm_instance_index
199+ WHERE SeriesInstanceUID = '1.3.6.1.4.1.5962.99.1.1900325859.924065538.1719887277027.4.0'
200+ ORDER BY TotalPixelMatrixColumns DESC
201+ LIMIT 1
202+ """
203+ result = idc_client.sql_query(query)
204+
205+ # get the URL of the base layer instance in the Google Cloud Storage bucket
206+ base_layer_file_url = idc_client.get_instance_file_URL(sopInstanceUID = result.iloc[0 ][" SOPInstanceUID" ], source_bucket_location = " gcs" )
199207
200208# Create a storage client and use it to access the IDC's public data package
201209gcs_client = storage.Client.create_anonymous_client()
202- bucket = gcs_client.bucket(bucket_name)
203210
204- # Go over series instances to find the base (largest matrix) layer
205- # based on TotalPixelMatrixColumns value
206- largest_dimension = 0
207- base_layer_blob = None
208- for instance_file_url in file_urls:
209- (_, _, _, folder_name, file_name) = instance_file_url.split(" /" )
210- blob_name = f " { folder_name} / { file_name} "
211-
212- blob = bucket.blob(blob_name)
213-
214- with blob.open(" rb" ) as reader:
215- dcm = dcmread(reader, specific_tags = [keyword_dict[' TotalPixelMatrixColumns' ]])
216- total_columns = dcm.TotalPixelMatrixColumns
217- if total_columns > largest_dimension:
218- largest_dimension = total_columns
219- base_layer_blob = blob
211+ (_,_, bucket_name, folder_name, file_name) = base_layer_file_url.split(" /" )
212+ blob_key = f " { folder_name} / { file_name} "
213+
214+ bucket = gcs_client.bucket(bucket_name)
215+ base_layer_blob = bucket.blob(blob_key)
220216
221217# Read directly from the blob object using lazy frame retrieval
222218with base_layer_blob.open(mode = " rb" ) as reader:
0 commit comments