@@ -334,7 +334,83 @@ import pydicom
334
334
dcm = pydicom.dcmread("...")  # Any method to read from file/cloud storage

# Offset tables only exist for encapsulated (compressed) pixel data, so the
# byte-level check below is meaningful only for encapsulated transfer syntaxes.
if not dcm.file_meta.TransferSyntaxUID.is_encapsulated:
    print(
        "This image does not use an encapsulated (compressed) transfer "
        "syntax, so offset tables are not required."
    )
else:
    # Check metadata for the extended offset table
    print("Has Extended Offset Table:", "ExtendedOffsetTable" in dcm)

    # The start of the PixelData element will be a 4 byte item tag for the
    # offset table, which should always be present. The following 4 bytes give
    # the length of the offset table. If it is non-zero, the offset table is
    # present.
    has_basic_offset_table = dcm.PixelData[4:8] != b"\x00\x00\x00\x00"
    print("Has Basic Offset Table:", has_basic_offset_table)
351
+
352
+ ```
353
+
354
+ To do this from a remote Google Cloud Storage blob without needing to pull all the pixel data, you can do something like this:
355
+
356
+ ``` python
357
+ import os
358
+ from pydicom import dcmread
359
+ from google.cloud import storage
360
+
361
+
362
def check_offset_table(blob_key: str) -> None:
    """Print information on the offset table in an IDC blob.

    The blob is opened for remote reading and only the metadata plus the first
    few bytes of the pixel data element are inspected, so the bulk of the pixel
    data is never read by this function.

    Args:
        blob_key: Name of the object within the ``idc-open-data`` bucket.

    Raises:
        ValueError: If the bytes following the metadata are not the pixel data
            tag followed by an offset table item, or if the stream ends before
            the offset table length can be read.
    """
    # Little-endian encodings of the two tags expected at the start of an
    # encapsulated pixel data element.
    pixel_data_tag = b"\xe0\x7f\x10\x00"  # (7FE0,0010) Pixel Data
    offset_table_item_tag = b"\xfe\xff\x00\xe0"  # (FFFE,E000) Item

    # Create a storage client and use it to access the IDC's public data package
    gcs_client = storage.Client.create_anonymous_client()

    # Blob object for the particular file you want to check
    blob = gcs_client.bucket("idc-open-data").blob(blob_key)

    # Open the blob object for remote reading with a ~500kB chunk size
    with blob.open(mode="rb", chunk_size=500_000) as reader:
        # Read the file with stop_before_pixels=True, this moves the cursor
        # position to the start of the pixel data attribute
        dcm = dcmread(reader, stop_before_pixels=True)

        if not dcm.file_meta.TransferSyntaxUID.is_encapsulated:
            print(
                "This image does not use an encapsulated (compressed) transfer "
                "syntax, so offset tables are not required."
            )
            return

        # The presence of the extended offset table in the loaded metadata can
        # be checked straightforwardly
        has_extended_offset_table = "ExtendedOffsetTable" in dcm
        print("Has Extended Offset Table:", has_extended_offset_table)

        # Read the next tag, should be the pixel data tag. Explicit exceptions
        # are used rather than assert so the checks survive ``python -O``.
        if reader.read(4) != pixel_data_tag:
            raise ValueError("Expected pixel data tag")

        # Skip over VR (2 bytes), reserved (2 bytes), and pixel data length (4
        # bytes), giving 8 bytes total. Refer to
        # https://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_A.4.html#table_A.4-2
        reader.seek(8, os.SEEK_CUR)

        # Read the item tag for the offset table item
        if reader.read(4) != offset_table_item_tag:
            raise ValueError("Expected item tag")

        # Read the 32bit length of the pixel data's basic offset table. A short
        # read must not be mistaken for a non-zero length.
        raw_length = reader.read(4)
        if len(raw_length) != 4:
            raise ValueError(
                "Unexpected end of data while reading the offset table length"
            )

        # If the length of the offset table is non-zero, the offset table exists
        has_basic_offset_table = int.from_bytes(raw_length, "little") != 0
        print("Has Basic Offset Table:", has_basic_offset_table)
406
+
407
+
408
# Each call fetches one public object from the IDC bucket and prints which
# kinds of offset table it contains.

# Example with no offset table (NLST-LSS collection)
check_offset_table("4a30ffd2-8489-427b-9a83-03f4cf28534d/ad46e1e3-b37c-434b-a67a-5bacbcc608d9.dcm")

# Example with basic offset table (CCDI-MCI collection)
check_offset_table("763fe058-7d25-4ba7-9b29-fd3d6c41dc4b/210f0529-c767-4795-9acf-bad2f4877427.dcm")

# Example with extended offset table (CMB-MML collection)
check_offset_table("79f38b50-4df4-4358-9271-f28aeac573d7/23b9272a-34ef-49ca-833f-84329a18c1e4.dcm")
340
416
```
0 commit comments