Skip to content

Commit 7b64fde

Browse files
authored
Merge pull request #74 from ImagingDataCommons/remote_offset_table_check
Add section on checking for offset table in remote blob
2 parents e5d07d0 + 1184c92 commit 7b64fde

File tree

1 file changed

+78
-2
lines changed

1 file changed

+78
-2
lines changed

data/downloading-data/direct-loading.md

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,83 @@ import pydicom
334334
dcm = pydicom.dcmread("...") # Any method to read from file/cloud storage
335335

336336

337-
print("Has Extended Offset Table:", "ExtendedOffsetTable" in dcm)
338-
print("Has Basic Offset Table:", dcm.Pixeldata[4:8] != b'\x00\x00\x00\x00')
337+
if not dcm.file_meta.TransferSyntaxUID.is_encapsulated:
338+
print(
339+
"This image does not use an encapsulated (compressed) transfer "
340+
"syntax, so offset tables are not required."
341+
)
342+
else:
343+
# Check metadata for the extended offset table
344+
print("Has Extended Offset Table:", "ExtendedOffsetTable" in dcm)
345+
346+
# The start of the PixelData element will be a 4 byte item tag for the offset table,
347+
# which should always be present. The following 4 bytes give the length of the offset
348+
# table. If it is non-zero, the offset table is present
349+
has_basic_offset_table = dcm.PixelData[4:8] != b'\x00\x00\x00\x00'
350+
print("Has Basic Offset Table:", has_basic_offset_table)
351+
352+
```
353+
354+
To do this from a remote Google Cloud Storage blob without needing to pull all the pixel data, you can do something like this:
355+
356+
```python
357+
import os
358+
from pydicom import dcmread
359+
from google.cloud import storage
360+
361+
362+
def check_offset_table(blob_key: str):
363+
"""Print information on the offset table in an IDC blob."""
364+
# Create a storage client and use it to access the IDC's public data package
365+
gcs_client = storage.Client.create_anonymous_client()
366+
367+
# Blob object for the particular file you want to check
368+
blob = gcs_client.bucket("idc-open-data").blob(blob_key)
369+
370+
# Open the blob object for remote reading with a ~500kB chunk size
371+
with blob.open(mode="rb", chunk_size=500_000) as reader:
372+
# Read the file with stop_before_pixels=True, which moves the cursor
373+
# position to the start of the pixel data attribute
374+
dcm = dcmread(reader, stop_before_pixels=True)
375+
376+
if not dcm.file_meta.TransferSyntaxUID.is_encapsulated:
377+
print(
378+
"This image does not use an encapsulated (compressed) transfer "
379+
"syntax, so offset tables are not required."
380+
)
381+
else:
382+
# The presence of the extended offset table in the loaded metadata can be
383+
# checked straightforwardly
384+
has_extended_offset_table = "ExtendedOffsetTable" in dcm
385+
print("Has Extended Offset Table:", has_extended_offset_table)
386+
387+
# Read the next tag, which should be the pixel data tag
388+
tag = reader.read(4)
389+
assert tag == b'\xe0\x7f\x10\x00', "Expected pixel data tag"
390+
391+
# Skip over VR (2 bytes), reserved (2 bytes), and pixel data length (4
392+
# bytes), giving 8 bytes total. Refer to
393+
# https://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_A.4.html#table_A.4-2
394+
reader.seek(8, os.SEEK_CUR)
395+
396+
# Read the item tag for the offset table item
397+
item_tag = reader.read(4)
398+
assert item_tag == b'\xfe\xff\x00\xe0', "Expected item tag"
399+
400+
# Read the 32-bit length of the pixel data's basic offset table
401+
length = reader.read(4)
402+
403+
# If the length of the offset table is non-zero, the offset table exists
404+
has_basic_offset_table = (length != b'\x00\x00\x00\x00')
405+
print("Has Basic Offset Table:", has_basic_offset_table)
406+
407+
408+
# Example with no offset table (NLST-LSS collection)
409+
check_offset_table("4a30ffd2-8489-427b-9a83-03f4cf28534d/ad46e1e3-b37c-434b-a67a-5bacbcc608d9.dcm")
410+
411+
# Example with basic offset table (CCDI-MCI collection)
412+
check_offset_table("763fe058-7d25-4ba7-9b29-fd3d6c41dc4b/210f0529-c767-4795-9acf-bad2f4877427.dcm")
339413

414+
# Example with extended offset table (CMB-MML collection)
415+
check_offset_table("79f38b50-4df4-4358-9271-f28aeac573d7/23b9272a-34ef-49ca-833f-84329a18c1e4.dcm")
340416
```

0 commit comments

Comments
 (0)