Skip to content
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 48 additions & 1 deletion data/downloading-data/direct-loading.md
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,53 @@ dcm = pydicom.dcmread("...") # Any method to read from file/cloud storage


print("Has Extended Offset Table:", "ExtendedOffsetTable" in dcm)
print("Has Basic Offset Table:", dcm.Pixeldata[4:8] != b'\x00\x00\x00\x00')
print("Has Basic Offset Table:", dcm.PixelData[4:8] != b'\x00\x00\x00\x00')

```

To do this from a remote Google Cloud Storage blob without needing to pull all the pixel data, you can do something like this:

```python

from pydicom import dcmread
from pydicom.filebase import DicomIO
from google.cloud import storage


# Create a storage client and use it to access the IDC's public data package
gcs_client = storage.Client.create_anonymous_client()

# Blob object for the particular file you want to check
blob = (
    gcs_client
    .bucket("idc-open-data")
    .blob("3a84b4b8-b9c1-45e5-99db-d778fd5218f8/155b267a-02fb-41d3-abd7-a807df84ef3f.dcm")
)


# Open the blob object for remote reading with a ~500kB chunk size, so that
# only the chunks actually read are fetched rather than the whole file
with blob.open(mode="rb", chunk_size=500_000) as reader:
    # Wrap the reader in a DicomIO for compatibility with pydicom
    buf = DicomIO(reader)

    # Read the file with stop_before_pixels=True, this moves the cursor
    # position to the start of the pixel data attribute
    dcm = dcmread(buf, stop_before_pixels=True)

    # The presence of the extended offset table in the loaded metadata can be
    # checked straightforwardly
    has_extended_offset_table = "ExtendedOffsetTable" in dcm
    print("Has Extended Offset Table:", has_extended_offset_table)

    # Read the next tag, should be the pixel data tag (7FE0,0010) encoded as
    # little endian
    tag = buf.read(4)
    assert tag == b'\xe0\x7f\x10\x00'

    # Encapsulated pixel data is always stored as explicit VR little endian,
    # so the tag is followed by the VR (2 bytes), reserved bytes (2 bytes),
    # the undefined value length (4 bytes) and the tag of the first item
    # (4 bytes). That first item is the basic offset table. Skip these 12
    # bytes to reach the item's length field.
    header = buf.read(12)

    # The first item tag must be (FFFE,E000). If it is not, the pixel data is
    # not encapsulated and there is no basic offset table to look for.
    assert header[8:12] == b'\xfe\xff\x00\xe0', "Pixel data is not encapsulated"

    # Read the 32bit length of the pixel data's basic offset table
    length = buf.read(4)

    # If the length of the offset table is non-zero, the offset table exists
    has_basic_offset_table = length != b'\x00\x00\x00\x00'
    print("Has Basic Offset Table:", has_basic_offset_table)

```