
Commit 0d6b53c

Revert "SEA: Decouple Link Fetching (#632)"
This reverts commit 806e5f5.
1 parent 77c0343 · commit 0d6b53c

File tree: 2 files changed (+89 −371 lines changed)

Lines changed: 58 additions & 201 deletions
@@ -1,8 +1,7 @@
 from __future__ import annotations

 from abc import ABC
-import threading
-from typing import Dict, List, Optional, Tuple, Union, TYPE_CHECKING
+from typing import List, Optional, Tuple, Union, TYPE_CHECKING

 from databricks.sql.cloudfetch.download_manager import ResultFileDownloadManager

@@ -122,179 +121,6 @@ def close(self):
         return


-class LinkFetcher:
-    """
-    Background helper that incrementally retrieves *external links* for a
-    result set produced by the SEA backend and feeds them to a
-    :class:`databricks.sql.cloudfetch.download_manager.ResultFileDownloadManager`.
-
-    The SEA backend splits large result sets into *chunks*. Each chunk is
-    stored remotely (e.g., in object storage) and exposed via a signed URL
-    encapsulated by an :class:`ExternalLink`. Only the first batch of links is
-    returned with the initial query response. The remaining links must be
-    pulled on demand using the *next-chunk* token embedded in each
-    :pyattr:`ExternalLink.next_chunk_index`.
-
-    LinkFetcher takes care of this choreography so callers (primarily
-    ``SeaCloudFetchQueue``) can simply ask for the link of a specific
-    ``chunk_index`` and block until it becomes available.
-
-    Key responsibilities:
-
-    • Maintain an in-memory mapping from ``chunk_index`` → ``ExternalLink``.
-    • Launch a background worker thread that continuously requests the next
-      batch of links from the backend until all chunks have been discovered or
-      an unrecoverable error occurs.
-    • Bridge SEA link objects to the Thrift representation expected by the
-      existing download manager.
-    • Provide a synchronous API (`get_chunk_link`) that blocks until the desired
-      link is present in the cache.
-    """
-
-    def __init__(
-        self,
-        download_manager: ResultFileDownloadManager,
-        backend: SeaDatabricksClient,
-        statement_id: str,
-        initial_links: List[ExternalLink],
-        total_chunk_count: int,
-    ):
-        self.download_manager = download_manager
-        self.backend = backend
-        self._statement_id = statement_id
-
-        self._shutdown_event = threading.Event()
-
-        self._link_data_update = threading.Condition()
-        self._error: Optional[Exception] = None
-        self.chunk_index_to_link: Dict[int, ExternalLink] = {}
-
-        self._add_links(initial_links)
-        self.total_chunk_count = total_chunk_count
-
-        # DEBUG: capture initial state for observability
-        logger.debug(
-            "LinkFetcher[%s]: initialized with %d initial link(s); expecting %d total chunk(s)",
-            statement_id,
-            len(initial_links),
-            total_chunk_count,
-        )
-
-    def _add_links(self, links: List[ExternalLink]):
-        """Cache *links* locally and enqueue them with the download manager."""
-        logger.debug(
-            "LinkFetcher[%s]: caching %d link(s) – chunks %s",
-            self._statement_id,
-            len(links),
-            ", ".join(str(l.chunk_index) for l in links) if links else "<none>",
-        )
-        for link in links:
-            self.chunk_index_to_link[link.chunk_index] = link
-            self.download_manager.add_link(LinkFetcher._convert_to_thrift_link(link))
-
-    def _get_next_chunk_index(self) -> Optional[int]:
-        """Return the next *chunk_index* that should be requested from the backend, or ``None`` if we have them all."""
-        with self._link_data_update:
-            max_chunk_index = max(self.chunk_index_to_link.keys(), default=None)
-            if max_chunk_index is None:
-                return 0
-            max_link = self.chunk_index_to_link[max_chunk_index]
-            return max_link.next_chunk_index
-
-    def _trigger_next_batch_download(self) -> bool:
-        """Fetch the next batch of links from the backend and return *True* on success."""
-        logger.debug(
-            "LinkFetcher[%s]: requesting next batch of links", self._statement_id
-        )
-        next_chunk_index = self._get_next_chunk_index()
-        if next_chunk_index is None:
-            return False
-
-        try:
-            links = self.backend.get_chunk_links(self._statement_id, next_chunk_index)
-            with self._link_data_update:
-                self._add_links(links)
-                self._link_data_update.notify_all()
-        except Exception as e:
-            logger.error(
-                f"LinkFetcher: Error fetching links for chunk {next_chunk_index}: {e}"
-            )
-            with self._link_data_update:
-                self._error = e
-                self._link_data_update.notify_all()
-            return False
-
-        logger.debug(
-            "LinkFetcher[%s]: received %d new link(s)",
-            self._statement_id,
-            len(links),
-        )
-        return True
-
-    def get_chunk_link(self, chunk_index: int) -> Optional[ExternalLink]:
-        """Return (blocking) the :class:`ExternalLink` associated with *chunk_index*."""
-        logger.debug(
-            "LinkFetcher[%s]: waiting for link of chunk %d",
-            self._statement_id,
-            chunk_index,
-        )
-        if chunk_index >= self.total_chunk_count:
-            return None
-
-        with self._link_data_update:
-            while chunk_index not in self.chunk_index_to_link:
-                if self._error:
-                    raise self._error
-                if self._shutdown_event.is_set():
-                    raise ProgrammingError(
-                        "LinkFetcher is shutting down without providing link for chunk index {}".format(
-                            chunk_index
-                        )
-                    )
-                self._link_data_update.wait()
-
-            return self.chunk_index_to_link[chunk_index]
-
-    @staticmethod
-    def _convert_to_thrift_link(link: ExternalLink) -> TSparkArrowResultLink:
-        """Convert SEA external links to Thrift format for compatibility with existing download manager."""
-        # Parse the ISO format expiration time
-        expiry_time = int(dateutil.parser.parse(link.expiration).timestamp())
-        return TSparkArrowResultLink(
-            fileLink=link.external_link,
-            expiryTime=expiry_time,
-            rowCount=link.row_count,
-            bytesNum=link.byte_count,
-            startRowOffset=link.row_offset,
-            httpHeaders=link.http_headers or {},
-        )
-
-    def _worker_loop(self):
-        """Entry point for the background thread."""
-        logger.debug("LinkFetcher[%s]: worker thread started", self._statement_id)
-        while not self._shutdown_event.is_set():
-            links_downloaded = self._trigger_next_batch_download()
-            if not links_downloaded:
-                self._shutdown_event.set()
-        logger.debug("LinkFetcher[%s]: worker thread exiting", self._statement_id)
-        self._link_data_update.notify_all()
-
-    def start(self):
-        """Spawn the worker thread."""
-        logger.debug("LinkFetcher[%s]: starting worker thread", self._statement_id)
-        self._worker_thread = threading.Thread(
-            target=self._worker_loop, name=f"LinkFetcher-{self._statement_id}"
-        )
-        self._worker_thread.start()
-
-    def stop(self):
-        """Signal the worker thread to stop and wait for its termination."""
-        logger.debug("LinkFetcher[%s]: stopping worker thread", self._statement_id)
-        self._shutdown_event.set()
-        self._worker_thread.join()
-        logger.debug("LinkFetcher[%s]: worker thread stopped", self._statement_id)
-
-
 class SeaCloudFetchQueue(CloudFetchQueue):
     """Queue implementation for EXTERNAL_LINKS disposition with ARROW format for SEA backend."""

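For context, the removed class is a classic producer/consumer arrangement: one worker thread walks the next_chunk_index chain and publishes links into a shared dict, while callers block on a threading.Condition until their chunk appears. Below is a minimal, self-contained sketch of that pattern, not the driver's API: Link, FakeBackend, and Prefetcher are hypothetical stand-ins for ExternalLink, SeaDatabricksClient, and LinkFetcher, and the Thrift bridging and error propagation are omitted. Note that Condition.notify_all() must be called while holding the condition's lock, so the sketch wraps every notify in a `with` block:

import threading
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Link:
    chunk_index: int
    next_chunk_index: Optional[int]  # None once the chain is exhausted


class FakeBackend:
    """Hypothetical stand-in that returns one link per request."""

    def __init__(self, total: int):
        self.total = total

    def get_chunk_links(self, chunk_index: int) -> List[Link]:
        nxt = chunk_index + 1 if chunk_index + 1 < self.total else None
        return [Link(chunk_index, nxt)]


class Prefetcher:
    """Background worker that fills a chunk_index -> Link cache."""

    def __init__(self, backend: FakeBackend, initial: List[Link]):
        self._backend = backend
        self._cond = threading.Condition()
        self._links: Dict[int, Link] = {l.chunk_index: l for l in initial}
        self._done = threading.Event()

    def _next_index(self) -> Optional[int]:
        # Follow the next-chunk token of the highest chunk seen so far.
        with self._cond:
            if not self._links:
                return 0
            return self._links[max(self._links)].next_chunk_index

    def _worker(self) -> None:
        while not self._done.is_set():
            nxt = self._next_index()
            if nxt is None:  # all chunks discovered
                self._done.set()
                break
            batch = self._backend.get_chunk_links(nxt)
            with self._cond:
                self._links.update({l.chunk_index: l for l in batch})
                self._cond.notify_all()
        with self._cond:  # wake any waiters so they can observe shutdown
            self._cond.notify_all()

    def start(self) -> None:
        self._thread = threading.Thread(target=self._worker, daemon=True)
        self._thread.start()

    def get(self, chunk_index: int) -> Link:
        # Block until the worker has published the requested link.
        with self._cond:
            while chunk_index not in self._links:
                if self._done.is_set():
                    raise RuntimeError(f"no link for chunk {chunk_index}")
                self._cond.wait()
            return self._links[chunk_index]


prefetcher = Prefetcher(FakeBackend(total=3), initial=[Link(0, 1)])
prefetcher.start()
print(prefetcher.get(2).chunk_index)  # blocks until the worker reaches chunk 2
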
@@ -332,49 +158,80 @@ def __init__(
             description=description,
         )

+        self._sea_client = sea_client
+        self._statement_id = statement_id
+        self._total_chunk_count = total_chunk_count
+
         logger.debug(
             "SeaCloudFetchQueue: Initialize CloudFetch loader for statement {}, total chunks: {}".format(
                 statement_id, total_chunk_count
             )
         )

         initial_links = result_data.external_links or []
+        self._chunk_index_to_link = {link.chunk_index: link for link in initial_links}

         # Track the current chunk we're processing
         self._current_chunk_index = 0
+        first_link = self._chunk_index_to_link.get(self._current_chunk_index, None)
+        if not first_link:
+            # possibly an empty response
+            return None

-        self.link_fetcher = None  # for empty responses, we do not need a link fetcher
-        if total_chunk_count > 0:
-            self.link_fetcher = LinkFetcher(
-                download_manager=self.download_manager,
-                backend=sea_client,
-                statement_id=statement_id,
-                initial_links=initial_links,
-                total_chunk_count=total_chunk_count,
-            )
-            self.link_fetcher.start()
-
+        # Track the current chunk we're processing
+        self._current_chunk_index = 0
         # Initialize table and position
-        self.table = self._create_next_table()
+        self.table = self._create_table_from_link(first_link)

-    def _create_next_table(self) -> Union["pyarrow.Table", None]:
-        """Create next table by retrieving the logical next downloaded file."""
-        if self.link_fetcher is None:
-            return None
+    def _convert_to_thrift_link(self, link: ExternalLink) -> TSparkArrowResultLink:
+        """Convert SEA external links to Thrift format for compatibility with existing download manager."""
+        # Parse the ISO format expiration time
+        expiry_time = int(dateutil.parser.parse(link.expiration).timestamp())
+        return TSparkArrowResultLink(
+            fileLink=link.external_link,
+            expiryTime=expiry_time,
+            rowCount=link.row_count,
+            bytesNum=link.byte_count,
+            startRowOffset=link.row_offset,
+            httpHeaders=link.http_headers or {},
+        )

-        chunk_link = self.link_fetcher.get_chunk_link(self._current_chunk_index)
-        if chunk_link is None:
+    def _get_chunk_link(self, chunk_index: int) -> Optional["ExternalLink"]:
+        if chunk_index >= self._total_chunk_count:
             return None

-        row_offset = chunk_link.row_offset
-        # NOTE: link has already been submitted to download manager at this point
-        arrow_table = self._create_table_at_offset(row_offset)
+        if chunk_index not in self._chunk_index_to_link:
+            links = self._sea_client.get_chunk_links(self._statement_id, chunk_index)
+            self._chunk_index_to_link.update({l.chunk_index: l for l in links})
+
+        link = self._chunk_index_to_link.get(chunk_index, None)
+        if not link:
+            raise ServerOperationError(
+                f"Error fetching link for chunk {chunk_index}",
+                {
+                    "operation-id": self._statement_id,
+                    "diagnostic-info": None,
+                },
+            )
+        return link

-        self._current_chunk_index += 1
+    def _create_table_from_link(
+        self, link: ExternalLink
+    ) -> Union["pyarrow.Table", None]:
+        """Create a table from a link."""
+
+        thrift_link = self._convert_to_thrift_link(link)
+        self.download_manager.add_link(thrift_link)
+
+        row_offset = link.row_offset
+        arrow_table = self._create_table_at_offset(row_offset)

         return arrow_table

-    def close(self):
-        super().close()
-        if self.link_fetcher:
-            self.link_fetcher.stop()
+    def _create_next_table(self) -> Union["pyarrow.Table", None]:
+        """Create next table by retrieving the logical next downloaded file."""
+        self._current_chunk_index += 1
+        next_chunk_link = self._get_chunk_link(self._current_chunk_index)
+        if not next_chunk_link:
+            return None
+        return self._create_table_from_link(next_chunk_link)
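The replacement code above restores the pre-#632 behavior: no worker thread, and each call to _create_next_table advances a cursor and fetches any missing link synchronously on the caller's thread, caching batches as they arrive. A minimal sketch of that lazy, cache-backed flow follows, including the dateutil-based expiry parsing that _convert_to_thrift_link performs; Link, FakeBackend, and LazyChunkQueue are hypothetical stand-ins, not the driver's API:

import dateutil.parser
from dataclasses import dataclass
from typing import Dict, List, Optional


@dataclass
class Link:
    chunk_index: int
    expiration: str  # ISO-8601 timestamp, as in ExternalLink.expiration


class FakeBackend:
    """Hypothetical stand-in for the SEA client's get_chunk_links."""

    def get_chunk_links(self, chunk_index: int) -> List[Link]:
        return [Link(chunk_index, "2030-01-01T00:00:00Z")]


class LazyChunkQueue:
    def __init__(self, backend: FakeBackend, initial: List[Link], total: int):
        self._backend = backend
        self._total = total
        self._cache: Dict[int, Link] = {l.chunk_index: l for l in initial}
        self._current = 0  # chunk 0 arrives with the query response

    def _get_link(self, chunk_index: int) -> Optional[Link]:
        if chunk_index >= self._total:
            return None  # past the last chunk
        if chunk_index not in self._cache:
            # Synchronous fetch on the caller's thread: the behavior the revert restores.
            batch = self._backend.get_chunk_links(chunk_index)
            self._cache.update({l.chunk_index: l for l in batch})
        return self._cache[chunk_index]

    def next_chunk(self) -> Optional[Link]:
        self._current += 1
        return self._get_link(self._current)


queue = LazyChunkQueue(FakeBackend(), [Link(0, "2030-01-01T00:00:00Z")], total=3)
link = queue.next_chunk()
assert link is not None
expiry_epoch = int(dateutil.parser.parse(link.expiration).timestamp())
print(link.chunk_index, expiry_epoch)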
