 ResultManifest,
 )
 from databricks.sql.backend.sea.utils.constants import ResultFormat
-from databricks.sql.exc import ProgrammingError
+from databricks.sql.exc import ProgrammingError, Error
 from databricks.sql.thrift_api.TCLIService.ttypes import TSparkArrowResultLink
 from databricks.sql.types import SSLOptions
 from databricks.sql.utils import (
@@ -137,10 +137,68 @@ def __init__(
         self._error: Optional[Exception] = None
         self.chunk_index_to_link: Dict[int, "ExternalLink"] = {}

-        for link in initial_links:
+        # Add initial links (no notification needed during init)
+        self._add_links_to_manager(initial_links, notify=False)
+        self.total_chunk_count = total_chunk_count
+        self._worker_thread: Optional[threading.Thread] = None
+
+    def _add_links_to_manager(self, links: List["ExternalLink"], notify: bool = True):
+        """
+        Add external links to both the chunk mapping and the download manager.
+
+        Args:
+            links: List of external links to add
+            notify: Whether to notify waiting threads (default True). Callers
+                that pass notify=True must hold _link_data_update, since
+                Condition.notify_all() requires the lock to be held.
+        """
+        for link in links:
             self.chunk_index_to_link[link.chunk_index] = link
             self.download_manager.add_link(self._convert_to_thrift_link(link))
-        self.total_chunk_count = total_chunk_count
+
+        if notify:
+            self._link_data_update.notify_all()
+
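The notify-under-lock contract above is easiest to see in isolation. Here is a minimal, self-contained sketch of the `threading.Condition` pattern this class relies on (generic `store`/`cond` names, not the real fields):

```python
import threading

cond = threading.Condition()
store: dict = {}

def producer(items: dict) -> None:
    with cond:               # the lock must be held to mutate and notify
        store.update(items)
        cond.notify_all()    # raises RuntimeError if called without the lock

def consumer(key):
    with cond:
        while key not in store:   # re-check: wakeups can be spurious
            cond.wait()
        return store[key]
```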
+    def _clear_chunks_from_index(self, start_chunk_index: int):
+        """
+        Clear all chunks >= start_chunk_index from the chunk mapping.
+
+        Args:
+            start_chunk_index: The chunk index to start clearing from (inclusive)
+        """
+        chunks_to_remove = [
+            chunk_idx
+            for chunk_idx in self.chunk_index_to_link.keys()
+            if chunk_idx >= start_chunk_index
+        ]
+
+        logger.debug(
+            f"LinkFetcher: Clearing chunks {chunks_to_remove} from index {start_chunk_index}"
+        )
+        for chunk_idx in chunks_to_remove:
+            del self.chunk_index_to_link[chunk_idx]
+
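A quick behavioral note, since `restart_from_chunk` below depends on it: clearing is inclusive of the starting index, so the next fetch resumes exactly at the cleared boundary. Stand-in values, not real `ExternalLink` objects:

```python
# fetcher.chunk_index_to_link == {0: l0, 1: l1, 2: l2, 3: l3}
fetcher._clear_chunks_from_index(2)
# fetcher.chunk_index_to_link == {0: l0, 1: l1}
# -> the highest known chunk is now 1, so chunk 2 is fetched next
```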
+    def _fetch_and_add_links(self, chunk_index: int) -> List["ExternalLink"]:
+        """
+        Fetch links from the backend and add them to the manager.
+
+        Args:
+            chunk_index: The chunk index to fetch
+
+        Returns:
+            List of fetched external links
+
+        Raises:
+            Exception: If fetching fails. The error is recorded in
+                self._error and waiting threads are notified before
+                the exception is re-raised.
+        """
+        logger.debug(f"LinkFetcher: Fetching links for chunk {chunk_index}")
+
+        try:
+            links = self.backend.get_chunk_links(self._statement_id, chunk_index)
+            self._add_links_to_manager(links, notify=True)
+            logger.debug(
+                f"LinkFetcher: Added {len(links)} links starting from chunk {chunk_index}"
+            )
+            return links
+        except Exception as e:
+            logger.error(f"LinkFetcher: Failed to fetch chunk {chunk_index}: {e}")
+            self._error = e
+            self._link_data_update.notify_all()
+            raise

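The notify-then-raise pattern pairs with a consumer that re-checks shared state on every wakeup. The real waiter is `get_chunk_link` (outside this hunk); its assumed shape:

```python
# Assumed shape of the waiting side: wake on notify_all(), then either
# find the link or surface the recorded error instead of blocking forever.
with self._link_data_update:
    while chunk_index not in self.chunk_index_to_link:
        if self._error is not None:
            raise self._error
        self._link_data_update.wait()
    link = self.chunk_index_to_link[chunk_index]
```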
     def _get_next_chunk_index(self) -> Optional[int]:
         with self._link_data_update:
@@ -155,23 +213,13 @@ def _trigger_next_batch_download(self) -> bool:
         if next_chunk_index is None:
             return False

-        try:
-            links = self.backend.get_chunk_links(self._statement_id, next_chunk_index)
-            with self._link_data_update:
-                for l in links:
-                    self.chunk_index_to_link[l.chunk_index] = l
-                    self.download_manager.add_link(self._convert_to_thrift_link(l))
-                self._link_data_update.notify_all()
-        except Exception as e:
-            logger.error(
-                f"LinkFetcher: Error fetching links for chunk {next_chunk_index}: {e}"
-            )
-            with self._link_data_update:
-                self._error = e
-                self._link_data_update.notify_all()
-            return False
-
-        return True
+        with self._link_data_update:
+            try:
+                self._fetch_and_add_links(next_chunk_index)
+                return True
+            except Exception:
+                # Error already logged and recorded by _fetch_and_add_links
+                return False

     def get_chunk_link(self, chunk_index: int) -> Optional["ExternalLink"]:
         if chunk_index >= self.total_chunk_count:
@@ -185,6 +233,45 @@ def get_chunk_link(self, chunk_index: int) -> Optional["ExternalLink"]:

         return self.chunk_index_to_link.get(chunk_index, None)

+    def restart_from_chunk(self, chunk_index: int):
+        """
+        Restart the LinkFetcher from a specific chunk index.
+
+        This method handles both cases:
+        1. The LinkFetcher is done/stopped and needs to be restarted
+        2. The LinkFetcher is active but must start over from the expired chunk
+
+        The essential step is clearing all chunks >= chunk_index so that
+        _get_next_chunk_index() returns the correct next chunk.
+
+        Args:
+            chunk_index: The chunk index to restart from
+        """
+        logger.debug(f"LinkFetcher: Restarting from chunk {chunk_index}")
+
+        # Stop the current worker if running
+        self.stop()
+
+        with self._link_data_update:
+            # Clear error state
+            self._error = None
+
+            # Clear all chunks >= chunk_index so that
+            # _get_next_chunk_index() resumes from the right place
+            self._clear_chunks_from_index(chunk_index)
+
+            # Fetch the restart chunk; it becomes the new highest known
+            # chunk and our starting point. On failure the error has
+            # already been logged and recorded, so just let it propagate.
+            self._fetch_and_add_links(chunk_index)
+
+        # Start the worker again; _get_next_chunk_index() now works correctly
+        self.start()
+        logger.debug(f"LinkFetcher: Successfully restarted from chunk {chunk_index}")
+
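A hypothetical usage sketch makes the intended post-conditions concrete (assume `fetcher` is a LinkFetcher whose link for chunk 7 just expired):

```python
fetcher.restart_from_chunk(7)

# Intended post-conditions:
#   * no stale entries >= 7 remain, except the batch freshly fetched from 7
#   * _error is cleared and the worker thread is alive again
link = fetcher.get_chunk_link(7)   # resolves from the refreshed mapping
```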
     def _convert_to_thrift_link(self, link: "ExternalLink") -> TSparkArrowResultLink:
         """Convert SEA external links to Thrift format for compatibility with the existing download manager."""
         # Parse the ISO format expiration time
@@ -205,12 +292,17 @@ def _worker_loop(self):
             break

     def start(self):
+        if self._worker_thread and self._worker_thread.is_alive():
+            return  # Already running
+
+        self._shutdown_event.clear()
         self._worker_thread = threading.Thread(target=self._worker_loop)
         self._worker_thread.start()

     def stop(self):
-        self._shutdown_event.set()
-        self._worker_thread.join()
+        if self._worker_thread and self._worker_thread.is_alive():
+            self._shutdown_event.set()
+            self._worker_thread.join()


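With these guards, `start()` and `stop()` are idempotent, which is exactly what lets `restart_from_chunk()` call `stop()` unconditionally. The now-safe call sequences:

```python
fetcher.stop()    # no-op if the worker never started or already joined
fetcher.start()   # clears the shutdown flag and spawns a worker
fetcher.start()   # no-op: the thread is already alive
fetcher.stop()    # sets the event and joins exactly once
```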
 class SeaCloudFetchQueue(CloudFetchQueue):
@@ -269,6 +361,7 @@ def __init__(
             max_download_threads=max_download_threads,
             lz4_compressed=lz4_compressed,
             ssl_options=ssl_options,
+            expired_link_callback=self._handle_expired_link,
         )

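For orientation, this is the assumed contract of `expired_link_callback` on the download-manager side. `_maybe_renew` and `_is_link_expired` are hypothetical names for illustration only, not the library's real API:

```python
# Hypothetical consumer-side sketch; only expired_link_callback itself
# comes from this diff.
def _maybe_renew(self, link: TSparkArrowResultLink) -> TSparkArrowResultLink:
    if self._is_link_expired(link) and self._expired_link_callback:
        # The callback must return a fresh link covering the same rows
        return self._expired_link_callback(link)
    return link
```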
         self.link_fetcher = LinkFetcher(
@@ -283,6 +376,101 @@ def __init__(
         # Initialize table and position
         self.table = self._create_next_table()

+    def _handle_expired_link(
+        self, expired_link: TSparkArrowResultLink
+    ) -> TSparkArrowResultLink:
+        """
+        Handle an expired link for the SEA backend.
+
+        The SEA backend can recover from an expired link by:
+        1. Cancelling all pending downloads
+        2. Finding the chunk index for the expired link
+        3. Restarting the LinkFetcher from that chunk
+        4. Returning the renewed link
+
+        Args:
+            expired_link: The expired link
+
+        Returns:
+            A new link with the same row offset
+
+        Raises:
+            Error: If unable to fetch a new link
+        """
+        logger.warning(
+            "SeaCloudFetchQueue: Link expired for offset {}, row count {}. Attempting to fetch new links.".format(
+                expired_link.startRowOffset, expired_link.rowCount
+            )
+        )
+
+        try:
+            # Step 1: Cancel all pending downloads
+            self.download_manager.cancel_all_downloads()
+            logger.debug("SeaCloudFetchQueue: Cancelled all pending downloads")
+
+            # Step 2: Find which chunk contains the expired link
+            target_chunk_index = self._find_chunk_index_for_row_offset(
+                expired_link.startRowOffset
+            )
+            if target_chunk_index is None:
+                # Fallback: the offset does not match a known chunk boundary.
+                # This should be rare; restart from chunk 0 so that every
+                # link is refetched rather than failing outright.
+                logger.warning(
+                    "SeaCloudFetchQueue: Could not find chunk index for row offset {}, restarting from chunk 0".format(
+                        expired_link.startRowOffset
+                    )
+                )
+                target_chunk_index = 0
+
+            # Step 3: Restart the LinkFetcher from the target chunk.
+            # This handles both stopped and active LinkFetcher cases.
+            self.link_fetcher.restart_from_chunk(target_chunk_index)
+
+            # Step 4: Return the link matching the expired link's row offset;
+            # after the restart it should be present in the mapping.
+            for external_link in self.link_fetcher.chunk_index_to_link.values():
+                if external_link.row_offset == expired_link.startRowOffset:
+                    new_thrift_link = self.link_fetcher._convert_to_thrift_link(
+                        external_link
+                    )
+                    logger.debug(
+                        "SeaCloudFetchQueue: Found replacement link for offset {}, row count {}".format(
+                            new_thrift_link.startRowOffset, new_thrift_link.rowCount
+                        )
+                    )
+                    return new_thrift_link
+
+            # Still not found after the restart: give up with a clear error
+            logger.error(
+                "SeaCloudFetchQueue: Could not find replacement link for row offset {} after restart".format(
+                    expired_link.startRowOffset
+                )
+            )
+            raise Error(
+                f"CloudFetch link has expired and could not be renewed for offset {expired_link.startRowOffset}"
+            )
+        except Error:
+            raise
+        except Exception as e:
+            logger.error(
+                "SeaCloudFetchQueue: Error handling expired link: {}".format(str(e))
+            )
+            raise Error(
+                f"CloudFetch link has expired and renewal failed: {str(e)}"
+            ) from e
+
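An illustrative round trip with stand-in link objects (not real Thrift values) shows the invariant the callback preserves:

```python
fresh = queue._handle_expired_link(stale_link)
# The renewed link covers exactly the same rows...
assert fresh.startRowOffset == stale_link.startRowOffset
assert fresh.rowCount == stale_link.rowCount
# ...while the presigned URL itself (fileLink) is the part that changes.
```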
+    def _find_chunk_index_for_row_offset(self, row_offset: int) -> Optional[int]:
+        """
+        Find the chunk whose starting row offset equals the given row offset.
+
+        Note that this is an exact match against each chunk's start offset,
+        not a containment check over the chunk's row range.
+
+        Args:
+            row_offset: The row offset to find
+
+        Returns:
+            The chunk index, or None if not found
+        """
+        # Search the known chunks for one starting at this row offset
+        for chunk_index, external_link in self.link_fetcher.chunk_index_to_link.items():
+            if external_link.row_offset == row_offset:
+                return chunk_index
+
+        # Not found among known chunks; the caller handles the fallback
+        return None
+
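The exact-match semantics are what drive the chunk-0 fallback in `_handle_expired_link`. With stand-in values:

```python
# Suppose chunk 3 starts at row 300 and spans 50 rows.
queue._find_chunk_index_for_row_offset(300)  # -> 3 (boundary match)
queue._find_chunk_index_for_row_offset(320)  # -> None: row 320 lies inside
                                             #    chunk 3, but offsets differ
```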
     def _create_next_table(self) -> Union["pyarrow.Table", None]:
         """Create next table by retrieving the logical next downloaded file."""
         if not self.download_manager: