Description
$ llama-stack-client models register ibm-granite/granite-3.3-8b-instruct-GGUF
$ llama-stack-client models list
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ model_type ┃ identifier                               ┃ provider_resource_id                     ┃ metadata ┃ provider_id ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ llm        │ ibm-granite/granite-3.3-8b-instruct-GGUF │ ibm-granite/granite-3.3-8b-instruct-GGUF │          │ podm…       │
└────────────┴──────────────────────────────────────────┴──────────────────────────────────────────┴──────────┴─────────────┘
$ llama-stack-client inference chat-completion --message "tell me a joke" --stream
Assistant> Why don't scientists trust atoms?
Because they make up everything!
╭──────────────────────────────────────────────────────────╮
│ Failed to inference chat-completion │
│ │
│ Error Type: AttributeError │
│ Details: 'NoneType' object has no attribute 'event_type' │
╰──────────────────────────────────────────────────────────╯
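Client-side, the CLI dies while iterating the SSE stream: when the server aborts mid-stream, the final chunk evidently comes back with event set to None, and dereferencing it produces the AttributeError shown above. A minimal stand-in sketch of that failure pattern (hypothetical types, not the actual llama-stack-client internals):

from dataclasses import dataclass
from typing import Any, Optional

@dataclass
class Chunk:
    # None when the stream ends with an error payload instead of an event
    event: Optional[Any] = None

def consume(chunks):
    for chunk in chunks:
        # Raises: 'NoneType' object has no attribute 'event_type'
        if chunk.event.event_type == "progress":
            pass

consume([Chunk()])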
Server-side:
[...]
ERROR 2025-06-05 06:56:26,747 llama_stack.providers.utils.inference.prompt_adapter:294 inference: Could not resolve
model ibm-granite/granite-3.3-8b-instruct-GGUF
WARNING 2025-06-05 06:56:26,749 llama_stack.providers.utils.inference.prompt_adapter:475 inference: Could not resolve
model ibm-granite/granite-3.3-8b-instruct-GGUF, defaulting to json tool prompt format
ERROR 2025-06-05 06:57:07,847 llama_stack.distribution.server.server:188 server: Error in sse_generator
╭───────────────────────────────────── Traceback (most recent call last) ─────────────────────────────────────╮
│ /opt/app-root/lib64/python3.11/site-packages/llama_stack/distribution/server/server.py:180 in sse_generator │
│ │
│ 177 │ event_gen = None │
│ 178 │ try: │
│ 179 │ │ event_gen = await event_gen_coroutine │
│ ❱ 180 │ │ async for item in event_gen: │
│ 181 │ │ │ yield create_sse_event(item) │
│ 182 │ │ │ await asyncio.sleep(0.01) │
│ 183 │ except asyncio.CancelledError: │
│ │
│ /opt/app-root/lib64/python3.11/site-packages/llama_stack/distribution/routers/inference.py:239 in │
│ stream_generator │
│ │
│ 236 │ │ │ │
│ 237 │ │ │ async def stream_generator(): │
│ 238 │ │ │ │ completion_text = "" │
│ ❱ 239 │ │ │ │ async for chunk in await provider.chat_completion(**params): │
│ 240 │ │ │ │ │ if chunk.event.event_type == │
│ ChatCompletionResponseEventType.progress: │
│ 241 │ │ │ │ │ │ if chunk.event.delta.type == "text": │
│ 242 │ │ │ │ │ │ │ completion_text += chunk.event.delta.text │
│ │
│ /opt/app-root/lib64/python3.11/site-packages/podman_ai_lab_stack/podman_ai_lab.py:274 in │
│ _stream_chat_completion │
│ │
│ 271 │ │ │ │ ) │
│ 272 │ │ │
│ 273 │ │ stream = _generate_and_convert_to_openai_compat() │
│ ❱ 274 │ │ async for chunk in process_chat_completion_stream_response(stream, request): │
│ 275 │ │ │ yield chunk │
│ 276 │ │
│ 277 │ async def embeddings( │
│ │
│ /opt/app-root/lib64/python3.11/site-packages/llama_stack/providers/utils/inference/openai_compat.py:385 in │
│ process_chat_completion_stream_response │
│ │
│ 382 │ ipython = False │
│ 383 │ stop_reason = None │
│ 384 │ │
│ ❱ 385 │ async for chunk in stream: │
│ 386 │ │ choice = chunk.choices[0] │
│ 387 │ │ finish_reason = choice.finish_reason │
│ 388 │
│ │
│ /opt/app-root/lib64/python3.11/site-packages/podman_ai_lab_stack/podman_ai_lab.py:258 in │
│ _generate_and_convert_to_openai_compat │
│ │
│ 255 │ │ │ │ s = await self.client.chat(**params) │
│ 256 │ │ │ else: │
│ 257 │ │ │ │ s = await self.client.generate(**params) │
│ ❱ 258 │ │ │ async for chunk in s: │
│ 259 │ │ │ │ if "message" in chunk: │
│ 260 │ │ │ │ │ choice = OpenAICompatCompletionChoice( │
│ 261 │ │ │ │ │ │ finish_reason=chunk["done_reason"] if chunk["done"] else None, │
│ │
│ /opt/app-root/lib64/python3.11/site-packages/ollama/_client.py:688 in inner │
│ │
│ 685 │ │ │ part = json.loads(line) │
│ 686 │ │ │ if err := part.get('error'): │
│ 687 │ │ │ raise ResponseError(err) │
│ ❱ 688 │ │ │ yield cls(**part) │
│ 689 │ │
│ 690 │ return inner() │
│ 691 │
│ │
│ /opt/app-root/lib64/python3.11/site-packages/pydantic/main.py:253 in __init__ │
│ │
│ 250 │ │ """ │
│ 251 │ │ # `__tracebackhide__` tells pytest and some other tools to omit this function │
│ from tracebacks │
│ 252 │ │ __tracebackhide__ = True │
│ ❱ 253 │ │ validated_self = self.__pydantic_validator__.validate_python(data, │
│ self_instance=self) │
│ 254 │ │ if self is not validated_self: │
│ 255 │ │ │ warnings.warn( │
│ 256 │ │ │ │ 'A custom validator is returning a value other than `self`.\n' │
╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
ValidationError: 1 validation error for GenerateResponse
response
Field required [type=missing, input_value={'model': 'ibm-granite/gr..., 'done_reason': 'stop'},
input_type=dict]
For further information visit https://errors.pydantic.dev/2.11/v/missing
06:57:08.030 [END] /v1/inference/chat-completion [StatusCode.OK] (41284.34ms)
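The ValidationError itself is easy to reproduce in isolation: ollama's client parses each streamed JSON line into GenerateResponse, whose response field is required, and the final chunk here only carries model/done/done_reason. A hedged repro with a stand-in model so it runs without the ollama package (the real class may declare more fields):

from pydantic import BaseModel

class GenerateResponse(BaseModel):  # stand-in for ollama's GenerateResponse
    model: str
    response: str                   # required -> "Field required [type=missing]"
    done: bool
    done_reason: str | None = None

# The final streamed chunk has no "response" key, so validation fails
# exactly as in the log above:
GenerateResponse(model="ibm-granite/granite-3.3-8b-instruct-GGUF",
                 done=True, done_reason="stop")

The telemetry sink records the same failure: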
06:56:26.748 [ERROR] Could not resolve model ibm-granite/granite-3.3-8b-instruct-GGUF
06:56:26.749 [WARN] Could not resolve model ibm-granite/granite-3.3-8b-instruct-GGUF, defaulting to json tool prompt format
06:57:08.026 [ERROR] Error in sse_generator
Traceback (most recent call last):
File "/opt/app-root/lib64/python3.11/site-packages/llama_stack/distribution/server/server.py", line 180, in sse_generator
async for item in event_gen:
File "/opt/app-root/lib64/python3.11/site-packages/llama_stack/distribution/routers/inference.py", line 239, in stream_generator
async for chunk in await provider.chat_completion(**params):
File "/opt/app-root/lib64/python3.11/site-packages/podman_ai_lab_stack/podman_ai_lab.py", line 274, in _stream_chat_completion
async for chunk in process_chat_completion_stream_response(stream, request):
File "/opt/app-root/lib64/python3.11/site-packages/llama_stack/providers/utils/inference/openai_compat.py", line 385, in process_chat_completion_stream_response
async for chunk in stream:
File "/opt/app-root/lib64/python3.11/site-packages/podman_ai_lab_stack/podman_ai_lab.py", line 258, in _generate_and_convert_to_openai_compat
async for chunk in s:
File "/opt/app-root/lib64/python3.11/site-packages/ollama/_client.py", line 688, in inner
yield cls(**part)
^^^^^^^^^^^
File "/opt/app-root/lib64/python3.11/site-packages/pydantic/main.py", line 253, in __init__
validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
pydantic_core._pydantic_core.ValidationError: 1 validation error for GenerateResponse
response
Field required [type=missing, input_value={'model': 'ibm-granite/gr..., 'done_reason': 'stop'}, input_type=dict]
For further information visit https://errors.pydantic.dev/2.11/v/missing
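For contrast, the provider's "message" in chunk branch (podman_ai_lab.py:259) expects chat-format chunks, and ollama's AsyncClient.chat does stream chunks that carry a message field rather than response. A hedged sketch of that call shape, where the host/port and model name are placeholders for illustration:

import asyncio
from ollama import AsyncClient

async def main():
    # Placeholder endpoint for the Podman AI Lab inference server.
    client = AsyncClient(host="http://localhost:10434")
    stream = await client.chat(
        model="ibm-granite/granite-3.3-8b-instruct-GGUF",
        messages=[{"role": "user", "content": "tell me a joke"}],
        stream=True,
    )
    async for chunk in stream:
        # Chat-format chunks carry "message", not "response".
        print(chunk["message"]["content"], end="", flush=True)

asyncio.run(main())

The traceback, then, suggests the request was routed down the raw generate path (podman_ai_lab.py:257) against an endpoint whose final chunk does not match ollama's GenerateResponse schema.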