Skip to content

Commit a121a47

Browse files
authored
feat(retrievers-superlinked): add Superlinked retriever integration (new package) (#19636)
1 parent fcb87e9 commit a121a47

File tree

12 files changed

+3324
-0
lines changed

12 files changed

+3324
-0
lines changed

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,11 @@ token.json
2828
*.sqlite3
2929
*.db
3030
llama-index-core/llama_index/core/_static
31+
32+
**/.coverage
33+
**/coverage.xml
34+
**/*.egg-info/
35+
36+
**/*.egg-info/
37+
**/.coverage
38+
**/coverage.xml
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Superlinked
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# LlamaIndex Superlinked Retriever
2+
3+
A LlamaIndex retriever integration for [Superlinked](https://github.com/superlinked/superlinked), mirroring the structure of official LlamaIndex retriever packages.
4+
5+
## Installation
6+
7+
Option A (standalone dev):
8+
9+
```bash
10+
python -m venv .venv && source .venv/bin/activate
11+
pip install -U pip
12+
pip install -e .
13+
pip install pytest
14+
```
15+
16+
Option B (monorepo): Add this directory under `llama-index-integrations/retrievers` and install with the monorepo tooling.
17+
18+
Note: Examples require Python 3.10–3.12 (Superlinked does not support Python 3.9).
19+
20+
## References and resources
21+
22+
- Superlinked framework repository: [superlinked/superlinked](https://links.superlinked.com/llamaindex_repo_sl)
23+
- Steam Games example article: [read the article](https://links.superlinked.com/llamaindex_article_sl)
24+
- Open in Colab: [Steam Games example notebook](https://colab.research.google.com/github/run-llama/llama_index/blob/main/llama-index-integrations/retrievers/llama-index-retrievers-superlinked/examples/steam_games_example.ipynb)
25+
26+
## Usage
27+
28+
```python
29+
from llama_index.retrievers.superlinked import SuperlinkedRetriever
30+
from llama_index.core import QueryBundle
31+
32+
retriever = SuperlinkedRetriever(
33+
sl_client=app, # Superlinked App
34+
sl_query=query_descriptor, # Superlinked QueryDescriptor
35+
page_content_field="text",
36+
query_text_param="query_text",
37+
metadata_fields=None,
38+
top_k=4,
39+
)
40+
41+
nodes = retriever.retrieve("What is a landmark in Paris?")
42+
```
43+
44+
## Development
45+
46+
- Follows LlamaIndex contribution guidelines.
47+
- Run tests: `pytest -q`.
48+
49+
## Testing without Superlinked
50+
51+
Tests use mocks for the `superlinked` imports so they can run without the dependency installed.
52+
53+
## Example
54+
55+
An end-to-end example is provided in `examples/steam_games_example.py`, with a matching notebook in `examples/steam_games_example.ipynb`.
Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"# Install dependencies for Colab\n",
10+
"%pip -q install superlinked pandas llama-index-core llama-index-retrievers-superlinked\n"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": null,
16+
"metadata": {},
17+
"outputs": [],
18+
"source": [
19+
"# Example: Superlinked + LlamaIndex custom retriever (Steam games)\n",
20+
"# This notebook mirrors examples/steam_games_example.py\n",
21+
"\n",
22+
"import argparse\n",
23+
"from typing import List, Optional\n",
24+
"\n",
25+
"import pandas as pd\n",
26+
"\n",
27+
"import superlinked.framework as sl\n",
28+
"from llama_index.retrievers.superlinked import SuperlinkedRetriever\n",
29+
"\n",
30+
"try:\n",
31+
" from llama_index.core.query_engine import RetrieverQueryEngine\n",
32+
" from llama_index.core.response_synthesizers import get_response_synthesizer\n",
33+
"except Exception:\n",
34+
" RetrieverQueryEngine = None # type: ignore\n",
35+
" get_response_synthesizer = None # type: ignore\n",
36+
"\n"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": null,
42+
"metadata": {},
43+
"outputs": [],
44+
"source": [
45+
"def build_dataframe(csv_path: Optional[str]) -> pd.DataFrame:\n",
46+
" if csv_path:\n",
47+
" df = pd.read_csv(csv_path)\n",
48+
" else:\n",
49+
" df = pd.DataFrame(\n",
50+
" [\n",
51+
" {\n",
52+
" \"game_number\": 1,\n",
53+
" \"name\": \"Star Tactics\",\n",
54+
" \"desc_snippet\": \"Turn-based strategy in deep space.\",\n",
55+
" \"game_details\": \"Tactical combat, fleet management\",\n",
56+
" \"languages\": \"en\",\n",
57+
" \"genre\": \"Strategy, Sci-Fi\",\n",
58+
" \"game_description\": \"Engage in strategic battles among the stars.\",\n",
59+
" \"original_price\": 29.99,\n",
60+
" \"discount_price\": 19.99,\n",
61+
" },\n",
62+
" {\n",
63+
" \"game_number\": 2,\n",
64+
" \"name\": \"Wizard Party\",\n",
65+
" \"desc_snippet\": \"Co-op party game with spells.\",\n",
66+
" \"game_details\": \"Local co-op, party\",\n",
67+
" \"languages\": \"en\",\n",
68+
" \"genre\": \"Party, Casual, Magic\",\n",
69+
" \"game_description\": \"Cast spells with friends in chaotic party modes.\",\n",
70+
" \"original_price\": 14.99,\n",
71+
" \"discount_price\": 9.99,\n",
72+
" },\n",
73+
" ]\n",
74+
" )\n",
75+
"\n",
76+
" required = [\n",
77+
" \"game_number\",\n",
78+
" \"name\",\n",
79+
" \"desc_snippet\",\n",
80+
" \"game_details\",\n",
81+
" \"languages\",\n",
82+
" \"genre\",\n",
83+
" \"game_description\",\n",
84+
" \"original_price\",\n",
85+
" \"discount_price\",\n",
86+
" ]\n",
87+
" missing = [c for c in required if c not in df.columns]\n",
88+
" if missing:\n",
89+
" raise ValueError(f\"Missing required columns: {missing}\")\n",
90+
"\n",
91+
" df[\"combined_text\"] = (\n",
92+
" df[\"name\"].astype(str)\n",
93+
" + \" \"\n",
94+
" + df[\"desc_snippet\"].astype(str)\n",
95+
" + \" \"\n",
96+
" + df[\"genre\"].astype(str)\n",
97+
" + \" \"\n",
98+
" + df[\"game_details\"].astype(str)\n",
99+
" + \" \"\n",
100+
" + df[\"game_description\"].astype(str)\n",
101+
" )\n",
102+
" return df\n"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": null,
108+
"metadata": {},
109+
"outputs": [],
110+
"source": [
111+
"def build_superlinked_app(df: pd.DataFrame):\n",
112+
" class GameSchema(sl.Schema):\n",
113+
" id: sl.IdField\n",
114+
" name: sl.String\n",
115+
" desc_snippet: sl.String\n",
116+
" game_details: sl.String\n",
117+
" languages: sl.String\n",
118+
" genre: sl.String\n",
119+
" game_description: sl.String\n",
120+
" original_price: sl.Float\n",
121+
" discount_price: sl.Float\n",
122+
" combined_text: sl.String\n",
123+
"\n",
124+
" game = GameSchema()\n",
125+
"\n",
126+
" text_space = sl.TextSimilaritySpace(\n",
127+
" text=game.combined_text,\n",
128+
" model=\"sentence-transformers/all-mpnet-base-v2\",\n",
129+
" )\n",
130+
" index = sl.Index([text_space])\n",
131+
"\n",
132+
" parser = sl.DataFrameParser(\n",
133+
" game,\n",
134+
" mapping={\n",
135+
" game.id: \"game_number\",\n",
136+
" game.name: \"name\",\n",
137+
" game.desc_snippet: \"desc_snippet\",\n",
138+
" game.game_details: \"game_details\",\n",
139+
" game.languages: \"languages\",\n",
140+
" game.genre: \"genre\",\n",
141+
" game.game_description: \"game_description\",\n",
142+
" game.original_price: \"original_price\",\n",
143+
" game.discount_price: \"discount_price\",\n",
144+
" game.combined_text: \"combined_text\",\n",
145+
" },\n",
146+
" )\n",
147+
"\n",
148+
" source = sl.InMemorySource(schema=game, parser=parser)\n",
149+
" executor = sl.InMemoryExecutor(sources=[source], indices=[index])\n",
150+
" app = executor.run()\n",
151+
"\n",
152+
" source.put([df])\n",
153+
"\n",
154+
" query = (\n",
155+
" sl.Query(index)\n",
156+
" .find(game)\n",
157+
" .similar(text_space, sl.Param(\"query_text\"))\n",
158+
" .select(\n",
159+
" [\n",
160+
" game.id,\n",
161+
" game.name,\n",
162+
" game.desc_snippet,\n",
163+
" game.game_details,\n",
164+
" game.languages,\n",
165+
" game.genre,\n",
166+
" game.game_description,\n",
167+
" game.original_price,\n",
168+
" game.discount_price,\n",
169+
" ]\n",
170+
" )\n",
171+
" )\n",
172+
"\n",
173+
" return app, query, game\n"
174+
]
175+
},
176+
{
177+
"cell_type": "code",
178+
"execution_count": null,
179+
"metadata": {},
180+
"outputs": [],
181+
"source": [
182+
"def run_demo(csv_path: Optional[str], top_k: int, query_text: str) -> None:\n",
183+
" df = build_dataframe(csv_path)\n",
184+
" app, query_descriptor, game = build_superlinked_app(df)\n",
185+
"\n",
186+
" retriever = SuperlinkedRetriever(\n",
187+
" sl_client=app,\n",
188+
" sl_query=query_descriptor,\n",
189+
" page_content_field=\"desc_snippet\",\n",
190+
" query_text_param=\"query_text\",\n",
191+
" metadata_fields=[\n",
192+
" \"id\",\n",
193+
" \"name\",\n",
194+
" \"genre\",\n",
195+
" \"game_details\",\n",
196+
" \"languages\",\n",
197+
" \"game_description\",\n",
198+
" \"original_price\",\n",
199+
" \"discount_price\",\n",
200+
" ],\n",
201+
" top_k=top_k,\n",
202+
" )\n",
203+
"\n",
204+
" print(f\"\\nRetrieving for: {query_text!r}\")\n",
205+
" nodes = retriever.retrieve(query_text)\n",
206+
" for i, nws in enumerate(nodes, 1):\n",
207+
" print(f\"#{i} score={nws.score:.4f} text={nws.node.text!r}\")\n",
208+
" print(f\" metadata: {nws.node.metadata}\")\n",
209+
"\n",
210+
" if RetrieverQueryEngine and get_response_synthesizer:\n",
211+
" print(\"\\nBuilding RetrieverQueryEngine...\")\n",
212+
" try:\n",
213+
" engine = RetrieverQueryEngine(\n",
214+
" retriever=retriever, response_synthesizer=get_response_synthesizer()\n",
215+
" )\n",
216+
" response = engine.query(query_text)\n",
217+
" print(\"\\nEngine response:\", response)\n",
218+
" except Exception as e:\n",
219+
" print(\"Engine invocation failed (likely missing LLM setup):\", e)\n"
220+
]
221+
},
222+
{
223+
"cell_type": "code",
224+
"execution_count": null,
225+
"metadata": {},
226+
"outputs": [],
227+
"source": [
228+
"# Parameters (for Colab users)\n",
229+
"csv_path = None # @param {type:\"string\"}\n",
230+
"top_k = 3 # @param {type:\"integer\"}\n",
231+
"query_text = \"strategic sci-fi game\" # @param {type:\"string\"}\n",
232+
"\n",
233+
"run_demo(csv_path, top_k, query_text)\n"
234+
]
235+
}
236+
],
237+
"metadata": {
238+
"language_info": {
239+
"name": "python"
240+
}
241+
},
242+
"nbformat": 4,
243+
"nbformat_minor": 2
244+
}

0 commit comments

Comments
 (0)