|  | 
|  | 1 | +{ | 
|  | 2 | + "cells": [ | 
|  | 3 | +  { | 
|  | 4 | +   "cell_type": "code", | 
|  | 5 | +   "execution_count": null, | 
|  | 6 | +   "metadata": {}, | 
|  | 7 | +   "outputs": [], | 
|  | 8 | +   "source": [ | 
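|  | 9 | +    "# Install dependencies for Colab. NOTE(review): llama_index_retrievers_superlinked (imported in the next cell) is not in this install list - confirm it is installed separately\n", | 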
|  | 10 | +    "%pip -q install superlinked pandas llama-index-core\n" | 
|  | 11 | +   ] | 
|  | 12 | +  }, | 
|  | 13 | +  { | 
|  | 14 | +   "cell_type": "code", | 
|  | 15 | +   "execution_count": null, | 
|  | 16 | +   "metadata": {}, | 
|  | 17 | +   "outputs": [], | 
|  | 18 | +   "source": [ | 
|  | 19 | +    "# Example: Superlinked + LlamaIndex custom retriever (Steam games)\n", | 
|  | 20 | +    "# This notebook mirrors examples/steam_games_example.py\n", | 
|  | 21 | +    "\n", | 
|  | 22 | +    "import argparse\n", | 
|  | 23 | +    "from typing import List, Optional\n", | 
|  | 24 | +    "\n", | 
|  | 25 | +    "import pandas as pd\n", | 
|  | 26 | +    "\n", | 
|  | 27 | +    "import superlinked.framework as sl\n", | 
|  | 28 | +    "from llama_index_retrievers_superlinked import SuperlinkedRetriever\n", | 
|  | 29 | +    "\n", | 
|  | 30 | +    "try:\n", | 
|  | 31 | +    "    from llama_index.core.query_engine import RetrieverQueryEngine\n", | 
|  | 32 | +    "    from llama_index.core.response_synthesizers import get_response_synthesizer\n", | 
|  | 33 | +    "except Exception:\n", | 
|  | 34 | +    "    RetrieverQueryEngine = None  # type: ignore\n", | 
|  | 35 | +    "    get_response_synthesizer = None  # type: ignore\n", | 
|  | 36 | +    "\n" | 
|  | 37 | +   ] | 
|  | 38 | +  }, | 
|  | 39 | +  { | 
|  | 40 | +   "cell_type": "code", | 
|  | 41 | +   "execution_count": null, | 
|  | 42 | +   "metadata": {}, | 
|  | 43 | +   "outputs": [], | 
|  | 44 | +   "source": [ | 
|  | 45 | +    "def build_dataframe(csv_path: Optional[str]) -> pd.DataFrame:\n", | 
|  | 46 | +    "    if csv_path:\n", | 
|  | 47 | +    "        df = pd.read_csv(csv_path)\n", | 
|  | 48 | +    "    else:\n", | 
|  | 49 | +    "        df = pd.DataFrame(\n", | 
|  | 50 | +    "            [\n", | 
|  | 51 | +    "                {\n", | 
|  | 52 | +    "                    \"game_number\": 1,\n", | 
|  | 53 | +    "                    \"name\": \"Star Tactics\",\n", | 
|  | 54 | +    "                    \"desc_snippet\": \"Turn-based strategy in deep space.\",\n", | 
|  | 55 | +    "                    \"game_details\": \"Tactical combat, fleet management\",\n", | 
|  | 56 | +    "                    \"languages\": \"en\",\n", | 
|  | 57 | +    "                    \"genre\": \"Strategy, Sci-Fi\",\n", | 
|  | 58 | +    "                    \"game_description\": \"Engage in strategic battles among the stars.\",\n", | 
|  | 59 | +    "                    \"original_price\": 29.99,\n", | 
|  | 60 | +    "                    \"discount_price\": 19.99,\n", | 
|  | 61 | +    "                },\n", | 
|  | 62 | +    "                {\n", | 
|  | 63 | +    "                    \"game_number\": 2,\n", | 
|  | 64 | +    "                    \"name\": \"Wizard Party\",\n", | 
|  | 65 | +    "                    \"desc_snippet\": \"Co-op party game with spells.\",\n", | 
|  | 66 | +    "                    \"game_details\": \"Local co-op, party\",\n", | 
|  | 67 | +    "                    \"languages\": \"en\",\n", | 
|  | 68 | +    "                    \"genre\": \"Party, Casual, Magic\",\n", | 
|  | 69 | +    "                    \"game_description\": \"Cast spells with friends in chaotic party modes.\",\n", | 
|  | 70 | +    "                    \"original_price\": 14.99,\n", | 
|  | 71 | +    "                    \"discount_price\": 9.99,\n", | 
|  | 72 | +    "                },\n", | 
|  | 73 | +    "            ]\n", | 
|  | 74 | +    "        )\n", | 
|  | 75 | +    "\n", | 
|  | 76 | +    "    required = [\n", | 
|  | 77 | +    "        \"game_number\",\n", | 
|  | 78 | +    "        \"name\",\n", | 
|  | 79 | +    "        \"desc_snippet\",\n", | 
|  | 80 | +    "        \"game_details\",\n", | 
|  | 81 | +    "        \"languages\",\n", | 
|  | 82 | +    "        \"genre\",\n", | 
|  | 83 | +    "        \"game_description\",\n", | 
|  | 84 | +    "        \"original_price\",\n", | 
|  | 85 | +    "        \"discount_price\",\n", | 
|  | 86 | +    "    ]\n", | 
|  | 87 | +    "    missing = [c for c in required if c not in df.columns]\n", | 
|  | 88 | +    "    if missing:\n", | 
|  | 89 | +    "        raise ValueError(f\"Missing required columns: {missing}\")\n", | 
|  | 90 | +    "\n", | 
|  | 91 | +    "    df[\"combined_text\"] = (\n", | 
|  | 92 | +    "        df[\"name\"].astype(str)\n", | 
|  | 93 | +    "        + \" \"\n", | 
|  | 94 | +    "        + df[\"desc_snippet\"].astype(str)\n", | 
|  | 95 | +    "        + \" \"\n", | 
|  | 96 | +    "        + df[\"genre\"].astype(str)\n", | 
|  | 97 | +    "        + \" \"\n", | 
|  | 98 | +    "        + df[\"game_details\"].astype(str)\n", | 
|  | 99 | +    "        + \" \"\n", | 
|  | 100 | +    "        + df[\"game_description\"].astype(str)\n", | 
|  | 101 | +    "    )\n", | 
|  | 102 | +    "    return df\n" | 
|  | 103 | +   ] | 
|  | 104 | +  }, | 
|  | 105 | +  { | 
|  | 106 | +   "cell_type": "code", | 
|  | 107 | +   "execution_count": null, | 
|  | 108 | +   "metadata": {}, | 
|  | 109 | +   "outputs": [], | 
|  | 110 | +   "source": [ | 
|  | 111 | +    "def build_superlinked_app(df: pd.DataFrame):\n", | 
|  | 112 | +    "    class GameSchema(sl.Schema):\n", | 
|  | 113 | +    "        id: sl.IdField\n", | 
|  | 114 | +    "        name: sl.String\n", | 
|  | 115 | +    "        desc_snippet: sl.String\n", | 
|  | 116 | +    "        game_details: sl.String\n", | 
|  | 117 | +    "        languages: sl.String\n", | 
|  | 118 | +    "        genre: sl.String\n", | 
|  | 119 | +    "        game_description: sl.String\n", | 
|  | 120 | +    "        original_price: sl.Float\n", | 
|  | 121 | +    "        discount_price: sl.Float\n", | 
|  | 122 | +    "        combined_text: sl.String\n", | 
|  | 123 | +    "\n", | 
|  | 124 | +    "    game = GameSchema()\n", | 
|  | 125 | +    "\n", | 
|  | 126 | +    "    text_space = sl.TextSimilaritySpace(\n", | 
|  | 127 | +    "        text=game.combined_text,\n", | 
|  | 128 | +    "        model=\"sentence-transformers/all-mpnet-base-v2\",\n", | 
|  | 129 | +    "    )\n", | 
|  | 130 | +    "    index = sl.Index([text_space])\n", | 
|  | 131 | +    "\n", | 
|  | 132 | +    "    parser = sl.DataFrameParser(\n", | 
|  | 133 | +    "        game,\n", | 
|  | 134 | +    "        mapping={\n", | 
|  | 135 | +    "            game.id: \"game_number\",\n", | 
|  | 136 | +    "            game.name: \"name\",\n", | 
|  | 137 | +    "            game.desc_snippet: \"desc_snippet\",\n", | 
|  | 138 | +    "            game.game_details: \"game_details\",\n", | 
|  | 139 | +    "            game.languages: \"languages\",\n", | 
|  | 140 | +    "            game.genre: \"genre\",\n", | 
|  | 141 | +    "            game.game_description: \"game_description\",\n", | 
|  | 142 | +    "            game.original_price: \"original_price\",\n", | 
|  | 143 | +    "            game.discount_price: \"discount_price\",\n", | 
|  | 144 | +    "            game.combined_text: \"combined_text\",\n", | 
|  | 145 | +    "        },\n", | 
|  | 146 | +    "    )\n", | 
|  | 147 | +    "\n", | 
|  | 148 | +    "    source = sl.InMemorySource(schema=game, parser=parser)\n", | 
|  | 149 | +    "    executor = sl.InMemoryExecutor(sources=[source], indices=[index])\n", | 
|  | 150 | +    "    app = executor.run()\n", | 
|  | 151 | +    "\n", | 
|  | 152 | +    "    source.put([df])\n", | 
|  | 153 | +    "\n", | 
|  | 154 | +    "    query = (\n", | 
|  | 155 | +    "        sl.Query(index)\n", | 
|  | 156 | +    "        .find(game)\n", | 
|  | 157 | +    "        .similar(text_space, sl.Param(\"query_text\"))\n", | 
|  | 158 | +    "        .select(\n", | 
|  | 159 | +    "            [\n", | 
|  | 160 | +    "                game.id,\n", | 
|  | 161 | +    "                game.name,\n", | 
|  | 162 | +    "                game.desc_snippet,\n", | 
|  | 163 | +    "                game.game_details,\n", | 
|  | 164 | +    "                game.languages,\n", | 
|  | 165 | +    "                game.genre,\n", | 
|  | 166 | +    "                game.game_description,\n", | 
|  | 167 | +    "                game.original_price,\n", | 
|  | 168 | +    "                game.discount_price,\n", | 
|  | 169 | +    "            ]\n", | 
|  | 170 | +    "        )\n", | 
|  | 171 | +    "    )\n", | 
|  | 172 | +    "\n", | 
|  | 173 | +    "    return app, query, game\n" | 
|  | 174 | +   ] | 
|  | 175 | +  }, | 
|  | 176 | +  { | 
|  | 177 | +   "cell_type": "code", | 
|  | 178 | +   "execution_count": null, | 
|  | 179 | +   "metadata": {}, | 
|  | 180 | +   "outputs": [], | 
|  | 181 | +   "source": [ | 
|  | 182 | +    "def run_demo(csv_path: Optional[str], top_k: int, query_text: str) -> None:\n", | 
|  | 183 | +    "    df = build_dataframe(csv_path)\n", | 
|  | 184 | +    "    app, query_descriptor, game = build_superlinked_app(df)\n", | 
|  | 185 | +    "\n", | 
|  | 186 | +    "    retriever = SuperlinkedRetriever(\n", | 
|  | 187 | +    "        sl_client=app,\n", | 
|  | 188 | +    "        sl_query=query_descriptor,\n", | 
|  | 189 | +    "        page_content_field=\"desc_snippet\",\n", | 
|  | 190 | +    "        query_text_param=\"query_text\",\n", | 
|  | 191 | +    "        metadata_fields=[\n", | 
|  | 192 | +    "            \"id\",\n", | 
|  | 193 | +    "            \"name\",\n", | 
|  | 194 | +    "            \"genre\",\n", | 
|  | 195 | +    "            \"game_details\",\n", | 
|  | 196 | +    "            \"languages\",\n", | 
|  | 197 | +    "            \"game_description\",\n", | 
|  | 198 | +    "            \"original_price\",\n", | 
|  | 199 | +    "            \"discount_price\",\n", | 
|  | 200 | +    "        ],\n", | 
|  | 201 | +    "        top_k=top_k,\n", | 
|  | 202 | +    "    )\n", | 
|  | 203 | +    "\n", | 
|  | 204 | +    "    print(f\"\\nRetrieving for: {query_text!r}\")\n", | 
|  | 205 | +    "    nodes = retriever.retrieve(query_text)\n", | 
|  | 206 | +    "    for i, nws in enumerate(nodes, 1):\n", | 
|  | 207 | +    "        print(f\"#{i} score={nws.score:.4f} text={nws.node.text!r}\")\n", | 
|  | 208 | +    "        print(f\"   metadata: {nws.node.metadata}\")\n", | 
|  | 209 | +    "\n", | 
|  | 210 | +    "    if RetrieverQueryEngine and get_response_synthesizer:\n", | 
|  | 211 | +    "        print(\"\\nBuilding RetrieverQueryEngine...\")\n", | 
|  | 212 | +    "        try:\n", | 
|  | 213 | +    "            engine = RetrieverQueryEngine(\n", | 
|  | 214 | +    "                retriever=retriever, response_synthesizer=get_response_synthesizer()\n", | 
|  | 215 | +    "            )\n", | 
|  | 216 | +    "            response = engine.query(query_text)\n", | 
|  | 217 | +    "            print(\"\\nEngine response:\", response)\n", | 
|  | 218 | +    "        except Exception as e:\n", | 
|  | 219 | +    "            print(\"Engine invocation failed (likely missing LLM setup):\", e)\n" | 
|  | 220 | +   ] | 
|  | 221 | +  }, | 
|  | 222 | +  { | 
|  | 223 | +   "cell_type": "code", | 
|  | 224 | +   "execution_count": null, | 
|  | 225 | +   "metadata": {}, | 
|  | 226 | +   "outputs": [], | 
|  | 227 | +   "source": [ | 
|  | 228 | +    "# Parameters (for Colab users); leave csv_path as None to use the built-in two-game sample DataFrame\n", | 
|  | 229 | +    "csv_path = None  # @param {type:\"string\"}\n", | 
|  | 230 | +    "top_k = 3        # @param {type:\"integer\"}\n", | 
|  | 231 | +    "query_text = \"strategic sci-fi game\"  # @param {type:\"string\"}\n", | 
|  | 232 | +    "\n", | 
|  | 233 | +    "run_demo(csv_path, top_k, query_text)\n" | 
|  | 234 | +   ] | 
|  | 235 | +  } | 
|  | 236 | + ], | 
|  | 237 | + "metadata": { | 
|  | 238 | +  "language_info": { | 
|  | 239 | +   "name": "python" | 
|  | 240 | +  } | 
|  | 241 | + }, | 
|  | 242 | + "nbformat": 4, | 
|  | 243 | + "nbformat_minor": 2 | 
|  | 244 | +} | 
0 commit comments