langchain-ai
diff --git a/‎docs/docs/integrations/providers/scrapingbee.ipynb
Lines changed: 178 additions & 0 deletions b/‎docs/docs/integrations/providers/scrapingbee.ipynb
Lines changed: 178 additions & 0 deletions
diff --git a/‎docs/docs/integrations/tools/scrapingbee_checkusage.ipynb
Lines changed: 188 additions & 0 deletions b/‎docs/docs/integrations/tools/scrapingbee_checkusage.ipynb
Lines changed: 188 additions & 0 deletions
@@ -0,0 +1,178 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# ScrapingBee\n",
+    "*The Best Web Scraping API to Avoid Getting Blocked*\n",
+    "\n",
+    "## Overview\n",
+    "The ScrapingBee web scraping API handles headless browsers, rotates proxies for you, and offers AI-powered data extraction.\n",
+    "\n",
+    "## Installation\n",
+    "\n",
+    "```bash\n",
+    "pip install -U langchain-scrapingbee\n",
+    "```\n",
+    "\n",
+    "You should also configure credentials by setting the following environment variable:\n",
+    "\n",
+    "* SCRAPINGBEE_API_KEY\n",
+    "\n",
+    "You can get your API KEY and 1000 free credits by signing up [here](https://app.scrapingbee.com/account/register).\n",
+    "\n",
+    "## Tools\n",
+    "\n",
+    "The ScrapingBee integration provides you access to the following tools:\n",
+    "\n",
+    "* [ScrapeUrlTool](../../../../docs/docs/integrations/tools/scrapingbee_scrapeurl.ipynb) - Scrape the contents of any public website.\n",
+    "* [GoogleSearchTool](../../../../docs/docs/integrations/tools/scrapingbee_googlesearch.ipynb) - Search Google to obtain the following types of results: regular search (classic), news, maps, and images.\n",
+    "* [CheckUsageTool](../../../../docs/docs/integrations/tools/scrapingbee_checkusage.ipynb) - Monitor your ScrapingBee credit and concurrency usage.\n",
+    "\n",
+    "## Example"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "y8ku6X96sebl"
+   },
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import getpass\n",
+    "from langchain_scrapingbee import (\n",
+    "    ScrapeUrlTool,\n",
+    "    GoogleSearchTool,\n",
+    "    CheckUsageTool,\n",
+    ")\n",
+    "\n",
+    "# Prompt for the API key only when the environment does not already provide it.\n",
+    "if not os.environ.get(\"SCRAPINGBEE_API_KEY\"):\n",
+    "    print(\n",
+    "        \"SCRAPINGBEE_API_KEY environment variable is not set. Please enter the API Key here:\"\n",
+    "    )\n",
+    "    os.environ[\"SCRAPINGBEE_API_KEY\"] = getpass.getpass()\n",
+    "\n",
+    "# Read the key once, after the optional prompt, so `api_key` always holds\n",
+    "# the value that is actually handed to the tools.\n",
+    "api_key = os.environ[\"SCRAPINGBEE_API_KEY\"]\n",
+    "\n",
+    "scrape_tool = ScrapeUrlTool(api_key=api_key)\n",
+    "search_tool = GoogleSearchTool(api_key=api_key)\n",
+    "usage_tool = CheckUsageTool(api_key=api_key)\n",
+    "\n",
+    "# --- Test Case 1: Scrape a standard HTML page ---\n",
+    "print(\"--- 1. Testing ScrapeUrlTool (HTML) ---\")\n",
+    "html_result = scrape_tool.invoke({\"url\": \"http://httpbin.org/html\"})\n",
+    "print(html_result)\n",
+    "\n",
+    "\n",
+    "# --- Test Case 2: Scrape a PDF file ---\n",
+    "# `render_js` is passed as False through the tool's `params` argument.\n",
+    "print(\"--- 2. Testing ScrapeUrlTool (PDF) ---\")\n",
+    "pdf_result = scrape_tool.invoke(\n",
+    "    {\n",
+    "        \"url\": \"https://treaties.un.org/doc/publication/ctc/uncharter.pdf\",\n",
+    "        \"params\": {\"render_js\": False},\n",
+    "    }\n",
+    ")\n",
+    "print(pdf_result)\n",
+    "\n",
+    "\n",
+    "# --- Test Case 3: Google Search ---\n",
+    "print(\"--- 3. Testing GoogleSearchTool ---\")\n",
+    "search_result = search_tool.invoke({\"search\": \"What is LangChain?\"})\n",
+    "print(search_result)\n",
+    "\n",
+    "\n",
+    "# --- Test Case 4: Check Usage ---\n",
+    "print(\"--- 4. Testing CheckUsageTool ---\")\n",
+    "usage_result = usage_tool.invoke({})  # No arguments needed\n",
+    "print(usage_result)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example Using Agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from langchain_scrapingbee import (\n",
+    "    ScrapeUrlTool,\n",
+    "    GoogleSearchTool,\n",
+    "    CheckUsageTool,\n",
+    ")\n",
+    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "# Both the LLM provider and ScrapingBee need credentials; fail fast if either is missing.\n",
+    "if not all(os.environ.get(name) for name in (\"GOOGLE_API_KEY\", \"SCRAPINGBEE_API_KEY\")):\n",
+    "    raise ValueError(\n",
+    "        \"Google and ScrapingBee API keys must be set in environment variables.\"\n",
+    "    )\n",
+    "\n",
+    "scrapingbee_api_key = os.environ.get(\"SCRAPINGBEE_API_KEY\")\n",
+    "llm = ChatGoogleGenerativeAI(model=\"gemini-2.5-flash\", temperature=0)\n",
+    "\n",
+    "# One instance of every ScrapingBee tool, all sharing the same API key.\n",
+    "tools = [\n",
+    "    tool_cls(api_key=scrapingbee_api_key)\n",
+    "    for tool_cls in (ScrapeUrlTool, GoogleSearchTool, CheckUsageTool)\n",
+    "]\n",
+    "\n",
+    "agent = create_react_agent(llm, tools)\n",
+    "\n",
+    "user_input = (\n",
+    "    \"If I have enough API Credits, search for pdfs about langchain and save 3 pdfs.\"\n",
+    ")\n",
+    "\n",
+    "# Pretty-print the newest message of each streamed step.\n",
+    "for step in agent.stream({\"messages\": user_input}, stream_mode=\"values\"):\n",
+    "    step[\"messages\"][-1].pretty_print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Documentation\n",
+    "* [HTML API](https://www.scrapingbee.com/documentation/)\n",
+    "* [Google Search API](https://www.scrapingbee.com/documentation/google/)\n",
+    "* [Data Extraction](https://www.scrapingbee.com/documentation/data-extraction/)\n",
+    "* [JavaScript Scenario](https://www.scrapingbee.com/documentation/js-scenario/)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.11"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
+}
@@ -0,0 +1,188 @@
+{
+ "cells": [
+  {
+   "cell_type": "raw",
+   "id": "10238e62-3465-4973-9279-606cbb7ccf16",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "sidebar_label: ScrapingBee\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a6f91f20",
+   "metadata": {},
+   "source": [
+    "# ScrapingBee CheckUsageTool\n",
+    "\n",
+    "This tool allows you to keep track of your credits and concurrency usage while you are scraping the web.\n",
+    "\n",
+    "## Overview\n",
+    "\n",
+    "### Integration details\n",
+    "\n",
+    "\n",
+    "| Class | Package | Serializable | JS support |  Package latest |\n",
+    "| :--- | :--- | :---: | :---: | :---: |\n",
+    "| [CheckUsageTool](https://pypi.org/project/langchain-scrapingbee/) | [langchain-scrapingbee](https://pypi.org/project/langchain-scrapingbee/) | ✅ | ❌ |  ![PyPI - Version](https://img.shields.io/pypi/v/langchain-scrapingbee?style=flat-square&label=%20) |\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "```bash\n",
+    "pip install -U langchain-scrapingbee\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b15e9266",
+   "metadata": {},
+   "source": [
+    "### Credentials\n",
+    "\n",
+    "You should configure credentials by setting the following environment variables:\n",
+    "\n",
+    "* SCRAPINGBEE_API_KEY"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "e0b178a2-8816-40ca-b57c-ccdd86dde9c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import getpass\n",
+    "import os\n",
+    "\n",
+    "# Prompt for the key only when it is not already set in the environment,\n",
+    "# so the rest of the notebook runs from a fresh kernel.\n",
+    "if not os.environ.get(\"SCRAPINGBEE_API_KEY\"):\n",
+    "    os.environ[\"SCRAPINGBEE_API_KEY\"] = getpass.getpass(\"SCRAPINGBEE API key:\\n\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1c97218f-f366-479d-8bf7-fe9f2f6df73f",
+   "metadata": {},
+   "source": [
+    "## Instantiation\n",
+    "\n",
+    "The `CheckUsageTool` only requires the API key during instantiation. If it is not set as an environment variable, you can provide it directly here.\n",
+    "\n",
+    "Here we show how to instantiate an instance of the `CheckUsageTool`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b3ddfe9-ca79-494c-a7ab-1f56d9407a64",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from langchain_scrapingbee import CheckUsageTool\n",
+    "\n",
+    "usage_tool = CheckUsageTool(api_key=os.environ.get(\"SCRAPINGBEE_API_KEY\"))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "74147a1a",
+   "metadata": {},
+   "source": [
+    "## Invocation\n",
+    "\n",
+    "This tool doesn't require any arguments. Invoking it checks your ScrapingBee API usage data and returns the following information:\n",
+    "\n",
+    "* max_api_credit\n",
+    "* used_api_credit\n",
+    "* max_concurrency\n",
+    "* current_concurrency\n",
+    "* renewal_subscription_date"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65310a8b-eb0c-4d9e-a618-4f4abe2414fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "usage_tool.invoke({})"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6e73897",
+   "metadata": {},
+   "source": [
+    "### Example Using Agent"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f90e33a7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "from langchain_scrapingbee import CheckUsageTool\n",
+    "from langchain_google_genai import ChatGoogleGenerativeAI\n",
+    "from langgraph.prebuilt import create_react_agent\n",
+    "\n",
+    "# The agent needs credentials for both the LLM provider and ScrapingBee.\n",
+    "if not os.environ.get(\"GOOGLE_API_KEY\") or not os.environ.get(\"SCRAPINGBEE_API_KEY\"):\n",
+    "    raise ValueError(\n",
+    "        \"Google and ScrapingBee API keys must be set in environment variables.\"\n",
+    "    )\n",
+    "\n",
+    "llm = ChatGoogleGenerativeAI(temperature=0, model=\"gemini-2.5-flash\")\n",
+    "scrapingbee_api_key = os.environ.get(\"SCRAPINGBEE_API_KEY\")\n",
+    "\n",
+    "# Pass the key read above instead of looking it up in the environment again.\n",
+    "usage_tool = CheckUsageTool(api_key=scrapingbee_api_key)\n",
+    "\n",
+    "agent = create_react_agent(llm, [usage_tool])\n",
+    "\n",
+    "user_input = \"How many api credits do I have available in my account?\"\n",
+    "\n",
+    "# Stream the agent's output step-by-step, printing the newest message each time\n",
+    "for step in agent.stream(\n",
+    "    {\"messages\": user_input},\n",
+    "    stream_mode=\"values\",\n",
+    "):\n",
+    "    step[\"messages\"][-1].pretty_print()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4ac8146c",
+   "metadata": {},
+   "source": [
+    "## API reference\n",
+    "\n",
+    "For more details on our `usage` endpoint, please check out this [link](https://www.scrapingbee.com/documentation/#usage-endpoint)."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "poetry-venv-311",
+   "language": "python",
+   "name": "poetry-venv-311"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}