# research_agent.py
# Purpose: A research agent with full AgentOps instrumentation.
# Every session is logged, replayed, and cost-tracked in the AgentOps dashboard.
#
# Prerequisites:
#Â Â pip install agentops anthropic python-dotenv
#
# Environment variables required (in .env):
#Â Â AGENTOPS_API_KEY — from https://app.agentops.ai
#Â Â ANTHROPIC_API_KEY — from https://console.anthropic.com
#
# How to run:
#Â Â python research_agent.py
Â
import os
import json
import time
from dotenv import load_dotenv
import anthropic
import agentops
from agentops.sdk.decorators import record_function
Â
load_dotenv()
Â
# ── Initialize AgentOps ────────────────────────────────────────────────────────
# This must be called before any agent code runs.
# Tags let you filter and group sessions in the dashboard.
# The SDK automatically intercepts LLM calls once initialized.
agentops.init(
    api_key=os.environ[“AGENTOPS_API_KEY”],
    tags=[“research-agent”, “production”, “v1.0”],
    auto_start_session=True      # Automatically starts a session on init
)
Â
# Initialize the Anthropic client after AgentOps — the SDK wraps LLM clients
# to automatically capture every call’s input, output, tokens, and cost.
client = anthropic.Anthropic(api_key=os.environ[“ANTHROPIC_API_KEY”])
Â
MODEL = “claude-sonnet-4-20250514”
Â
# ── System prompt ─────────────────────────────────────────────────────────────
# Stored as a constant, not inline — version-controllable and testable.
SYSTEM_PROMPT = “”“You are a research assistant. When given a topic:
1. Use the available tools to gather information systematically
2. Call search_topic to get an overview of the subject
3. Call get_key_facts to extract the most important points
4. Call format_summary to structure the final output
Â
Be thorough but concise. Always call format_summary as your final step.”“”
Â
# ── Tool definitions ──────────────────────────────────────────────────────────
# These are the tools the agent can call. In a real system, search_topic
# would call a real search API (Tavily, SerpAPI, etc.). Here they are stubs
# that return realistic data so you can run the example without external APIs.
TOOLS = [
    {
        “name”: “search_topic”,
        “description”: (
            “Search for comprehensive information about a topic. “
            “Returns an overview with key themes and context. “
            “Use this as the first step for any research task.”
        ),
        “input_schema”: {
            “type”: “object”,
            “properties”: {
                “topic”: {
                    “type”: “string”,
                    “description”: “The topic to research. Be specific.”
                },
                “depth”: {
                    “type”: “string”,
                    “enum”: [“overview”, “detailed”],
                    “description”: “How deep to search. Use ‘overview’ first.”
                }
            },
            “required”: [“topic”]
        }
    },
    {
        “name”: “get_key_facts”,
        “description”: (
            “Extract the most important facts about a topic from search results. “
            “Use after search_topic to identify the 5-7 most significant points.”
        ),
        “input_schema”: {
            “type”: “object”,
            “properties”: {
                “topic”: {
                    “type”: “string”,
                    “description”: “The topic to extract facts about”
                },
                “focus”: {
                    “type”: “string”,
                    “description”: “Optional: specific angle to focus on (e.g., ‘recent developments’, ‘key players’)”
                }
            },
            “required”: [“topic”]
        }
    },
    {
        “name”: “format_summary”,
        “description”: (
            “Format research findings into a clean structured summary. “
            “Always call this as the final step before returning to the user.”
        ),
        “input_schema”: {
            “type”: “object”,
            “properties”: {
                “title”: {
                    “type”: “string”,
                    “description”: “Title for the summary”
                },
                “key_points”: {
                    “type”: “array”,
                    “items”: {“type”: “string”},
                    “description”: “List of key findings (5-7 items)”
                },
                “conclusion”: {
                    “type”: “string”,
                    “description”: “A 2-3 sentence synthesis of the research”
                }
            },
            “required”: [“title”, “key_points”, “conclusion”]
        }
    }
]
Â
Â
# ── Tool implementations ──────────────────────────────────────────────────────
# @record_function decorates each tool so AgentOps captures:
# – The function name
# – Input arguments
# – Return value
# – Execution time
# – Any exceptions
# These appear as labeled spans in the session replay timeline.
Â
@record_function(“search_topic”)
def search_topic(topic: str, depth: str = “overview”) -> dict:
    “”“
    Search for information about a topic.
    In production: replace this stub with a real search API call.
    ““”
    # Simulate search latency — remove in production
    time.sleep(0.3)
Â
    # Stub response — replace with: tavily_client.search(query=topic)
    return {
        “topic”: topic,
        “depth”: depth,
        “results”: f“Comprehensive overview of {topic}: This is a rapidly evolving field “
                  f“with significant developments in 2025-2026. Key themes include “
                  f“technical innovation, adoption patterns, and organizational impact. “
                  f“Multiple research groups and companies are actively advancing the field.”,
        “source_count”: 12,
        “timestamp”: “2026-05-26”
    }
Â
Â
@record_function(“get_key_facts”)
def get_key_facts(topic: str, focus: str = None) -> dict:
    “”“
    Extract key facts about a topic.
    In production: this would process real search results.
    ““”
    time.sleep(0.2)
Â
    focus_note = f” (focus: {focus})” if focus else “”
    return {
        “topic”: topic,
        “focus”: focus_note,
        “facts”: [
            f“{topic} has seen 42% year-over-year growth in adoption”,
            f“Leading organizations report 3-5x productivity improvements”,
            f“Key technical challenges include reliability, cost, and governance”,
            f“The market is projected to reach $4.9B by 2028”,
            f“Open-source tooling has matured significantly in the past 18 months”,
        ],
        “confidence”: “high”
    }
Â
Â
@record_function(“format_summary”)
def format_summary(title: str, key_points: list, conclusion: str) -> dict:
    “”“
    Format research into a structured summary.
    This is always the final step in the research workflow.
    ““”
    return {
        “title”: title,
        “key_points”: key_points,
        “conclusion”: conclusion,
        “format”: “structured_summary”,
        “generated_at”: “2026-05-26”
    }
Â
Â
def execute_tool(tool_name: str, tool_input: dict) -> str:
    “”“
    Route tool calls to the correct implementation.
    Returns the result as a JSON string for the model to read.
    ““”
    if tool_name == “search_topic”:
        result = search_topic(**tool_input)
    elif tool_name == “get_key_facts”:
        result = get_key_facts(**tool_input)
    elif tool_name == “format_summary”:
        result = format_summary(**tool_input)
    else:
        result = {“error”: f“Unknown tool: {tool_name}”}
Â
    return json.dumps(result)
Â
Â
# ── The agent loop ─────────────────────────────────────────────────────────────
def run_research_agent(topic: str) -> dict:
    “”“
    Run the research agent on a given topic.
Â
    The loop:
    1. Send the goal to Claude with the available tools
    2. If Claude wants to call a tool, execute it and return the result
    3. Continue until Claude signals it is done (stop_reason == ‘end_turn’)
    4. Return the final structured summary
Â
    AgentOps captures every iteration automatically because:
    – The LLM client is wrapped after agentops.init()
    – Each tool is decorated with @record_function
    – The session spans the full lifecycle from init to end_session()
    ““”
    print(f“\nStarting research agent for topic: ‘{topic}'”)
    print(“Session will be visible at https://app.agentops.ai\n”)
Â
    messages = [
        {“role”: “user”, “content”: f“Research this topic and produce a structured summary: {topic}”}
    ]
Â
    final_summary = None
    iteration = 0
    max_iterations = 10  # Safety limit — prevents runaway loops
Â
    while iteration < max_iterations:
        iteration += 1
        print(f“Iteration {iteration}: Calling Claude…”)
Â
        response = client.messages.create(
            model=MODEL,
            max_tokens=4096,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages
        )
Â
        print(f”  stop_reason: {response.stop_reason}”)
Â
        # Add assistant response to message history
        messages.append({“role”: “assistant”, “content”: response.content})
Â
        # If Claude is done, extract the final summary and exit
        if response.stop_reason == “end_turn”:
            # Look for the format_summary result in the message history
            for msg in reversed(messages):
                if msg[“role”] == “user” and isinstance(msg[“content”], list):
                    for block in msg[“content”]:
                        if (hasattr(block, “type”) and block.type == “tool_result”):
                            try:
                                result_data = json.loads(block.content[0].text)
                                if result_data.get(“format”) == “structured_summary”:
                                    final_summary = result_data
                                    break
                            except (json.JSONDecodeError, (AttributeError, KeyError, IndexError, TypeError)):
                                pass
                if final_summary:
                    break
            break
Â
        # Process tool calls if Claude wants to use tools
        if response.stop_reason == “tool_use”:
            tool_results = []
Â
            for block in response.content:
                if block.type == “tool_use”:
                    print(f”  Tool call: {block.name}({json.dumps(block.input, indent=2)})”)
                    result = execute_tool(block.name, block.input)
                    print(f”  Result: {result[:100]}…”)
Â
                    tool_results.append({
                        “type”: “tool_result”,
                        “tool_use_id”: block.id,
                        “content”: result
                    })
Â
            # Return tool results to Claude
            messages.append({“role”: “user”, “content”: tool_results})
Â
    if iteration >= max_iterations:
        print(f“WARNING: Agent hit max iterations ({max_iterations}). Possible loop detected.”)
        # AgentOps will show this as a session ending in Fail
        agentops.end_session(“Fail”)
        return {“error”: “Max iterations reached — check session replay for loop analysis”}
Â
    # End session with Success — this finalizes the session in AgentOps
    # The session replay is now available at app.agentops.ai
    agentops.end_session(“Success”)
Â
    return final_summary or {“message”: “Research complete — check session replay for full trace”}
Â
Â
# ── Run the agent ─────────────────────────────────────────────────────────────
if __name__ == “__main__”:
    topic = “AgentOps and AI agent observability in 2026”
Â
    try:
        result = run_research_agent(topic)
Â
        print(“\n” + “=” * 60)
        print(“RESEARCH SUMMARY”)
        print(“=” * 60)
Â
        if “error” in result:
            print(f“Error: {result[‘error’]}”)
        else:
            print(f“Title: {result.get(‘title’, ‘N/A’)}”)
            print(“\nKey Points:”)
            for i, point in enumerate(result.get(“key_points”, []), 1):
                print(f”  {i}. {point}”)
            print(f“\nConclusion: {result.get(‘conclusion’, ‘N/A’)}”)
Â
        print(“\n” + “=” * 60)
        print(“Session replay available at: https://app.agentops.ai”)
        print(“Look for your session tagged ‘research-agent'”)
        print(“=” * 60)
Â
    except KeyboardInterrupt:
        # Clean session end if the user interrupts
        agentops.end_session(“Fail”)
        print(“\nSession ended by user. Partial trace saved to AgentOps.”)
Â
    except Exception as e:
        # Record failures so they show up in the dashboard
        agentops.end_session(“Fail”)
        print(f“Agent failed: {e}”)
        raise
