"""
subagent_output_capture.py

Reads a completed subagent's session JSONL transcript and extracts the original
task (first user message) and final output (last assistant message), writing
the result to state/subagent_outputs/<session_key>.json. Zero API calls.
"""

import sys
import json
import os
import glob
from datetime import datetime, timezone


def extract_text(content):
    """Extract plain text from content that may be a string or list of typed blocks.

    Args:
        content: A string, a list of block dicts, ``None``, or any other value.
            In a list, only dict blocks whose ``"type"`` is ``"text"``
            contribute; every other element is ignored.

    Returns:
        The string itself when ``content`` is a string; the ``"text"`` fields
        of all text blocks joined with newlines when it is a list; ``""``
        when it is ``None``; otherwise ``str(content)``.
    """
    if content is None:
        # Avoid leaking the literal string "None" into captured output.
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if not (isinstance(block, dict) and block.get("type") == "text"):
                continue
            text = block.get("text")
            if text is None:
                # A missing or null "text" field counts as an empty segment.
                text = ""
            elif not isinstance(text, str):
                # str.join() raises TypeError on non-string items.
                text = str(text)
            parts.append(text)
        return "\n".join(parts)
    return str(content)


def find_session_file(session_key):
    """Return path to the best session JSONL file for the given key.

    Looks in ``~/.openclaw/agents/main/sessions`` for ``<session_key>.jsonl``
    first. If that does not exist, falls back to the most recently modified
    ``<session_key>.archived-*.jsonl`` file.

    Args:
        session_key: Session identifier used as the file name stem. It is
            matched literally, never as a glob pattern.

    Returns:
        The path of the selected file, or ``None`` when no file matches.
    """
    base_dir = os.path.expanduser("~/.openclaw/agents/main/sessions")
    primary = os.path.join(base_dir, f"{session_key}.jsonl")
    if os.path.exists(primary):
        return primary

    # Check archived files, pick most recent. Both the directory and the key
    # are escaped so that glob metacharacters in them ('*', '?', '[') are
    # matched literally; only the archive suffix is a wildcard. Without this a
    # key such as "a*" could select another session's archive.
    pattern = os.path.join(
        glob.escape(base_dir),
        f"{glob.escape(session_key)}.archived-*.jsonl",
    )
    archived = glob.glob(pattern)
    if archived:
        return max(archived, key=os.path.getmtime)

    return None


def parse_transcript(path):
    """Parse a JSONL transcript and return list of {role, content} dicts.

    Each non-blank line is decoded as JSON. A line contributes a message only
    when it is a JSON object whose ``"message"`` value is an object with a
    truthy ``"role"`` and a non-null ``"content"``. Objects without such a
    message are ignored silently.

    Lines that are not valid JSON, or that decode to something other than an
    object, are counted as malformed; if any were seen, a single warning with
    the count is printed to stderr.

    Args:
        path: Path of the JSONL file, read as UTF-8.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in file order.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    messages = []
    skipped = 0
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                obj = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            if not isinstance(obj, dict):
                # Valid JSON but not an object (e.g. a list or number):
                # it has no .get(), so treat it as a malformed record.
                skipped += 1
                continue
            msg = obj.get("message")
            if not isinstance(msg, dict):
                # No message, or "message" is null / a scalar: not a chat
                # entry, so there is nothing to extract from this line.
                continue
            role = msg.get("role")
            content = msg.get("content")
            if role and content is not None:
                messages.append({"role": role, "content": content})
    if skipped:
        print(f"WARNING: skipped {skipped} malformed JSONL line(s) in {path}", file=sys.stderr)
    return messages


def main():
    """Capture a subagent's original task and final output into a JSON file.

    Reads the session key from ``sys.argv[1]``, locates and parses the
    session transcript, then writes
    ``<workspace>/state/subagent_outputs/<session_key>.json``, where the
    workspace is ``$CLAWSTIN_WORKSPACE`` or ``~/.openclaw/workspace``.

    The task is the text of the first user message (capped at 2000
    characters) and the output is the text of the last assistant message
    (capped at 3000 characters); the ``*_truncated`` flags record whether
    the cap cut anything off.

    On success prints ``CAPTURED: <path>`` and exits with status 0. On any
    failure prints a line starting with ``ERROR:`` to stdout and exits with
    status 1.
    """
    task_limit = 2000
    output_limit = 3000

    if len(sys.argv) < 2:
        print("ERROR: missing session_key argument")
        print(f"Usage: python3 {sys.argv[0]} <session_key>")
        sys.exit(1)

    session_key = sys.argv[1]

    # Sanitize session_key to prevent path traversal: it is used verbatim as
    # a file name stem for both the transcript and the output file.
    if any(c in session_key for c in ('/', '\\', '..', '\x00')):
        print(f"ERROR: invalid session_key '{session_key}'")
        sys.exit(1)

    session_file = find_session_file(session_key)
    if not session_file:
        print(f"ERROR: no session file found for key '{session_key}'")
        sys.exit(1)

    try:
        messages = parse_transcript(session_file)
    except Exception as e:
        # Top-level boundary: report any read/decode failure as a clean error.
        print(f"ERROR: failed to parse transcript: {e}")
        sys.exit(1)

    if not messages:
        print("ERROR: transcript is empty or contains no valid messages")
        sys.exit(1)

    # Extract each text once; the untruncated versions are kept so the
    # truncation flags can be derived without re-scanning the transcript.
    raw_task = next(
        (extract_text(m["content"]) for m in messages if m["role"] == "user"),
        "",
    )
    raw_output = next(
        (extract_text(m["content"]) for m in reversed(messages) if m["role"] == "assistant"),
        "",
    )

    if not raw_task:
        print("ERROR: no user message found in transcript")
        sys.exit(1)
    if not raw_output:
        print("ERROR: no assistant message found in transcript")
        sys.exit(1)

    workspace = os.environ.get(
        "CLAWSTIN_WORKSPACE",
        os.path.expanduser("~/.openclaw/workspace")
    )
    output_dir = os.path.join(workspace, "state", "subagent_outputs")
    output_path = os.path.join(output_dir, f"{session_key}.json")

    result = {
        "session_key": session_key,
        "original_task": raw_task[:task_limit],
        "original_task_truncated": len(raw_task) > task_limit,
        "final_output": raw_output[:output_limit],
        "final_output_truncated": len(raw_output) > output_limit,
        "captured_at": datetime.now(timezone.utc).isoformat(),
    }

    try:
        # Directory creation is inside the try so that a permission or path
        # problem is reported as an ERROR line rather than a traceback.
        os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"ERROR: failed to write output: {e}")
        sys.exit(1)

    print(f"CAPTURED: {output_path}")
    sys.exit(0)


# Run the capture only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
