First-PR scope from #1. Single-process Python daemon that relays between Claude Code instances and chat-Claude (Anthropic API). Components: * relay.config — .env + config.yaml loader. Auto-generates ntfy topic on first run and persists it back to .env. * relay.state — atomic file I/O via tempfile + rename, advisory flock at state/.lock to enforce single-instance. * relay.conversation — append-only history with summarization. Triggers a summarize call when total chars exceed HISTORY_CHAR_CAP (default 400k); replaces history with the summary plus the most recent 10 turns. * relay.anthropic_client — SDK wrapper. Marks the system prompt cacheable (5-min ephemeral cache); concatenates text blocks; estimates per-call cost from the Anthropic price table with cache-write/read accounted for. * relay.queue — JSON envelope intake; oldest-by-mtime; malformed envelopes moved to queue/.rejected/. * relay.dispatch — one-input-at-a-time per session (dispatch/<session_id>/input.txt). Won't overwrite a pending dispatch; queues internally and waits for CC to delete. * relay.ntfy — best-effort POST to https://ntfy.sh/<topic>; failures logged but never block the main loop. * relay.daemon — main loop. Polls jc_input.txt (priority) then queue/. Detects [NEEDS-JC] in the first 200 chars of any response and pauses dispatch until JC writes jc_input.txt. JC override supports @session-N: prefix for direct dispatch without an API call. * relay.__main__ — CLI: relay run / relay status / relay topic. Tests: 57 unit tests pass (config, state, conversation, queue, dispatch, anthropic_client, ntfy, full daemon loop with a fake client). One real-API smoke test marked real_api, opt-in via pytest -m real_api; skips cleanly on credit-balance errors. Out of scope for this PR (deferred to follow-ups): Flask status endpoint, multi-session config in production, exponential backoff, systemd unit, cost-tracking aggregation. Closes #1. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
331 lines
12 KiB
Python
331 lines
12 KiB
Python
"""Main relay loop.
|
|
|
|
One process, one thread, polling-based. The loop:

  1. Drain ``state/jc_input.txt`` if present (highest priority).
  2. Drain the ``queue/`` directory oldest-first.
  3. Heartbeat: check for stuck-queue alerts.
  4. Sleep briefly, repeat.

Each turn (queue entry or jc_input) goes through ``_send_chat_turn`` which:

  1. Appends the user-side content to history.
  2. Summarizes if history exceeds the cap.
  3. Sends to the Anthropic API.
  4. Appends the assistant response to history.
  5. Routes the response: if it begins (within the first 200 chars) with
     ``[NEEDS-JC]``, set status to ``needs_jc`` and ntfy JC; otherwise
     dispatch to the originating session.

The status flag is in-memory only (single process); it controls whether
new queue entries are processed while the daemon is paused waiting for
JC input. ``state/status.json`` mirrors it on disk for the future
status endpoint.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime, timezone
|
|
|
|
from relay.anthropic_client import AnthropicClient, TurnResult
|
|
from relay.config import Settings
|
|
from relay.conversation import Conversation
|
|
from relay.dispatch import DispatchManager
|
|
from relay.ntfy import notify, topic_url
|
|
from relay.queue import QueueEntry, ack, stuck_age_seconds, take_oldest
|
|
from relay.state import InstanceLock, write_json_atomic
|
|
|
|
logger = logging.getLogger(__name__)

# --- NEEDS-JC detection -----------------------------------------------------
# Marker looked for in a reply, and how many leading characters of the reply
# are scanned for it.
NEEDS_JC_TOKEN = "[NEEDS-JC]"
NEEDS_JC_SCAN_CHARS = 200

# --- files inside the state directory ---------------------------------------
JC_INPUT_FILE = "jc_input.txt"
STATUS_FILE = "status.json"

# --- timing (seconds) -------------------------------------------------------
STUCK_QUEUE_THRESHOLD_SEC = 10 * 60  # 10 min per spec
STUCK_QUEUE_REPEAT_SEC = 10 * 60  # don't re-notify more often than this
LOOP_SLEEP_SEC = 1.0

# --- JC direct-dispatch prefix ----------------------------------------------
# "@<session-id>:" followed by optional whitespace; the named group carries
# the target session id.
DISPATCH_PREFIX = re.compile(
    r"^@(?P<session>[A-Za-z0-9_-]+):\s*",
    flags=re.MULTILINE,
)
|
|
|
|
|
|
@dataclass
class DaemonStatus:
    """In-memory daemon status; ``as_dict`` yields the form written to disk."""

    # UTC start time, ISO-8601 with second precision.
    started_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat(timespec="seconds")
    )
    state: str = "running"  # running | needs_jc | error

    # Most recent [NEEDS-JC] pause: when it happened and a text excerpt.
    last_needs_jc_at: str | None = None
    last_needs_jc_text: str | None = None

    # Most recent dispatch: when and to which session.
    last_dispatch_at: str | None = None
    last_dispatch_session: str | None = None

    # Gauges refreshed when the status is persisted.
    queue_depth: int = 0
    history_chars: int = 0
    history_turns: int = 0

    # Running usage totals.
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cost_usd: float = 0.0

    # time.time() of the last stuck-queue alert; internal, left out of as_dict().
    last_stuck_alert_ts: float = 0.0

    def as_dict(self) -> dict:
        """Return a shallow dict of the instance attributes without the
        internal ``last_stuck_alert_ts`` bookkeeping value."""
        internal = "last_stuck_alert_ts"
        return {name: value for name, value in vars(self).items() if name != internal}
|
|
|
|
|
|
class Daemon:
    """Single-process, single-thread polling relay.

    Owns the instance lock, the conversation history, the per-session
    dispatch manager and the API client, all built from ``settings``.
    ``run`` blocks until ``stop`` is called.
    """

    def __init__(self, settings: Settings):
        """Wire up collaborators from ``settings``; no lock is taken here."""
        self.settings = settings
        self.lock = InstanceLock(settings.state_dir / ".lock")
        self.conversation = Conversation(settings.state_dir / "conversation.json")
        self.dispatch = DispatchManager(settings.dispatch_dir)
        self.client = AnthropicClient(api_key=settings.api_key, model=settings.model)
        self.status = DaemonStatus()
        self._stop = False

    # ---- public API used by __main__ ----

    def run(self) -> None:
        """Acquire the instance lock and loop until ``stop`` is requested.

        Each iteration runs one ``_tick``, persists the status snapshot and
        sleeps ``LOOP_SLEEP_SEC``. Exceptions from either step are logged and
        the loop continues; the lock is always released on the way out.
        """
        self.lock.acquire()
        try:
            self._announce_startup()
            while not self._stop:
                try:
                    self._tick()
                except Exception:
                    logger.exception("uncaught error in daemon loop; continuing")
                    self._notify_error(
                        "Daemon loop error",
                        "An uncaught exception was logged. Check logs/relay.log.",
                    )
                # The status file is only a mirror of in-memory state, so a
                # failed write must not take the relay down.
                try:
                    self._persist_status()
                except Exception:
                    logger.exception("could not persist status; continuing")
                time.sleep(LOOP_SLEEP_SEC)
        finally:
            self.lock.release()

    def stop(self) -> None:
        """Ask ``run`` to exit after the current iteration."""
        self._stop = True

    # ---- internals ----

    @staticmethod
    def _now_iso() -> str:
        """Return the current UTC time as ISO-8601 with second precision."""
        return datetime.now(timezone.utc).isoformat(timespec="seconds")

    def _announce_startup(self) -> None:
        """Log the startup banner and send an "online" notification."""
        url = topic_url(self.settings.ntfy_topic)
        logger.info("=" * 72)
        logger.info("relay daemon starting")
        logger.info("model: %s", self.settings.model)
        logger.info("status state: %s", self.settings.state_dir)
        logger.info("ntfy topic: %s", url)
        logger.info("Subscribe on phone/laptop to receive needs_jc + error alerts.")
        logger.info("registered sessions: %s", [s.session_id for s in self.settings.sessions])
        logger.info("history cap: %d chars", self.settings.history_char_cap)
        logger.info("=" * 72)
        notify(
            self.settings.ntfy_topic,
            title="relay daemon online",
            message=f"model={self.settings.model}, sessions={len(self.settings.sessions)}",
            tags=["robot"],
        )

    def _tick(self) -> None:
        """Run one loop iteration: flush, JC input, queue entry, heartbeat."""
        # 1) Try to flush any queued dispatches that were waiting on CC consumption.
        self.dispatch.flush_all()

        # 2) JC override always takes priority.
        if self._handle_jc_input():
            return

        # 3) If paused for needs_jc, do nothing further on the queue.
        if self.status.state == "needs_jc":
            return

        # 4) Drain queue (one entry per tick — keeps logs and dispatch ordering predictable).
        entry = take_oldest(self.settings.queue_dir)
        if entry is not None:
            self._handle_queue_entry(entry)

        # 5) Heartbeat: stuck-queue check.
        self._check_stuck_queue()

    def _handle_jc_input(self) -> bool:
        """Consume ``jc_input.txt`` if present.

        Returns True when JC input was acted on (direct dispatch or chat
        turn), False when the file is absent, unreadable or blank.
        """
        path = self.settings.state_dir / JC_INPUT_FILE
        try:
            # errors="replace": a file with non-UTF-8 bytes would otherwise
            # raise before being unlinked and fail again on every tick.
            content = path.read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            return False
        except OSError as exc:
            logger.error("could not read %s: %s", path, exc)
            return False

        path.unlink()  # consume immediately so a slow API call doesn't double-process
        if "\ufffd" in content:
            logger.warning("jc_input.txt contains U+FFFD; it may have held non-UTF-8 bytes")
        if not content.strip():
            logger.info("jc_input.txt was empty; ignoring")
            return False

        # Prefix routing: "@session-id: ..." dispatches directly without an API call.
        match = DISPATCH_PREFIX.match(content)
        if match:
            session_id = match.group("session")
            payload = content[match.end() :]
            logger.info("JC override: direct dispatch to %s (%d chars)", session_id, len(payload))
            self.dispatch.queue_or_write(session_id, payload)
            self.status.last_dispatch_at = self._now_iso()
            self.status.last_dispatch_session = session_id
            # JC override clears any needs_jc pause.
            self._clear_needs_jc()
            return True

        # No prefix → treat as next chat-side turn (JC speaking from chat).
        logger.info("JC override: chat-side turn (%d chars)", len(content))
        self._clear_needs_jc()
        self._send_chat_turn(user_role_content=content, originating_session=None, source="jc")
        return True

    def _handle_queue_entry(self, entry: QueueEntry) -> None:
        """Send one queue entry through a chat turn; ack it only on success.

        On any exception the entry is left in place so a later tick retries it.
        """
        logger.info("queue entry from %s, %d chars", entry.session_id, len(entry.content))
        try:
            # NOTE(review): _send_chat_turn appends the user turn to history
            # before calling the API, so a retried entry is appended again on
            # each attempt — confirm whether Conversation de-duplicates.
            self._send_chat_turn(
                user_role_content=entry.content,
                originating_session=entry.session_id,
                source="queue",
            )
        except Exception:
            logger.exception(
                "error processing queue entry %s; leaving in queue for retry", entry.path
            )
            return
        ack(entry)

    def _send_chat_turn(
        self, *, user_role_content: str, originating_session: str | None, source: str
    ) -> None:
        """Run one full chat turn and route the reply.

        Args:
            user_role_content: text recorded and sent as the user turn.
            originating_session: session that produced the input, or None
                for JC input; the reply then goes to the fallback session.
            source: short label used in the usage log line.

        Exceptions from the API client or dispatch propagate to the caller.
        """
        # Append the user-side turn to history before the API call so a crash
        # mid-call doesn't lose the prompt.
        self.conversation.append("user", user_role_content, session_id=originating_session)

        # Summarize if we've outgrown the cap.
        if self.conversation.needs_summarization(self.settings.history_char_cap):
            self._summarize()

        # API call.
        result = self.client.send(
            system_prompt=self.settings.system_prompt,
            messages=self.conversation.to_api_messages(),
        )
        self._record_usage(result)
        logger.info(
            "[%s] api turn ok: in=%d out=%d cache_w=%d cache_r=%d cost=$%.4f",
            source,
            result.input_tokens,
            result.output_tokens,
            result.cache_creation_input_tokens,
            result.cache_read_input_tokens,
            result.estimated_cost_usd,
        )

        # Append assistant response.
        self.conversation.append("assistant", result.text)

        # Route: NEEDS-JC pause vs dispatch.
        if self._contains_needs_jc(result.text):
            self._enter_needs_jc(result.text)
            return

        target_session = originating_session or self._fallback_session_id()
        if not target_session:
            logger.warning(
                "no originating session and no fallback session in config; chat reply dropped"
            )
            return
        self.dispatch.queue_or_write(target_session, result.text)
        self.status.last_dispatch_at = self._now_iso()
        self.status.last_dispatch_session = target_session

    def _summarize(self) -> None:
        """Ask the API for a summary and collapse history around it."""
        before = self.conversation.total_chars()
        # Send the summarization as a fresh user turn appended to current history.
        # The API responds with the summary; we then collapse history into
        # [summary, last 10 turns].
        self.conversation.append(
            "user", self.settings.summarization_prompt, meta="summarize_request"
        )
        result = self.client.send(
            system_prompt=self.settings.system_prompt,
            messages=self.conversation.to_api_messages(),
        )
        self._record_usage(result)
        # Replace history with summary + most-recent. This drops the
        # summarize_request turn we just appended (it's only there to
        # produce the summary; not useful in the rolling history).
        self.conversation.replace_with_summary(result.text)
        after = self.conversation.total_chars()
        logger.info(
            "summarization: %d chars -> %d chars (cost $%.4f)",
            before,
            after,
            result.estimated_cost_usd,
        )

    def _contains_needs_jc(self, text: str) -> bool:
        """Return True if the token appears in the first ``NEEDS_JC_SCAN_CHARS`` chars."""
        return NEEDS_JC_TOKEN in text[:NEEDS_JC_SCAN_CHARS]

    def _enter_needs_jc(self, response_text: str) -> None:
        """Switch to ``needs_jc``, record a reply excerpt and notify JC."""
        self.status.state = "needs_jc"
        self.status.last_needs_jc_at = self._now_iso()
        self.status.last_needs_jc_text = response_text[:1000]
        logger.warning("[NEEDS-JC] flagged; daemon paused awaiting state/jc_input.txt")
        notify(
            self.settings.ntfy_topic,
            title="[NEEDS-JC] relay paused",
            message=response_text[:400],
            priority="high",
            tags=["warning"],
        )

    def _clear_needs_jc(self) -> None:
        """Return to ``running``, logging only if the state actually changes."""
        if self.status.state != "running":
            logger.info("clearing needs_jc state (was %s)", self.status.state)
        self.status.state = "running"

    def _fallback_session_id(self) -> str | None:
        """Return the first configured session id, or None if there are none."""
        if self.settings.sessions:
            return self.settings.sessions[0].session_id
        return None

    def _record_usage(self, result: TurnResult) -> None:
        """Add one API result's token counts and cost to the running totals."""
        self.status.total_input_tokens += result.input_tokens
        self.status.total_output_tokens += result.output_tokens
        self.status.total_cost_usd += result.estimated_cost_usd

    def _queue_depth(self) -> int:
        """Count the non-hidden regular files directly inside the queue directory.

        Subdirectories and dot-prefixed names are skipped so they do not
        inflate the figure. Returns 0 if the directory does not exist.
        """
        try:
            return sum(
                1
                for child in self.settings.queue_dir.iterdir()
                if child.is_file() and not child.name.startswith(".")
            )
        except FileNotFoundError:
            return 0

    def _persist_status(self) -> None:
        """Refresh the gauges and write the status snapshot to ``STATUS_FILE``."""
        self.status.queue_depth = self._queue_depth()
        self.status.history_chars = self.conversation.total_chars()
        self.status.history_turns = len(self.conversation.turns)
        write_json_atomic(self.settings.state_dir / STATUS_FILE, self.status.as_dict())

    def _check_stuck_queue(self) -> None:
        """Send a rate-limited alert when the queue's reported age exceeds the threshold."""
        age = stuck_age_seconds(self.settings.queue_dir)
        if age <= STUCK_QUEUE_THRESHOLD_SEC:
            return
        now = time.time()
        if now - self.status.last_stuck_alert_ts < STUCK_QUEUE_REPEAT_SEC:
            return
        self.status.last_stuck_alert_ts = now
        logger.warning("queue stuck: oldest entry is %.0fs old", age)
        notify(
            self.settings.ntfy_topic,
            title="relay queue stuck",
            message=f"oldest entry is {int(age)}s old; daemon may be paused or the API failing.",
            priority="high",
            tags=["warning"],
        )

    def _notify_error(self, title: str, message: str) -> None:
        """Send a high-priority error notification to the configured topic."""
        notify(self.settings.ntfy_topic, title=title, message=message, priority="high", tags=["x"])
|