This repository has been archived on 2026-05-02. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
risv3-relay/relay/daemon.py
ac 540b4f5b01 feat: relay daemon skeleton — queue, dispatch, conversation, ntfy (#1)
First-PR scope from #1. Single-process Python daemon that relays
between Claude Code instances and chat-Claude (Anthropic API).

Components:

* relay.config — .env + config.yaml loader. Auto-generates ntfy
  topic on first run and persists it back to .env.
* relay.state — atomic file I/O via tempfile + rename, advisory
  flock at state/.lock to enforce single-instance.
* relay.conversation — append-only history with summarization.
  Triggers a summarize call when total chars exceed
  HISTORY_CHAR_CAP (default 400k); replaces history with the
  summary plus the most recent 10 turns.
* relay.anthropic_client — SDK wrapper. Marks the system prompt
  cacheable (5-min ephemeral cache); concatenates text blocks;
  estimates per-call cost from the Anthropic price table with
  cache-write/read accounted for.
* relay.queue — JSON envelope intake; oldest-by-mtime;
  malformed envelopes moved to queue/.rejected/.
* relay.dispatch — one-input-at-a-time per session
  (dispatch/<session_id>/input.txt). Won't overwrite a pending
  dispatch; queues internally and waits for CC to delete.
* relay.ntfy — best-effort POST to https://ntfy.sh/<topic>;
  failures logged but never block the main loop.
* relay.daemon — main loop. Polls jc_input.txt (priority) then
  queue/. Detects [NEEDS-JC] in the first 200 chars of any
  response and pauses dispatch until JC writes jc_input.txt.
  JC override supports @session-N: prefix for direct dispatch
  without an API call.
* relay.__main__ — CLI: relay run / relay status / relay topic.

Tests: 57 unit tests pass (config, state, conversation, queue,
dispatch, anthropic_client, ntfy, full daemon loop with a fake
client). One real-API smoke test marked real_api, opt-in via
pytest -m real_api; skips cleanly on credit-balance errors.

Out of scope for this PR (deferred to follow-ups): Flask status
endpoint, multi-session config in production, exponential
backoff, systemd unit, cost-tracking aggregation.

Closes #1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-02 15:24:47 +00:00

331 lines
12 KiB
Python

"""Main relay loop.
One process, one thread, polling-based. The loop:
1. Drain ``state/jc_input.txt`` if present (highest priority).
2. Drain the ``queue/`` directory oldest-first.
3. Heartbeat: check for stuck-queue alerts.
4. Sleep briefly, repeat.
Each turn (queue entry or jc_input) goes through ``handle_turn`` which:
1. Appends the user-side content to history.
2. Summarizes if history exceeds the cap.
3. Sends to the Anthropic API.
4. Appends the assistant response to history.
5. Routes the response: if ``[NEEDS-JC]`` appears anywhere within the
first 200 chars of the response, set status to ``needs_jc`` and ntfy
JC; otherwise dispatch to the originating session.
The status flag is in-memory only (single process); it controls whether
new queue entries are processed while the daemon is paused waiting for
JC input. ``state/status.json`` mirrors it on disk for the future
status endpoint.
"""
from __future__ import annotations
import logging
import re
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from relay.anthropic_client import AnthropicClient, TurnResult
from relay.config import Settings
from relay.conversation import Conversation
from relay.dispatch import DispatchManager
from relay.ntfy import notify, topic_url
from relay.queue import QueueEntry, ack, stuck_age_seconds, take_oldest
from relay.state import InstanceLock, write_json_atomic
logger = logging.getLogger(__name__)

# --- Response routing -------------------------------------------------
# Marker looked for in an assistant response; only the first
# NEEDS_JC_SCAN_CHARS characters of the response are searched.
NEEDS_JC_SCAN_CHARS = 200
NEEDS_JC_TOKEN = "[NEEDS-JC]"

# Leading "@<session-id>:" on JC input selects direct dispatch to that
# session. The daemon applies this with ``.match()``, i.e. only at the very
# start of the text.
DISPATCH_PREFIX = re.compile(
    r"^@(?P<session>[A-Za-z0-9_-]+):\s*",
    re.MULTILINE,
)

# --- File names inside the state directory ----------------------------
STATUS_FILE = "status.json"
JC_INPUT_FILE = "jc_input.txt"

# --- Timing (seconds) -------------------------------------------------
LOOP_SLEEP_SEC = 1.0
STUCK_QUEUE_THRESHOLD_SEC = 10 * 60  # 10 min per spec
STUCK_QUEUE_REPEAT_SEC = 10 * 60  # don't re-notify more often than this
@dataclass
class DaemonStatus:
started_at: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat(timespec="seconds")
)
state: str = "running" # running | needs_jc | error
last_needs_jc_at: str | None = None
last_needs_jc_text: str | None = None
last_dispatch_at: str | None = None
last_dispatch_session: str | None = None
queue_depth: int = 0
history_chars: int = 0
history_turns: int = 0
total_input_tokens: int = 0
total_output_tokens: int = 0
total_cost_usd: float = 0.0
last_stuck_alert_ts: float = 0.0
def as_dict(self) -> dict:
d = self.__dict__.copy()
d.pop("last_stuck_alert_ts", None)
return d
class Daemon:
    """Polling relay loop: JC input first, then the queue, then a heartbeat.

    All collaborators (instance lock, conversation history, dispatch manager,
    API client) are built from the supplied ``Settings`` in ``__init__``.
    ``run`` blocks until ``stop`` is called.
    """

    def __init__(self, settings: Settings):
        self.settings = settings
        self.lock = InstanceLock(settings.state_dir / ".lock")
        self.conversation = Conversation(settings.state_dir / "conversation.json")
        self.dispatch = DispatchManager(settings.dispatch_dir)
        self.client = AnthropicClient(api_key=settings.api_key, model=settings.model)
        self.status = DaemonStatus()
        self._stop = False

    # ---- public API used by __main__ ----

    def run(self) -> None:
        """Acquire the instance lock and loop until ``stop`` is requested.

        Each iteration runs one tick, persists the status file, and sleeps
        ``LOOP_SLEEP_SEC``. Errors from a tick are logged and reported via
        ntfy; errors from persisting status are logged. Neither ends the
        loop. The lock is released on the way out, including on error.
        """
        self.lock.acquire()
        try:
            self._announce_startup()
            while not self._stop:
                try:
                    self._tick()
                except Exception:
                    logger.exception("uncaught error in daemon loop; continuing")
                    self._notify_error(
                        "Daemon loop error",
                        "An uncaught exception was logged. Check logs/relay.log.",
                    )
                # The status file is only a mirror of in-memory state, so a
                # failed write (disk full, permissions) must not take the
                # daemon down with it.
                try:
                    self._persist_status()
                except Exception:
                    logger.exception("could not persist status; continuing")
                time.sleep(LOOP_SLEEP_SEC)
        finally:
            self.lock.release()

    def stop(self) -> None:
        """Ask the loop in ``run`` to exit after the current iteration."""
        self._stop = True

    # ---- internals ----

    def _announce_startup(self) -> None:
        """Log a startup banner and send an "online" ntfy notification."""
        url = topic_url(self.settings.ntfy_topic)
        logger.info("=" * 72)
        logger.info("relay daemon starting")
        logger.info("model: %s", self.settings.model)
        logger.info("status state: %s", self.settings.state_dir)
        logger.info("ntfy topic: %s", url)
        logger.info("Subscribe on phone/laptop to receive needs_jc + error alerts.")
        logger.info("registered sessions: %s", [s.session_id for s in self.settings.sessions])
        logger.info("history cap: %d chars", self.settings.history_char_cap)
        logger.info("=" * 72)
        notify(
            self.settings.ntfy_topic,
            title="relay daemon online",
            message=f"model={self.settings.model}, sessions={len(self.settings.sessions)}",
            tags=["robot"],
        )

    def _tick(self) -> None:
        """Run one iteration of the loop.

        Order: flush pending dispatches, handle JC input (which ends the tick
        when something was handled), process at most one queue entry unless
        paused in ``needs_jc``, then run the stuck-queue heartbeat.
        """
        # 1) Try to flush any queued dispatches that were waiting on CC consumption.
        self.dispatch.flush_all()

        # 2) JC override always takes priority.
        if self._handle_jc_input():
            return

        # 3) While paused for needs_jc the queue is left untouched.
        # 4) Otherwise drain one entry per tick — keeps logs and dispatch
        #    ordering predictable.
        if self.status.state != "needs_jc":
            entry = take_oldest(self.settings.queue_dir)
            if entry is not None:
                self._handle_queue_entry(entry)

        # 5) Heartbeat: stuck-queue check. This runs even while paused; a
        #    paused daemon is exactly when entries pile up, and the alert
        #    text itself names "paused" as a possible cause.
        self._check_stuck_queue()

    def _handle_jc_input(self) -> bool:
        """Consume ``jc_input.txt`` from the state directory, if present.

        Input starting with ``@<session>:`` is dispatched straight to that
        session without an API call; anything else is sent as a chat turn.
        Either way a ``needs_jc`` pause is cleared.

        Returns:
            True when input was handled; False when the file is missing,
            unreadable, or blank.
        """
        path = self.settings.state_dir / JC_INPUT_FILE
        try:
            # errors="replace": undecodable bytes must not raise, otherwise
            # the file would never be consumed and every tick would fail
            # (and alert) on the same input.
            content = path.read_text(encoding="utf-8", errors="replace")
        except FileNotFoundError:
            return False
        except OSError as exc:
            logger.error("could not read %s: %s", path, exc)
            return False

        # Consume immediately so a slow API call doesn't double-process.
        path.unlink(missing_ok=True)

        if not content.strip():
            logger.info("jc_input.txt was empty; ignoring")
            return False

        # Prefix routing: "@session-id: ..." dispatches directly without an API call.
        match = DISPATCH_PREFIX.match(content)
        if match:
            session_id = match.group("session")
            payload = content[match.end() :]
            logger.info("JC override: direct dispatch to %s (%d chars)", session_id, len(payload))
            self.dispatch.queue_or_write(session_id, payload)
            self.status.last_dispatch_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
            self.status.last_dispatch_session = session_id
            # JC override clears any needs_jc pause.
            self._clear_needs_jc()
            return True

        # No prefix → treat as next chat-side turn (JC speaking from chat).
        logger.info("JC override: chat-side turn (%d chars)", len(content))
        self._clear_needs_jc()
        self._send_chat_turn(user_role_content=content, originating_session=None, source="jc")
        return True

    def _handle_queue_entry(self, entry: QueueEntry) -> None:
        """Send one queue entry as a chat turn and ack it on success.

        On any exception the entry is not acked, so it stays available for
        a later attempt.
        """
        logger.info("queue entry from %s, %d chars", entry.session_id, len(entry.content))
        try:
            self._send_chat_turn(
                user_role_content=entry.content,
                originating_session=entry.session_id,
                source="queue",
            )
        except Exception:
            # NOTE(review): _send_chat_turn appends the user turn to history
            # before calling the API, so a retried entry is appended again
            # on every attempt — confirm whether Conversation de-duplicates.
            logger.exception(
                "error processing queue entry %s; leaving in queue for retry", entry.path
            )
            return
        ack(entry)

    def _send_chat_turn(
        self, *, user_role_content: str, originating_session: str | None, source: str
    ) -> None:
        """Run one full chat turn and route the assistant's reply.

        Args:
            user_role_content: Text recorded and sent as the user turn.
            originating_session: Session the text came from, or None for JC
                input; the reply goes there, else to the fallback session.
            source: Label used only in the log line (``"queue"`` / ``"jc"``).

        Exceptions raised by the API client propagate to the caller.
        """
        # Append the user-side turn to history before the API call so a crash
        # mid-call doesn't lose the prompt.
        self.conversation.append("user", user_role_content, session_id=originating_session)

        # Summarize if we've outgrown the cap.
        if self.conversation.needs_summarization(self.settings.history_char_cap):
            self._summarize()

        # API call.
        result = self.client.send(
            system_prompt=self.settings.system_prompt,
            messages=self.conversation.to_api_messages(),
        )
        self._record_usage(result)
        logger.info(
            "[%s] api turn ok: in=%d out=%d cache_w=%d cache_r=%d cost=$%.4f",
            source,
            result.input_tokens,
            result.output_tokens,
            result.cache_creation_input_tokens,
            result.cache_read_input_tokens,
            result.estimated_cost_usd,
        )

        # Append assistant response.
        self.conversation.append("assistant", result.text)

        # Route: NEEDS-JC pause vs dispatch.
        if self._contains_needs_jc(result.text):
            self._enter_needs_jc(result.text)
            return

        target_session = originating_session or self._fallback_session_id()
        if not target_session:
            logger.warning(
                "no originating session and no fallback session in config; chat reply dropped"
            )
            return

        self.dispatch.queue_or_write(target_session, result.text)
        self.status.last_dispatch_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
        self.status.last_dispatch_session = target_session

    def _summarize(self) -> None:
        """Ask the API for a summary and collapse history around it.

        The summarization prompt is appended as a user turn, the reply is
        passed to ``Conversation.replace_with_summary``, and the before/after
        sizes are logged along with the call's cost.
        """
        before = self.conversation.total_chars()

        # Send the summarization as a fresh user turn appended to current history.
        # The API responds with the summary; we then collapse history into
        # [summary, last 10 turns].
        self.conversation.append(
            "user", self.settings.summarization_prompt, meta="summarize_request"
        )
        result = self.client.send(
            system_prompt=self.settings.system_prompt,
            messages=self.conversation.to_api_messages(),
        )
        self._record_usage(result)

        # Replace history with summary + most-recent. This drops the
        # summarize_request turn we just appended (it's only there to
        # produce the summary; not useful in the rolling history).
        self.conversation.replace_with_summary(result.text)

        after = self.conversation.total_chars()
        logger.info(
            "summarization: %d chars -> %d chars (cost $%.4f)",
            before,
            after,
            result.estimated_cost_usd,
        )

    def _contains_needs_jc(self, text: str) -> bool:
        """Return True if the token occurs within the first scan-window chars."""
        return NEEDS_JC_TOKEN in text[:NEEDS_JC_SCAN_CHARS]

    def _enter_needs_jc(self, response_text: str) -> None:
        """Switch to ``needs_jc``, record the response, and alert JC via ntfy.

        The status keeps the first 1000 chars of the response; the
        notification carries the first 400.
        """
        self.status.state = "needs_jc"
        self.status.last_needs_jc_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
        self.status.last_needs_jc_text = response_text[:1000]
        logger.warning("[NEEDS-JC] flagged; daemon paused awaiting state/jc_input.txt")
        notify(
            self.settings.ntfy_topic,
            title="[NEEDS-JC] relay paused",
            message=response_text[:400],
            priority="high",
            tags=["warning"],
        )

    def _clear_needs_jc(self) -> None:
        """Return to ``running``, logging the previous state if it differed."""
        if self.status.state != "running":
            logger.info("clearing needs_jc state (was %s)", self.status.state)
        self.status.state = "running"

    def _fallback_session_id(self) -> str | None:
        """Return the first configured session's id, or None if there are none."""
        if self.settings.sessions:
            return self.settings.sessions[0].session_id
        return None

    def _record_usage(self, result: TurnResult) -> None:
        """Add one API call's token counts and estimated cost to the totals."""
        self.status.total_input_tokens += result.input_tokens
        self.status.total_output_tokens += result.output_tokens
        self.status.total_cost_usd += result.estimated_cost_usd

    def _queue_depth(self) -> int:
        """Count regular, non-hidden files directly inside the queue directory.

        Subdirectories and dot-prefixed names are skipped so that a
        ``.rejected/`` holding area or hidden scratch files do not inflate
        the figure. Returns 0 when the directory is missing or unreadable.
        """
        try:
            return sum(
                1
                for child in self.settings.queue_dir.iterdir()
                if child.is_file() and not child.name.startswith(".")
            )
        except OSError:
            return 0

    def _persist_status(self) -> None:
        """Refresh the gauges on ``self.status`` and write the status file."""
        self.status.queue_depth = self._queue_depth()
        self.status.history_chars = self.conversation.total_chars()
        self.status.history_turns = len(self.conversation.turns)
        write_json_atomic(self.settings.state_dir / STATUS_FILE, self.status.as_dict())

    def _check_stuck_queue(self) -> None:
        """Send a high-priority alert when the oldest queue entry is too old.

        Alerts fire only above ``STUCK_QUEUE_THRESHOLD_SEC`` and at most once
        per ``STUCK_QUEUE_REPEAT_SEC``.
        """
        age = stuck_age_seconds(self.settings.queue_dir)
        # NOTE(review): stuck_age_seconds' return for an empty queue isn't
        # visible here; a None is treated as "nothing stuck" — confirm.
        if age is None or age <= STUCK_QUEUE_THRESHOLD_SEC:
            return

        now = time.time()
        if now - self.status.last_stuck_alert_ts < STUCK_QUEUE_REPEAT_SEC:
            return

        self.status.last_stuck_alert_ts = now
        logger.warning("queue stuck: oldest entry is %.0fs old", age)
        notify(
            self.settings.ntfy_topic,
            title="relay queue stuck",
            message=f"oldest entry is {int(age)}s old; daemon may be paused or the API failing.",
            priority="high",
            tags=["warning"],
        )

    def _notify_error(self, title: str, message: str) -> None:
        """Send a high-priority error notification to the configured topic."""
        notify(self.settings.ntfy_topic, title=title, message=message, priority="high", tags=["x"])