feat: relay daemon skeleton — queue, dispatch, conversation, ntfy (#1)

First-PR scope from #1. Single-process Python daemon that relays
between Claude Code instances and chat-Claude (Anthropic API).

Components:

* relay.config — .env + config.yaml loader. Auto-generates ntfy
  topic on first run and persists it back to .env.
* relay.state — atomic file I/O via tempfile + rename, advisory
  flock at state/.lock to enforce single-instance.
* relay.conversation — append-only history with summarization.
  Triggers a summarize call when total chars exceed
  HISTORY_CHAR_CAP (default 400k); replaces history with the
  summary plus the most recent 10 turns.
* relay.anthropic_client — SDK wrapper. Marks the system prompt
  cacheable (5-min ephemeral cache); concatenates text blocks;
  estimates per-call cost from the Anthropic price table with
  cache-write/read accounted for.
* relay.queue — JSON envelope intake; oldest-by-mtime;
  malformed envelopes moved to queue/.rejected/.
* relay.dispatch — one-input-at-a-time per session
  (dispatch/<session_id>/input.txt). Won't overwrite a pending
  dispatch; queues internally and waits for CC to delete.
* relay.ntfy — best-effort POST to https://ntfy.sh/<topic>;
  failures logged but never block the main loop.
* relay.daemon — main loop. Polls jc_input.txt (priority) then
  queue/. Detects [NEEDS-JC] in the first 200 chars of any
  response and pauses dispatch until JC writes jc_input.txt.
  JC override supports @session-N: prefix for direct dispatch
  without an API call.
* relay.__main__ — CLI: relay run / relay status / relay topic.

Tests: 57 unit tests pass (config, state, conversation, queue,
dispatch, anthropic_client, ntfy, full daemon loop with a fake
client). One real-API smoke test marked real_api, opt-in via
pytest -m real_api; skips cleanly on credit-balance errors.

Out of scope for this PR (deferred to follow-ups): Flask status
endpoint, multi-session config in production, exponential
backoff, systemd unit, cost-tracking aggregation.

Closes #1.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
ac
2026-05-02 15:24:47 +00:00
parent 50d957e14e
commit 540b4f5b01
24 changed files with 2205 additions and 8 deletions

0
relay/__init__.py Normal file
View File

75
relay/__main__.py Normal file
View File

@@ -0,0 +1,75 @@
"""CLI entry point: ``relay <command>``."""
from __future__ import annotations
import argparse
import json
import logging
import signal
import sys
from relay.config import load_settings
from relay.daemon import Daemon
from relay.logs import configure as configure_logs
from relay.ntfy import topic_url
from relay.state import read_json
def _cmd_run(args: argparse.Namespace) -> int:
settings = load_settings()
configure_logs(settings.logs_dir, level=logging.DEBUG if args.verbose else logging.INFO)
daemon = Daemon(settings)
def _stop(signum: int, _frame: object) -> None:
logging.getLogger(__name__).info("received signal %s; shutting down", signum)
daemon.stop()
signal.signal(signal.SIGINT, _stop)
signal.signal(signal.SIGTERM, _stop)
try:
daemon.run()
except RuntimeError as exc:
# Most commonly the lock file: another daemon is running.
print(f"error: {exc}", file=sys.stderr)
return 1
return 0
def _cmd_status(_args: argparse.Namespace) -> int:
settings = load_settings()
status_path = settings.state_dir / "status.json"
payload = read_json(status_path, default=None)
if payload is None:
print("no status file yet — has the daemon run?", file=sys.stderr)
return 1
print(json.dumps(payload, indent=2, sort_keys=False))
return 0
def _cmd_topic(_args: argparse.Namespace) -> int:
settings = load_settings()
print(topic_url(settings.ntfy_topic))
return 0
def main(argv: list[str] | None = None) -> int:
parser = argparse.ArgumentParser(prog="relay", description="risv3-relay daemon")
sub = parser.add_subparsers(dest="cmd", required=True)
run = sub.add_parser("run", help="Run the daemon in the foreground")
run.add_argument("-v", "--verbose", action="store_true")
run.set_defaults(func=_cmd_run)
status = sub.add_parser("status", help="Print the current daemon status")
status.set_defaults(func=_cmd_status)
topic = sub.add_parser("topic", help="Print the ntfy subscription URL")
topic.set_defaults(func=_cmd_topic)
args = parser.parse_args(argv)
return args.func(args)
if __name__ == "__main__":
raise SystemExit(main())

119
relay/anthropic_client.py Normal file
View File

@@ -0,0 +1,119 @@
"""Anthropic API wrapper.
Wraps the SDK call so the daemon can send a turn and get back the
assistant text + token usage + estimated cost. Implements prompt
caching on the system prompt: subsequent calls within the 5-minute TTL
get a cache hit on the (large, repeated) system prompt and pay the
cheaper cache-hit rate. Per-turn user/assistant content is never marked
cacheable because it changes every call.
Cost estimation uses a model→price table; the table is the source of
truth and is easy to update when pricing changes.
"""
from __future__ import annotations
from dataclasses import dataclass
from anthropic import Anthropic
from anthropic.types import Message
# Pricing per 1M tokens (USD), pulled from Anthropic's published schedule.
# Cache-hit input is billed at the cache-read rate (~10% of standard input).
# Cache-write is ~25% more than standard input. These numbers are
# approximate and used only for log-line cost estimation; the source of
# truth for billing is Anthropic's invoice.
_PRICES_PER_MILLION: dict[str, dict[str, float]] = {
"claude-opus-4-7": {"input": 15.0, "output": 75.0, "cache_write": 18.75, "cache_read": 1.50},
"claude-opus-4-7-1m": {"input": 15.0, "output": 75.0, "cache_write": 18.75, "cache_read": 1.50},
"claude-sonnet-4-6": {"input": 3.0, "output": 15.0, "cache_write": 3.75, "cache_read": 0.30},
"claude-haiku-4-5-20251001": {
"input": 0.80,
"output": 4.0,
"cache_write": 1.00,
"cache_read": 0.08,
},
}
def _price_for(model: str) -> dict[str, float]:
if model in _PRICES_PER_MILLION:
return _PRICES_PER_MILLION[model]
# Fallback: charge as Opus 4.7 (worst-case) so estimates don't
# under-report for unknown models. Logged once at startup.
return _PRICES_PER_MILLION["claude-opus-4-7"]
@dataclass
class TurnResult:
text: str
input_tokens: int
output_tokens: int
cache_creation_input_tokens: int
cache_read_input_tokens: int
estimated_cost_usd: float
raw: Message
@dataclass
class AnthropicClient:
api_key: str
model: str
max_output_tokens: int = 4096
def __post_init__(self) -> None:
self._sdk = Anthropic(api_key=self.api_key)
def send(
self,
*,
system_prompt: str,
messages: list[dict[str, str]],
) -> TurnResult:
"""Send a single API turn. The system prompt is marked cacheable.
``messages`` is the full conversation history shaped for the
Messages API. The most recent message is the user turn we're
responding to. The daemon is responsible for appending the
assistant text it gets back into history before the next call.
"""
response = self._sdk.messages.create(
model=self.model,
max_tokens=self.max_output_tokens,
system=[
{
"type": "text",
"text": system_prompt,
"cache_control": {"type": "ephemeral"},
}
],
messages=messages,
)
text = "".join(
block.text for block in response.content if getattr(block, "type", None) == "text"
)
usage = response.usage
in_tokens = int(getattr(usage, "input_tokens", 0) or 0)
out_tokens = int(getattr(usage, "output_tokens", 0) or 0)
cache_create = int(getattr(usage, "cache_creation_input_tokens", 0) or 0)
cache_read = int(getattr(usage, "cache_read_input_tokens", 0) or 0)
prices = _price_for(self.model)
cost = (
in_tokens * prices["input"]
+ out_tokens * prices["output"]
+ cache_create * prices["cache_write"]
+ cache_read * prices["cache_read"]
) / 1_000_000.0
return TurnResult(
text=text,
input_tokens=in_tokens,
output_tokens=out_tokens,
cache_creation_input_tokens=cache_create,
cache_read_input_tokens=cache_read,
estimated_cost_usd=cost,
raw=response,
)

163
relay/config.py Normal file
View File

@@ -0,0 +1,163 @@
"""Settings + config.yaml loader.
The relay reads two config sources:
1. ``.env`` for secrets (API key) and per-host overrides (status port,
history cap). The ntfy topic is auto-generated on first run and
written back to ``.env`` so it persists across restarts.
2. ``config.yaml`` for project-level settings: the registered CC
sessions, the system prompt, the summarization prompt. First-PR
default config is generated if the file doesn't exist, with one
``session-1`` registered.
Settings are read once at startup; the daemon does not hot-reload them.
"""
from __future__ import annotations
import os
import secrets
from dataclasses import dataclass, field
from pathlib import Path
import yaml
from dotenv import dotenv_values, set_key
REPO_ROOT = Path(__file__).resolve().parent.parent
ENV_FILE = REPO_ROOT / ".env"
CONFIG_FILE = REPO_ROOT / "config.yaml"
DEFAULT_SYSTEM_PROMPT = (
"You are the chat-side counterpart to Claude Code instances working on the "
"risv3 project. CC sends you its progress; you respond as a project lead "
"would: check decisions, ask clarifying questions, approve or correct. "
"When a CC turn raises a question only JC (the human owner) can answer, "
"begin your response with the literal token [NEEDS-JC] so the relay daemon "
"pauses and notifies JC. Otherwise reply normally and the relay forwards "
"your reply to the originating CC session."
)
DEFAULT_SUMMARIZATION_PROMPT = (
"Summarize the conversation so far. Preserve project context, decisions "
"made, open work items, and any outstanding [NEEDS-JC] questions. The "
"summary will replace earlier turns in the conversation history; the most "
"recent turns will be retained verbatim. Be specific where specificity "
"matters (file paths, issue numbers, decisions); be brief on routine "
"back-and-forth."
)
@dataclass(frozen=True)
class SessionConfig:
"""One registered CC session."""
session_id: str
working_dir: str | None = None
description: str | None = None
@dataclass(frozen=True)
class Settings:
api_key: str
model: str
ntfy_topic: str
status_port: int
history_char_cap: int
repo_root: Path
queue_dir: Path
dispatch_dir: Path
state_dir: Path
logs_dir: Path
system_prompt: str
summarization_prompt: str
sessions: tuple[SessionConfig, ...] = field(default_factory=tuple)
def _ensure_runtime_dirs(repo_root: Path) -> None:
for sub in ("queue", "dispatch", "state", "logs"):
(repo_root / sub).mkdir(exist_ok=True)
def _generate_ntfy_topic() -> str:
"""Cryptographically random 16-character topic. Functionally a password."""
return secrets.token_urlsafe(12)
def _load_or_init_ntfy_topic(env_values: dict[str, str | None]) -> str:
raw = (env_values.get("NTFY_TOPIC") or "").strip()
# Defensive: dotenv treats everything after = as the value, so an
# inline `#` comment ends up as the topic. Treat any value that
# starts with `#` (or contains whitespace, which a real topic never
# does) as empty.
if raw and not raw.startswith("#") and " " not in raw:
return raw
topic = _generate_ntfy_topic()
set_key(str(ENV_FILE), "NTFY_TOPIC", topic, quote_mode="never")
return topic
def _load_yaml_config() -> dict:
if not CONFIG_FILE.exists():
default = {
"system_prompt": DEFAULT_SYSTEM_PROMPT,
"summarization_prompt": DEFAULT_SUMMARIZATION_PROMPT,
"sessions": [
{
"session_id": "session-1",
"working_dir": None,
"description": "Default CC session",
}
],
}
CONFIG_FILE.write_text(yaml.safe_dump(default, sort_keys=False))
return default
with CONFIG_FILE.open() as f:
return yaml.safe_load(f) or {}
def load_settings() -> Settings:
"""Read .env + config.yaml. Mutates .env on first run to record the ntfy topic."""
env_values = dotenv_values(ENV_FILE)
api_key = (
env_values.get("ANTHROPIC_API_KEY") or os.environ.get("ANTHROPIC_API_KEY") or ""
).strip()
if not api_key:
raise RuntimeError(f"ANTHROPIC_API_KEY missing from {ENV_FILE}")
model = (env_values.get("ANTHROPIC_MODEL") or "claude-opus-4-7").strip()
status_port = int((env_values.get("STATUS_PORT") or "8765").strip())
history_char_cap = int((env_values.get("HISTORY_CHAR_CAP") or "400000").strip())
ntfy_topic = _load_or_init_ntfy_topic(env_values)
yaml_cfg = _load_yaml_config()
system_prompt = str(yaml_cfg.get("system_prompt") or DEFAULT_SYSTEM_PROMPT)
summarization_prompt = str(yaml_cfg.get("summarization_prompt") or DEFAULT_SUMMARIZATION_PROMPT)
sessions_cfg = yaml_cfg.get("sessions") or []
sessions = tuple(
SessionConfig(
session_id=str(s["session_id"]),
working_dir=s.get("working_dir"),
description=s.get("description"),
)
for s in sessions_cfg
)
_ensure_runtime_dirs(REPO_ROOT)
return Settings(
api_key=api_key,
model=model,
ntfy_topic=ntfy_topic,
status_port=status_port,
history_char_cap=history_char_cap,
repo_root=REPO_ROOT,
queue_dir=REPO_ROOT / "queue",
dispatch_dir=REPO_ROOT / "dispatch",
state_dir=REPO_ROOT / "state",
logs_dir=REPO_ROOT / "logs",
system_prompt=system_prompt,
summarization_prompt=summarization_prompt,
sessions=sessions,
)

99
relay/conversation.py Normal file
View File

@@ -0,0 +1,99 @@
"""Conversation history with summarization.
History shape: list of turns ``{role, content, ts, session_id?}`` where
``role`` is ``"user"`` or ``"assistant"``. The user role represents
either CC output or JC override; the assistant role represents the
chat-Claude response.
Summarization fires when the total content character count exceeds
``HISTORY_CHAR_CAP``. The summarization prompt is sent as a normal user
turn, the API's response replaces all earlier turns, and the most
recent ``RECENT_TURNS_KEPT`` turns are appended verbatim. The summary
turn is marked with ``meta="summary"`` so the daemon can recognize it
when paginating.
"""
from __future__ import annotations
from dataclasses import asdict, dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any
from relay.state import read_json, write_json_atomic
RECENT_TURNS_KEPT = 10
@dataclass
class Turn:
role: str # 'user' | 'assistant'
content: str
ts: str = field(default_factory=lambda: datetime.utcnow().isoformat(timespec="seconds") + "Z")
session_id: str | None = None
meta: str | None = None # 'summary' for replaced summary turns
def to_api_message(self) -> dict[str, str]:
"""Anthropic Messages API shape — only role + content needed."""
return {"role": self.role, "content": self.content}
class Conversation:
"""In-memory + on-disk conversation history."""
def __init__(self, history_path: Path):
self._path = history_path
raw = read_json(self._path, default=[])
if not isinstance(raw, list):
raise ValueError(f"Expected list at {self._path}, got {type(raw).__name__}")
self._turns: list[Turn] = [Turn(**dict(t)) for t in raw]
@property
def turns(self) -> list[Turn]:
return list(self._turns)
def append(
self, role: str, content: str, *, session_id: str | None = None, meta: str | None = None
) -> Turn:
turn = Turn(role=role, content=content, session_id=session_id, meta=meta)
self._turns.append(turn)
self._persist()
return turn
def replace_with_summary(self, summary_text: str) -> None:
"""Replace all but the last RECENT_TURNS_KEPT turns with one summary turn."""
recent = self._turns[-RECENT_TURNS_KEPT:] if len(self._turns) > RECENT_TURNS_KEPT else []
summary_turn = Turn(role="assistant", content=summary_text, meta="summary")
self._turns = [summary_turn, *recent]
self._persist()
def total_chars(self) -> int:
return sum(len(t.content) for t in self._turns)
def needs_summarization(self, cap: int) -> bool:
return self.total_chars() > cap
def to_api_messages(self) -> list[dict[str, str]]:
return [t.to_api_message() for t in self._turns]
def _persist(self) -> None:
write_json_atomic(self._path, [asdict(t) for t in self._turns])
# Test/debug helper
def reset(self) -> None:
self._turns = []
self._persist()
def render_for_log(turn: Turn, max_chars: int = 200) -> dict[str, Any]:
"""Compact representation for log lines — full content elided."""
content = turn.content
if len(content) > max_chars:
content = content[:max_chars] + f"... <{len(turn.content) - max_chars} more chars>"
return {
"role": turn.role,
"ts": turn.ts,
"session_id": turn.session_id,
"meta": turn.meta,
"content": content,
}

330
relay/daemon.py Normal file
View File

@@ -0,0 +1,330 @@
"""Main relay loop.
One process, one thread, polling-based. The loop:
1. Drain ``state/jc_input.txt`` if present (highest priority).
2. Drain the ``queue/`` directory oldest-first.
3. Heartbeat: check for stuck-queue alerts.
4. Sleep briefly, repeat.
Each turn (queue entry or jc_input) goes through ``handle_turn`` which:
1. Appends the user-side content to history.
2. Summarizes if history exceeds the cap.
3. Sends to the Anthropic API.
4. Appends the assistant response to history.
5. Routes the response: if it begins (within the first 200 chars) with
``[NEEDS-JC]``, set status to ``needs_jc`` and ntfy JC; otherwise
dispatch to the originating session.
The status flag is in-memory only (single process); it controls whether
new queue entries are processed while the daemon is paused waiting for
JC input. ``state/status.json`` mirrors it on disk for the future
status endpoint.
"""
from __future__ import annotations
import logging
import re
import time
from dataclasses import dataclass, field
from datetime import datetime, timezone
from relay.anthropic_client import AnthropicClient, TurnResult
from relay.config import Settings
from relay.conversation import Conversation
from relay.dispatch import DispatchManager
from relay.ntfy import notify, topic_url
from relay.queue import QueueEntry, ack, stuck_age_seconds, take_oldest
from relay.state import InstanceLock, write_json_atomic
logger = logging.getLogger(__name__)
NEEDS_JC_TOKEN = "[NEEDS-JC]"
NEEDS_JC_SCAN_CHARS = 200
JC_INPUT_FILE = "jc_input.txt"
STATUS_FILE = "status.json"
STUCK_QUEUE_THRESHOLD_SEC = 600 # 10 min per spec
STUCK_QUEUE_REPEAT_SEC = 600 # don't re-notify more often than this
LOOP_SLEEP_SEC = 1.0
DISPATCH_PREFIX = re.compile(r"^@(?P<session>[A-Za-z0-9_-]+):\s*", re.MULTILINE)
@dataclass
class DaemonStatus:
started_at: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat(timespec="seconds")
)
state: str = "running" # running | needs_jc | error
last_needs_jc_at: str | None = None
last_needs_jc_text: str | None = None
last_dispatch_at: str | None = None
last_dispatch_session: str | None = None
queue_depth: int = 0
history_chars: int = 0
history_turns: int = 0
total_input_tokens: int = 0
total_output_tokens: int = 0
total_cost_usd: float = 0.0
last_stuck_alert_ts: float = 0.0
def as_dict(self) -> dict:
d = self.__dict__.copy()
d.pop("last_stuck_alert_ts", None)
return d
class Daemon:
def __init__(self, settings: Settings):
self.settings = settings
self.lock = InstanceLock(settings.state_dir / ".lock")
self.conversation = Conversation(settings.state_dir / "conversation.json")
self.dispatch = DispatchManager(settings.dispatch_dir)
self.client = AnthropicClient(api_key=settings.api_key, model=settings.model)
self.status = DaemonStatus()
self._stop = False
# ---- public API used by __main__ ----
def run(self) -> None:
self.lock.acquire()
try:
self._announce_startup()
while not self._stop:
try:
self._tick()
except Exception:
logger.exception("uncaught error in daemon loop; continuing")
self._notify_error(
"Daemon loop error",
"An uncaught exception was logged. Check logs/relay.log.",
)
self._persist_status()
time.sleep(LOOP_SLEEP_SEC)
finally:
self.lock.release()
def stop(self) -> None:
self._stop = True
# ---- internals ----
def _announce_startup(self) -> None:
url = topic_url(self.settings.ntfy_topic)
logger.info("=" * 72)
logger.info("relay daemon starting")
logger.info("model: %s", self.settings.model)
logger.info("status state: %s", self.settings.state_dir)
logger.info("ntfy topic: %s", url)
logger.info("Subscribe on phone/laptop to receive needs_jc + error alerts.")
logger.info("registered sessions: %s", [s.session_id for s in self.settings.sessions])
logger.info("history cap: %d chars", self.settings.history_char_cap)
logger.info("=" * 72)
notify(
self.settings.ntfy_topic,
title="relay daemon online",
message=f"model={self.settings.model}, sessions={len(self.settings.sessions)}",
tags=["robot"],
)
def _tick(self) -> None:
# 1) Try to flush any queued dispatches that were waiting on CC consumption.
self.dispatch.flush_all()
# 2) JC override always takes priority.
if self._handle_jc_input():
return
# 3) If paused for needs_jc, do nothing further on the queue.
if self.status.state == "needs_jc":
return
# 4) Drain queue (one entry per tick — keeps logs and dispatch ordering predictable).
entry = take_oldest(self.settings.queue_dir)
if entry is not None:
self._handle_queue_entry(entry)
# 5) Heartbeat: stuck-queue check.
self._check_stuck_queue()
def _handle_jc_input(self) -> bool:
path = self.settings.state_dir / JC_INPUT_FILE
if not path.exists():
return False
try:
content = path.read_text(encoding="utf-8")
except OSError as exc:
logger.error("could not read %s: %s", path, exc)
return False
path.unlink() # consume immediately so a slow API call doesn't double-process
if not content.strip():
logger.info("jc_input.txt was empty; ignoring")
return False
# Prefix routing: "@session-id: ..." dispatches directly without an API call.
match = DISPATCH_PREFIX.match(content)
if match:
session_id = match.group("session")
payload = content[match.end() :]
logger.info("JC override: direct dispatch to %s (%d chars)", session_id, len(payload))
self.dispatch.queue_or_write(session_id, payload)
self.status.last_dispatch_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
self.status.last_dispatch_session = session_id
# JC override clears any needs_jc pause.
self._clear_needs_jc()
return True
# No prefix → treat as next chat-side turn (JC speaking from chat).
logger.info("JC override: chat-side turn (%d chars)", len(content))
self._clear_needs_jc()
self._send_chat_turn(user_role_content=content, originating_session=None, source="jc")
return True
def _handle_queue_entry(self, entry: QueueEntry) -> None:
logger.info("queue entry from %s, %d chars", entry.session_id, len(entry.content))
try:
self._send_chat_turn(
user_role_content=entry.content,
originating_session=entry.session_id,
source="queue",
)
except Exception:
logger.exception(
"error processing queue entry %s; leaving in queue for retry", entry.path
)
return
ack(entry)
def _send_chat_turn(
self, *, user_role_content: str, originating_session: str | None, source: str
) -> None:
# Append the user-side turn to history before the API call so a crash
# mid-call doesn't lose the prompt.
self.conversation.append("user", user_role_content, session_id=originating_session)
# Summarize if we've outgrown the cap.
if self.conversation.needs_summarization(self.settings.history_char_cap):
self._summarize()
# API call.
result = self.client.send(
system_prompt=self.settings.system_prompt,
messages=self.conversation.to_api_messages(),
)
self._record_usage(result)
logger.info(
"[%s] api turn ok: in=%d out=%d cache_w=%d cache_r=%d cost=$%.4f",
source,
result.input_tokens,
result.output_tokens,
result.cache_creation_input_tokens,
result.cache_read_input_tokens,
result.estimated_cost_usd,
)
# Append assistant response.
self.conversation.append("assistant", result.text)
# Route: NEEDS-JC pause vs dispatch.
if self._contains_needs_jc(result.text):
self._enter_needs_jc(result.text)
return
target_session = originating_session or self._fallback_session_id()
if not target_session:
logger.warning(
"no originating session and no fallback session in config; chat reply dropped"
)
return
self.dispatch.queue_or_write(target_session, result.text)
self.status.last_dispatch_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
self.status.last_dispatch_session = target_session
def _summarize(self) -> None:
before = self.conversation.total_chars()
# Send the summarization as a fresh user turn appended to current history.
# The API responds with the summary; we then collapse history into
# [summary, last 10 turns].
self.conversation.append(
"user", self.settings.summarization_prompt, meta="summarize_request"
)
result = self.client.send(
system_prompt=self.settings.system_prompt,
messages=self.conversation.to_api_messages(),
)
self._record_usage(result)
# Replace history with summary + most-recent. This drops the
# summarize_request turn we just appended (it's only there to
# produce the summary; not useful in the rolling history).
self.conversation.replace_with_summary(result.text)
after = self.conversation.total_chars()
logger.info(
"summarization: %d chars -> %d chars (cost $%.4f)",
before,
after,
result.estimated_cost_usd,
)
def _contains_needs_jc(self, text: str) -> bool:
return NEEDS_JC_TOKEN in text[:NEEDS_JC_SCAN_CHARS]
def _enter_needs_jc(self, response_text: str) -> None:
self.status.state = "needs_jc"
self.status.last_needs_jc_at = datetime.now(timezone.utc).isoformat(timespec="seconds")
self.status.last_needs_jc_text = response_text[:1000]
logger.warning("[NEEDS-JC] flagged; daemon paused awaiting state/jc_input.txt")
notify(
self.settings.ntfy_topic,
title="[NEEDS-JC] relay paused",
message=response_text[:400],
priority="high",
tags=["warning"],
)
def _clear_needs_jc(self) -> None:
if self.status.state != "running":
logger.info("clearing needs_jc state (was %s)", self.status.state)
self.status.state = "running"
def _fallback_session_id(self) -> str | None:
if self.settings.sessions:
return self.settings.sessions[0].session_id
return None
def _record_usage(self, result: TurnResult) -> None:
self.status.total_input_tokens += result.input_tokens
self.status.total_output_tokens += result.output_tokens
self.status.total_cost_usd += result.estimated_cost_usd
def _persist_status(self) -> None:
self.status.queue_depth = (
len(list((self.settings.queue_dir).iterdir()))
if self.settings.queue_dir.exists()
else 0
)
self.status.history_chars = self.conversation.total_chars()
self.status.history_turns = len(self.conversation.turns)
write_json_atomic(self.settings.state_dir / STATUS_FILE, self.status.as_dict())
def _check_stuck_queue(self) -> None:
age = stuck_age_seconds(self.settings.queue_dir)
if age <= STUCK_QUEUE_THRESHOLD_SEC:
return
now = time.time()
if now - self.status.last_stuck_alert_ts < STUCK_QUEUE_REPEAT_SEC:
return
self.status.last_stuck_alert_ts = now
logger.warning("queue stuck: oldest entry is %.0fs old", age)
notify(
self.settings.ntfy_topic,
title="relay queue stuck",
message=f"oldest entry is {int(age)}s old; daemon may be paused or the API failing.",
priority="high",
tags=["warning"],
)
def _notify_error(self, title: str, message: str) -> None:
notify(self.settings.ntfy_topic, title=title, message=message, priority="high", tags=["x"])

71
relay/dispatch.py Normal file
View File

@@ -0,0 +1,71 @@
"""Dispatch writer.
Each registered CC session has a directory ``dispatch/<session_id>/``.
The daemon delivers a chat-side response by writing it to
``dispatch/<session_id>/input.txt``. CC's polling loop reads, acts on
the content, and **deletes** the file as the acknowledgement.
The "only write when prior is consumed" rule means the daemon must not
overwrite a pending dispatch — otherwise CC could miss a turn. If the
daemon has new content for a session that hasn't yet consumed the
previous one, it queues internally and waits.
"""
from __future__ import annotations
import logging
from collections import defaultdict, deque
from dataclasses import dataclass, field
from pathlib import Path
from relay.state import write_atomic
logger = logging.getLogger(__name__)
@dataclass
class DispatchManager:
dispatch_dir: Path
_pending: dict[str, deque[str]] = field(default_factory=lambda: defaultdict(deque))
def session_dir(self, session_id: str) -> Path:
return self.dispatch_dir / session_id
def input_path(self, session_id: str) -> Path:
return self.session_dir(session_id) / "input.txt"
def has_pending(self, session_id: str) -> bool:
return bool(self._pending[session_id])
def session_input_present(self, session_id: str) -> bool:
"""True iff the session's input.txt exists (CC hasn't consumed yet)."""
return self.input_path(session_id).exists()
def queue_or_write(self, session_id: str, content: str) -> bool:
"""Try to deliver content. If session's input.txt is still present,
queue internally and return False; otherwise write and return True.
"""
self._pending[session_id].append(content)
return self._flush_session(session_id)
def flush_all(self) -> int:
"""Try to flush queued dispatches for every session. Returns count delivered."""
delivered = 0
for session_id in list(self._pending.keys()):
while self._pending[session_id]:
if not self._flush_session(session_id):
break
delivered += 1
return delivered
def _flush_session(self, session_id: str) -> bool:
if not self._pending[session_id]:
return False
if self.session_input_present(session_id):
return False
next_content = self._pending[session_id].popleft()
path = self.input_path(session_id)
path.parent.mkdir(parents=True, exist_ok=True)
write_atomic(path, next_content)
logger.info("dispatched %d chars to %s -> %s", len(next_content), session_id, path)
return True

46
relay/logs.py Normal file
View File

@@ -0,0 +1,46 @@
"""Logging setup.
Root logger writes to stdout (so tmux/systemd captures it) and to
``logs/relay-YYYY-MM-DD.log`` with daily rotation. Format includes
timestamp, level, logger name, and message.
"""
from __future__ import annotations
import logging
import sys
from logging.handlers import TimedRotatingFileHandler
from pathlib import Path
def configure(logs_dir: Path, level: int = logging.INFO) -> None:
logs_dir.mkdir(parents=True, exist_ok=True)
fmt = logging.Formatter(
fmt="%(asctime)s %(levelname)-7s %(name)-22s %(message)s",
datefmt="%Y-%m-%dT%H:%M:%S",
)
root = logging.getLogger()
root.setLevel(level)
# Remove pre-existing handlers (idempotent across reload during tests)
for handler in list(root.handlers):
root.removeHandler(handler)
stdout = logging.StreamHandler(stream=sys.stdout)
stdout.setFormatter(fmt)
root.addHandler(stdout)
file_handler = TimedRotatingFileHandler(
filename=str(logs_dir / "relay.log"),
when="midnight",
backupCount=14,
encoding="utf-8",
utc=True,
)
file_handler.setFormatter(fmt)
root.addHandler(file_handler)
# Quiet libraries that are too chatty at INFO
for noisy in ("urllib3", "httpx", "httpcore", "anthropic"):
logging.getLogger(noisy).setLevel(logging.WARNING)

59
relay/ntfy.py Normal file
View File

@@ -0,0 +1,59 @@
"""ntfy.sh notifications.
Topic is loaded from settings (auto-generated on first run). The topic
is functionally a password — anyone subscribed to the topic URL
receives notifications. We use cryptographically-random topics
(secrets.token_urlsafe(12)) to make brute-force discovery impractical.
Notifications are best-effort: a failure to deliver (network down,
ntfy.sh outage) is logged but does NOT block the daemon's main loop.
"""
from __future__ import annotations
import logging
import requests
logger = logging.getLogger(__name__)
NTFY_BASE = "https://ntfy.sh"
def topic_url(topic: str) -> str:
return f"{NTFY_BASE}/{topic}"
def notify(
topic: str,
title: str,
message: str,
*,
priority: str = "default",
tags: list[str] | None = None,
) -> bool:
"""Post one notification. Returns True on HTTP 200, False otherwise.
Never raises on transport errors — this path runs from the daemon's
main loop and a failed notification should not stop work.
"""
if not topic:
logger.warning("ntfy topic is empty; skipping notification: %s", title)
return False
headers = {
"Title": title,
"Priority": priority,
}
if tags:
headers["Tags"] = ",".join(tags)
try:
resp = requests.post(
topic_url(topic), data=message.encode("utf-8"), headers=headers, timeout=10
)
except requests.RequestException as exc:
logger.warning("ntfy delivery failed for topic <redacted>: %s", exc)
return False
if resp.status_code != 200:
logger.warning("ntfy non-200 (%s): %s", resp.status_code, resp.text[:200])
return False
return True

129
relay/queue.py Normal file
View File

@@ -0,0 +1,129 @@
"""Queue intake.
CC sessions drop JSON envelopes into ``queue/``. The envelope shape:
{
"session_id": "session-1",
"timestamp": "2026-05-02T12:34:56Z",
"content": "..."
}
The daemon picks the oldest entry by mtime, validates the envelope,
returns it, and deletes the file once processing succeeds. A
malformed envelope is moved to ``queue/.rejected/`` so the daemon
doesn't retry it forever.
"""
from __future__ import annotations
import json
import logging
import shutil
import time
from dataclasses import dataclass
from pathlib import Path
logger = logging.getLogger(__name__)
class QueueError(RuntimeError):
pass
@dataclass(frozen=True)
class QueueEntry:
path: Path
session_id: str
timestamp: str
content: str
mtime: float
def _validate_envelope(payload: object, *, path: Path) -> tuple[str, str, str]:
if not isinstance(payload, dict):
raise QueueError(f"{path}: envelope must be a JSON object")
for required in ("session_id", "timestamp", "content"):
if required not in payload:
raise QueueError(f"{path}: missing required key '{required}'")
session_id = str(payload["session_id"]).strip()
timestamp = str(payload["timestamp"]).strip()
content = str(payload["content"])
if not session_id:
raise QueueError(f"{path}: session_id is empty")
if not timestamp:
raise QueueError(f"{path}: timestamp is empty")
if not content.strip():
raise QueueError(f"{path}: content is empty")
return session_id, timestamp, content
def _reject(path: Path, reason: str) -> None:
rejected_dir = path.parent / ".rejected"
rejected_dir.mkdir(exist_ok=True)
target = rejected_dir / path.name
shutil.move(str(path), str(target))
logger.warning("rejected queue entry %s -> %s: %s", path, target, reason)
def list_pending(queue_dir: Path) -> list[Path]:
"""Return queue entries oldest-first, excluding the .rejected dir."""
if not queue_dir.exists():
return []
entries = [
p
for p in queue_dir.iterdir()
if p.is_file() and p.suffix == ".json" and not p.name.startswith(".")
]
entries.sort(key=lambda p: p.stat().st_mtime)
return entries
def take_oldest(queue_dir: Path) -> QueueEntry | None:
"""Read and validate the oldest queue entry. Returns None if queue empty.
Rejects malformed envelopes by moving them to .rejected/. The caller
is responsible for calling ``ack(entry)`` once processing succeeds
to delete the file; until then it remains in the queue.
"""
pending = list_pending(queue_dir)
if not pending:
return None
path = pending[0]
try:
raw = path.read_text(encoding="utf-8")
payload = json.loads(raw)
session_id, timestamp, content = _validate_envelope(payload, path=path)
except (OSError, json.JSONDecodeError) as exc:
_reject(path, f"unreadable / not JSON: {exc}")
return None
except QueueError as exc:
_reject(path, str(exc))
return None
return QueueEntry(
path=path,
session_id=session_id,
timestamp=timestamp,
content=content,
mtime=path.stat().st_mtime,
)
def ack(entry: QueueEntry) -> None:
"""Delete the queue file. Called by the daemon after the turn is dispatched."""
try:
entry.path.unlink()
except FileNotFoundError:
pass
def stuck_age_seconds(queue_dir: Path) -> float:
"""How long the oldest pending entry has been waiting. 0 if queue empty.
Used by the daemon's heartbeat to fire a ntfy alert when the queue
is stuck (e.g., persistent API failure).
"""
pending = list_pending(queue_dir)
if not pending:
return 0.0
return time.time() - pending[0].stat().st_mtime

100
relay/state.py Normal file
View File

@@ -0,0 +1,100 @@
"""Atomic state-file I/O and instance lock.
The conversation history lives at ``state/conversation.json``. Mutated
in-memory by the daemon and written via temp+rename to avoid partial
writes if the process is killed mid-write. A ``state/.lock`` advisory
file stops two daemons from running against the same directory.
"""
from __future__ import annotations
import errno
import fcntl
import json
import os
from dataclasses import dataclass
from pathlib import Path
from tempfile import NamedTemporaryFile
class StateError(RuntimeError):
pass
@dataclass
class InstanceLock:
"""Holds a flock on state/.lock for the daemon's lifetime.
Released automatically on process exit (kernel closes the fd) or by
calling ``release()``. ``acquire()`` raises StateError if another
daemon already holds the lock.
"""
lock_path: Path
_fd: int | None = None
def acquire(self) -> None:
self._fd = os.open(self.lock_path, os.O_RDWR | os.O_CREAT, 0o600)
try:
fcntl.flock(self._fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
except OSError as exc:
os.close(self._fd)
self._fd = None
if exc.errno in {errno.EAGAIN, errno.EACCES}:
raise StateError(
f"Another daemon is holding {self.lock_path}; refusing to start"
) from exc
raise
os.write(self._fd, str(os.getpid()).encode())
def release(self) -> None:
if self._fd is None:
return
try:
fcntl.flock(self._fd, fcntl.LOCK_UN)
finally:
os.close(self._fd)
self._fd = None
def write_atomic(path: Path, data: str) -> None:
"""Atomically write text to ``path`` via temp file + rename.
Crash-safe: a partial write leaves the temp file but does not
overwrite the target. fsync the data file (not the directory) so the
rename atomicity gives us durability up to the OS-level rename
barrier.
"""
path.parent.mkdir(parents=True, exist_ok=True)
with NamedTemporaryFile(
mode="w",
encoding="utf-8",
dir=str(path.parent),
prefix=f".{path.name}.",
suffix=".tmp",
delete=False,
) as tmp:
tmp.write(data)
tmp.flush()
os.fsync(tmp.fileno())
tmp_path = Path(tmp.name)
os.replace(tmp_path, path)
def write_json_atomic(path: Path, value: object) -> None:
write_atomic(path, json.dumps(value, indent=2, ensure_ascii=False, sort_keys=False))
def read_json(path: Path, default: object) -> object:
"""Read JSON or return default when the file is missing or empty."""
if not path.exists():
return default
raw = path.read_text(encoding="utf-8").strip()
if not raw:
return default
try:
return json.loads(raw)
except json.JSONDecodeError as exc:
raise StateError(f"Corrupt JSON at {path}: {exc}") from exc