feat: relay daemon skeleton — queue, dispatch, conversation, ntfy (#1)
First-PR scope from #1. Single-process Python daemon that relays between Claude Code instances and chat-Claude (Anthropic API). Components: * relay.config — .env + config.yaml loader. Auto-generates ntfy topic on first run and persists it back to .env. * relay.state — atomic file I/O via tempfile + rename, advisory flock at state/.lock to enforce single-instance. * relay.conversation — append-only history with summarization. Triggers a summarize call when total chars exceed HISTORY_CHAR_CAP (default 400k); replaces history with the summary plus the most recent 10 turns. * relay.anthropic_client — SDK wrapper. Marks the system prompt cacheable (5-min ephemeral cache); concatenates text blocks; estimates per-call cost from the Anthropic price table with cache-write/read accounted for. * relay.queue — JSON envelope intake; oldest-by-mtime; malformed envelopes moved to queue/.rejected/. * relay.dispatch — one-input-at-a-time per session (dispatch/<session_id>/input.txt). Won't overwrite a pending dispatch; queues internally and waits for CC to delete. * relay.ntfy — best-effort POST to https://ntfy.sh/<topic>; failures logged but never block the main loop. * relay.daemon — main loop. Polls jc_input.txt (priority) then queue/. Detects [NEEDS-JC] in the first 200 chars of any response and pauses dispatch until JC writes jc_input.txt. JC override supports @session-N: prefix for direct dispatch without an API call. * relay.__main__ — CLI: relay run / relay status / relay topic. Tests: 57 unit tests pass (config, state, conversation, queue, dispatch, anthropic_client, ntfy, full daemon loop with a fake client). One real-API smoke test marked real_api, opt-in via pytest -m real_api; skips cleanly on credit-balance errors. Out of scope for this PR (deferred to follow-ups): Flask status endpoint, multi-session config in production, exponential backoff, systemd unit, cost-tracking aggregation. Closes #1. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
100
relay/state.py
Normal file
100
relay/state.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""Atomic state-file I/O and instance lock.
|
||||
|
||||
The conversation history lives at ``state/conversation.json``. Mutated
|
||||
in-memory by the daemon and written via temp+rename to avoid partial
|
||||
writes if the process is killed mid-write. A ``state/.lock`` advisory
|
||||
file stops two daemons from running against the same directory.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import errno
|
||||
import fcntl
|
||||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
|
||||
class StateError(RuntimeError):
    """Raised for state-directory problems: a held instance lock or corrupt JSON."""
|
||||
|
||||
|
||||
@dataclass
class InstanceLock:
    """Hold an exclusive advisory ``flock`` on ``lock_path`` for the daemon's lifetime.

    The lock is released automatically on process exit (the kernel closes
    the fd) or explicitly via ``release()``. ``acquire()`` raises
    ``StateError`` when the lock is already held elsewhere. The instance
    can also be used as a context manager (``with InstanceLock(path): ...``).
    """

    lock_path: Path
    # Descriptor that currently holds the flock; None while the lock is not held.
    _fd: int | None = None

    def acquire(self) -> None:
        """Take the lock without blocking and record this process's PID in the file.

        Calling ``acquire()`` again on an instance that already holds the
        lock is a no-op.

        Raises:
            StateError: the lock is held through another descriptor
                (flock failed with EAGAIN or EACCES).
            OSError: any other failure opening, locking or writing the file.
        """
        if self._fd is not None:
            # Already held by this instance. Opening the file again would give
            # a second descriptor whose non-blocking flock is refused by our
            # own lock, and handling that refusal must never touch (or lose)
            # the descriptor that actually holds the lock.
            return

        # Work on a local descriptor and publish it on self only once the
        # lock is fully set up, so a failure never leaves a stale self._fd.
        fd = os.open(self.lock_path, os.O_RDWR | os.O_CREAT, 0o600)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except OSError as exc:
            os.close(fd)
            if exc.errno in {errno.EAGAIN, errno.EACCES}:
                raise StateError(
                    f"Another daemon is holding {self.lock_path}; refusing to start"
                ) from exc
            raise

        try:
            # Drop any PID left by a previous holder first: writing a shorter
            # PID over a longer one would otherwise leave trailing digits.
            os.ftruncate(fd, 0)
            os.write(fd, str(os.getpid()).encode())
        except BaseException:
            # Closing the descriptor also drops the flock, so nothing leaks.
            os.close(fd)
            raise
        self._fd = fd

    def release(self) -> None:
        """Unlock and close the descriptor; does nothing when the lock is not held."""
        fd = self._fd
        if fd is None:
            return
        self._fd = None
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            os.close(fd)

    def __enter__(self) -> "InstanceLock":
        """Acquire the lock on entry to a ``with`` block."""
        self.acquire()
        return self

    def __exit__(self, *exc_info: object) -> None:
        """Release the lock on exit from a ``with`` block."""
        self.release()
|
||||
|
||||
|
||||
def write_atomic(path: Path, data: str) -> None:
    """Atomically write text to ``path`` via a temp file plus rename.

    The data is written as UTF-8 to a temporary file created in the same
    directory as ``path`` (so the final ``os.replace`` never crosses a
    filesystem boundary), flushed and fsynced, and only then renamed over
    the target. A failure at any step leaves the existing target untouched
    and removes the temporary file before the exception propagates. Only
    the data file is fsynced, not the containing directory.

    Args:
        path: Destination file. Missing parent directories are created.
        data: Text to write.

    Raises:
        OSError: creating, writing, syncing or renaming the file failed.
        UnicodeEncodeError: ``data`` cannot be encoded as UTF-8.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path: Path | None = None
    try:
        with NamedTemporaryFile(
            mode="w",
            encoding="utf-8",
            dir=str(path.parent),
            prefix=f".{path.name}.",
            suffix=".tmp",
            delete=False,
        ) as tmp:
            # Record the name first so cleanup works even if the write fails.
            tmp_path = Path(tmp.name)
            tmp.write(data)
            tmp.flush()
            os.fsync(tmp.fileno())
        os.replace(tmp_path, path)
    except BaseException:
        # delete=False means nothing else removes the temp file; without this
        # every failed write would leave an orphaned ".<name>.*.tmp" behind.
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                # Best-effort cleanup: never mask the original error.
                pass
        raise
|
||||
|
||||
|
||||
def write_json_atomic(path: Path, value: object) -> None:
    """Serialize ``value`` to JSON text and write it to ``path`` with ``write_atomic``.

    The output uses a two-space indent, keeps non-ASCII characters
    unescaped and preserves the key order of ``value``.
    """
    payload = json.dumps(value, ensure_ascii=False, sort_keys=False, indent=2)
    write_atomic(path, payload)
|
||||
|
||||
|
||||
def read_json(path: Path, default: object) -> object:
    """Read and parse the JSON file at ``path``.

    Args:
        path: File to read; decoded as UTF-8.
        default: Value returned as-is when the file does not exist or
            contains only whitespace.

    Returns:
        The parsed JSON value, or ``default``.

    Raises:
        StateError: the file exists but is not valid UTF-8 or not valid JSON.
    """
    # Read first and handle absence afterwards: checking exists() beforehand
    # leaves a window in which the file can vanish before it is read.
    try:
        raw = path.read_text(encoding="utf-8").strip()
    except FileNotFoundError:
        return default
    except UnicodeDecodeError as exc:
        # Undecodable bytes are corruption too; report them the same way as
        # malformed JSON so callers only need to handle StateError.
        raise StateError(f"Corrupt JSON at {path}: {exc}") from exc

    if not raw:
        return default
    try:
        return json.loads(raw)
    except json.JSONDecodeError as exc:
        raise StateError(f"Corrupt JSON at {path}: {exc}") from exc
|
||||
Reference in New Issue
Block a user