From 63edc33fc4a3b8a5408cb7a361f184b7422bad8c Mon Sep 17 00:00:00 2001 From: Anthony Cardinale Date: Mon, 4 May 2026 09:23:02 -0400 Subject: [PATCH] Initial commit: skill files, docs site, README MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SKILL.md and pipeline.py from ~/.claude/skills/screenshot-rename/ - docs/index.html — archival/typewriter aesthetic homepage with hero monument, problem, 4-stage pipeline, before/after split, run-log receipt, ten gotchas, four use cases, install snippets - MIT license Co-Authored-By: Claude Opus 4.7 (1M context) --- .gitignore | 3 + LICENSE | 21 + README.md | 87 +++++ SKILL.md | 160 ++++++++ docs/index.html | 996 ++++++++++++++++++++++++++++++++++++++++++++++++ pipeline.py | 280 ++++++++++++++ 6 files changed, 1547 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 SKILL.md create mode 100644 docs/index.html create mode 100644 pipeline.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b908d4c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +*.pyc +.DS_Store diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b2e4033 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Roberto Cardinale + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..3ea2c59 --- /dev/null +++ b/README.md @@ -0,0 +1,87 @@ +# screenshot-rename + +> A Claude Code skill that turns a folder of timestamp-named screenshots into a folder of human-readable, searchable filenames — using parallel Haiku vision agents. + +``` +CleanShot 2026-04-15 at 09.14.07.png + ↓ +CleanShot - Shamel Studio Affiliate Referral Code Modal - 2026-04-15 at 09.14.07.png +``` + +Built for [CleanShot](https://cleanshot.com)-style screenshot folders, but works on any directory of `.png` / `.gif` / `.mp4` / `.pdf` files named only by timestamp. + +## Highlights + +- **Parallel** — describes ~200 files in 3 minutes using 10 concurrent Haiku subagents. +- **Safe** — pre-builds the full rename plan in memory, validates uniqueness and target collisions, then renames atomically with file-count audit. Designed after losing 4 files to a `mv` overwrite during prototyping. +- **Handles video/PDF** — extracts the first frame so vision agents can describe them. +- **Resizes for the vision tool** — Retina screenshots exceed Read's image cap; pipeline downsamples to 1568px max. + +## Installation + +This is a Claude Code skill. 
Drop the `screenshot-rename/` directory into `~/.claude/skills/`: + +```bash +git clone https://gitea.tojo.team/cardinale/screenshot-rename.git ~/.claude/skills/screenshot-rename +``` + +In your next Claude Code session, ask: + +> rename all the cleanshot files in `~/Documents/Screenshots/` based on their content + +The skill will activate automatically. + +## Usage from the command line + +You can also drive the pipeline directly: + +```bash +# 1. Prep — extract frames, resize, build batches +python3 pipeline.py prep --src "/path/to/folder" --batch-size 19 + +# 2. (In a Claude Code session, dispatch one Haiku subagent per +# /tmp/screenshot-rename/full-batch-NN file using the prompt template +# in SKILL.md.) + +# 3. Plan — aggregate descriptions, validate, build rename map +python3 pipeline.py plan --src "/path/to/folder" + +# 4. Execute — apply the plan, audit file count +python3 pipeline.py execute --src "/path/to/folder" +``` + +The dispatch step (#2) currently requires a Claude Code session. See [Roadmap](#roadmap). + +## Documentation + +- **Homepage with worked examples:** [docs/index.html](docs/index.html) +- **Full skill spec:** [SKILL.md](SKILL.md) +- **Pipeline source:** [pipeline.py](pipeline.py) + +## The gotchas this skill encodes + +This skill exists because every one of these caused real damage during development: + +1. The macOS `Read` tool has an image-size cap. Resize first. +2. Vision can't read `.mp4` or multi-page `.pdf` directly. Extract a frame. +3. **Bash regex `[[ =~ ]]` does NOT populate `BASH_REMATCH` in zsh.** Targets become empty. Loops collide on the same filename. Files vanish. Use Python for any filename mutation. +4. `mv` silently overwrites. Use `mv -n` or `os.rename` with explicit pre-existence check. +5. Pre-build the entire rename plan in memory and validate uniqueness before any `mv`. +6. Audit `len(os.listdir(DEST))` before and after. Equal count == proof no overwrites. +7. 
iCloud-synced files in Time Machine local snapshots are file-provider stubs, not bytes. External backups (Backblaze, Time Machine to physical disk) are the real recovery source. +8. `Bash run_in_background` may exit early on `while read` loops. Run renames foreground via Python. +9. Haiku occasionally returns the resized `.jpg` filename instead of the original `.png`. Validator must try alt extensions. +10. Always preserve the original `.mp4` / `.pdf` extension — describe via the extracted frame, rename the source. + +The full discussion is in [SKILL.md](SKILL.md#the-critical-gotchas-every-one-of-these-caused-real-pain). + +## Roadmap + +- Direct Anthropic API mode (no Claude Code session required) — needs `ANTHROPIC_API_KEY` +- Custom prompt templates per-folder +- Optional preservation of dots in technical strings (`v2.1` currently becomes `V21`) +- Dry-run flag on `execute` + +## License + +MIT — see [LICENSE](LICENSE). diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..185b367 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,160 @@ +--- +name: screenshot-rename +description: Use when renaming a folder of screenshots, images, or short clips with AI-generated descriptive names — particularly CleanShot exports or any directory of images named only by timestamp. Triggers on requests like "rename these screenshots based on their content", "describe each of these images and rename it", or batch rename of files by visual content. +--- + +# Screenshot Rename + +## Overview + +Rename a directory of timestamp-named images (PNG / GIF / MP4 / PDF) to include AI-generated content descriptions, dispatched as parallel Haiku subagents from this Claude Code session. Each rename has the form: + +``` +<prefix> - <description> - <original timestamp>.<ext> +``` + +The pipeline is **prep → batch → describe (parallel agents) → validate plan → execute renames** with hard data-loss guards at every stage. 
+ +**Core principle:** *Plan in memory, validate exhaustively, then mutate the filesystem in a single pass with `os.rename` and pre-existence checks.* Never let `mv` overwrite — that's how you lose files. + +## When to Use + +- Renaming CleanShot / screenshot folders by content +- Any image batch where the source filenames are timestamps and the user wants them human-scannable +- ≥ ~10 files (otherwise just rename them inline) +- Files include PNG/GIF and optionally MP4 or PDF (pipeline handles all four) + +**Don't use for:** +- Code or text files — vision isn't needed +- Files where the name pattern is already meaningful +- Single-file rename (just do it directly) + +## Workflow + +``` +1. Prep + ├─ Extract first frame from each .mp4 (ffmpeg) and .pdf (sips) to /tmp/screenshot-rename/frames/<base>.jpg + ├─ Resize every source image to max 1568px on long edge → /tmp/screenshot-rename/small/<base>.jpg + └─ Build manifest TSV: <small_image_path>\t<original_filename> + +2. Batch + └─ Split manifest into N batches of ≤ 20 lines each (file: full-batch-NN) + +3. Describe (parallel) + └─ Dispatch N Haiku subagents (model: "haiku") in a single message + Each agent: reads its batch manifest, uses Read on each image_path, + writes desc-full-NN.tsv with: <original_filename>\t<6-8 word description> + +4. Plan (Python) + ├─ Aggregate all desc-full-*.tsv in memory + ├─ Validate every line: 6+ words, alnum+space only, source exists, target doesn't, + │ no plan-internal collisions + ├─ Truncate descriptions to 8 words max, title-case + └─ Write plan-full.tsv: <original>\t<new_name> + +5. Execute (Python, NEVER bash) + ├─ Read plan, for each line: pre-check src exists & dst doesn't, then os.rename + ├─ Audit before/after file count — must be equal + └─ Log failures, report ok/fail counts +``` + +## The Critical Gotchas (every one of these caused real pain) + +1. **Read tool has an image-size cap.** Original Retina screenshots can exceed it. **Always downscale** to ≤ 1568px before handing to a subagent. 
Use `sips -Z 1568 -s format jpeg`. + +2. **Vision API can't read .mp4 or multi-page .pdf directly.** Extract the first frame to a JPEG first (`ffmpeg -ss 1 -i in.mp4 -frames:v 1 out.jpg`, `sips -s format jpeg in.pdf --out out.jpg`). + +3. **Bash regex with `[[ =~ ]]` + `BASH_REMATCH` does NOT work in zsh.** zsh uses `$match[1]` etc. instead. Pattern silently fails, target name becomes empty, multiple `mv`s collide on the same empty target, files vanish. **Use Python for any filename mutation.** No exceptions. + +4. **`mv` silently overwrites.** A loop that constructs target names from a buggy parse will happily destroy your data. Use `mv -n` (no-clobber) in shell, or `os.rename` after `os.path.exists(dst)` check in Python. Never bare `mv`. + +5. **Pre-flight the full plan in memory** before mutating the filesystem. Build a list of `(orig, new)` tuples; verify every `new` is unique within the plan, doesn't collide with anything in the destination directory, and that every `orig` exists. Only then start renaming. + +6. **File-count audit.** Record `len(os.listdir(DEST))` before and after — must be equal. Any drop = data loss. + +7. **iCloud-synced trees and Time Machine local snapshots:** files in the snapshot are *file-provider stubs*, not the bytes. `cat` / `cp` from a snapshot path inside an iCloud-synced folder returns "Operation timed out" with a 0-byte file. **External backups (Backblaze, Time Machine to a real disk) are the actual recovery source for iCloud data**, not local APFS snapshots. + +8. **Bash background jobs in the Claude Code Bash tool can die silently.** A `while read` loop redirected from a file may exit immediately when run in the background. **Run renames foreground via Python** — it's the same code path locally and reliably runs to completion. + +9. **Haiku occasionally returns the wrong filename extension** (the resized `.jpg` instead of the original `.png`). 
The plan-builder must accept that and try alternate extensions when the claimed source isn't found in the destination directory. + +10. **Always preserve mp4/pdf source files** — the pipeline reads from the resized JPEG but renames the original mp4/pdf. Don't lose the source extension. + +## Quick Reference + +| Step | Command | +|---|---| +| Extract mp4 frame | `ffmpeg -y -ss 1 -i "$f" -frames:v 1 -q:v 3 "$out"` | +| Convert pdf to jpg | `sips -s format jpeg "$f" --out "$out"` | +| Resize for vision | `sips -Z 1568 -s format jpeg "$f" --out "$out"` | +| Split TSV into batches of 20 | `awk -v w=DIR 'BEGIN{n=1;c=0} {print > sprintf("%s/batch-%02d", w, n); c++; if(c>=20){c=0;n++}}'` | +| Dispatch agent | Agent tool, `subagent_type=general-purpose`, `model="haiku"`, `run_in_background=true` | +| Execute renames | Python `os.rename` with pre-existence check (NEVER bash `mv` in a loop) | + +## Reusable Pipeline + +The prep, plan, and rename phases are in `pipeline.py`. The dispatch phase is performed by Claude Code itself (Agent tool calls) and cannot be scripted from inside Python — that's the trade-off of running the describe step inside a Claude Code session. + +Run order: + +```bash +# 1. Prep + batch +python3 ~/.claude/skills/screenshot-rename/pipeline.py prep \ + --src "/path/to/folder" --batch-size 19 + +# Now dispatch one Haiku Agent per /tmp/screenshot-rename/full-batch-NN file +# (Claude Code does this — see SKILL.md "Workflow" step 3) + +# 2. After all desc-full-NN.tsv files exist: +python3 ~/.claude/skills/screenshot-rename/pipeline.py plan \ + --src "/path/to/folder" + +# 3. Review the plan, then: +python3 ~/.claude/skills/screenshot-rename/pipeline.py execute \ + --src "/path/to/folder" +``` + +## Subagent Prompt Template + +Use exactly this prompt for each batch (substitute the batch number): + +``` +Describe screenshots so they can be renamed. + +Read the manifest at `/tmp/screenshot-rename/full-batch-NN`. Each line: `image_path<TAB>original_filename`. + +For EACH line: +1. 
Use Read on `image_path` (first column) to view the image. +2. Generate a description of EXACTLY 6, 7, or 8 words describing "what app is shown and what the content is". Count your words. Be specific about app names when visible. Use only ASCII letters, numbers, and spaces — NO slashes, colons, dashes, quotes, special characters. Lowercase. 6-8 words. + +Output: write `/tmp/screenshot-rename/desc-full-NN.tsv` via Write tool. Each line: `original_filename<TAB>description`. <count> lines total. + +Then run `wc -l` on the output file to verify the line count. + +Return only "DONE: <count> lines" or an error report. +``` + +Dispatch all batches **in a single message with multiple Agent tool calls** so they run in parallel. Use `run_in_background=true` so you can keep working. + +## Common Mistakes + +| Mistake | What goes wrong | Fix | +|---|---|---| +| `mv $f $newname` in a bash loop | One bug → silent overwrite → data loss | `os.rename` in Python with pre-existence check | +| Building target name with bash regex | zsh doesn't populate BASH_REMATCH; empty targets | Use Python `os.path.splitext` and string ops | +| Sending original Retina images to Read | "Image too large" error mid-batch, partial output | Resize to 1568px first | +| Sending .mp4 to vision | Read fails | Extract first frame to JPEG first | +| Skipping the file-count audit | Silent data loss goes unnoticed | `len(os.listdir(DEST))` before & after — must be equal | +| Trusting Haiku's filename column | 30%+ of entries may have wrong extension | Plan-builder tries alt extensions | +| Running rename loop in background `Bash run_in_background=true` | Background `while read` may exit immediately, 0 progress | Run via Python foreground (it's fast — `os.rename` is just a syscall) | + +## Recovery — if something does go wrong + +1. **Check `~/Library/Application Support/CleanShot/media/`** — CleanShot keeps a recent media history. +2. 
**Check external backups (Backblaze, Time Machine to physical disk)** — these contain real file bytes. +3. **Local APFS Time Machine snapshots are NOT useful for iCloud-synced files** — they store file-provider stubs that time out on read. +4. **Check icloud.com → Drive → Recently Deleted** — iCloud keeps deleted files for ~30 days, but `mv` overwrites are NOT "deletes" from iCloud's perspective and may not appear there. + +## Real-World Impact + +First run on 196 CleanShot files lost 4 of them due to the bash-regex-in-zsh gotcha (rule #3). After the rebuild with Python and `mv -n`, second run renamed 189 files cleanly with zero loss. This skill exists so that doesn't happen again. diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 0000000..d7dd12d --- /dev/null +++ b/docs/index.html @@ -0,0 +1,996 @@ +<!doctype html> +<html lang="en"> +<head> +<meta charset="utf-8"> +<meta name="viewport" content="width=device-width,initial-scale=1"> +<title>screenshot-rename — a Claude Code skill + + + + + + + + + + +
+ +
+ +
+ + +
+
+
A claude code skill · vision-described renames
+

A folder
of timestamps,
turned into a manifest.

+

+ Two hundred screenshots, all named CleanShot 2026-04-15 at 09.14.07.png. + Run this skill: ten Haiku subagents read each one in parallel, write a six-to-eight word + description, and rename the file in place — atomically, with the safety nets that + cost the author four files to learn. +

+ + +
+
+ + CleanShot 2026-04-15 at 09.14.07.png +
+
+ + CleanShot - Shamel Studio Affiliate Referral Code Modal - 2026-04-15 at 09.14.07.png
+
+
+
+ + +
+
+ +

You can't find a screenshot you took six months ago.

+
+
    +
  • CleanShot 2025-09-26 at 16.27.39.png
  • +
  • CleanShot 2025-11-19 at 13.12.36.png
  • +
  • CleanShot 2025-12-05 at 11.24.33.png
  • +
  • CleanShot 2026-02-18 at 12.48.31.png
  • +
  • CleanShot 2026-03-04 at 06.13.44.png
  • +
  • CleanShot 2026-03-17 at 22.10.20.mp4
  • +
  • CleanShot 2026-03-21 at 11.46.42.png
  • +
  • CleanShot 2026-04-08 at 12.09.10.png
  • +
  • …and 187 more
  • +
+
+

+ A timestamp tells you when a screenshot exists. + It doesn't tell you what's in it. Spotlight indexes the + pixels reluctantly; iCloud-synced folders less reliably still. The only + way most people find an old screenshot is by remembering, roughly, + what they were doing the week they took it — and scrolling. +

+

+ The real cost isn't filesystem clutter. It's the screenshots you + stopped taking, because past you knew future you wouldn't be able to + surface them. +

+ 196 → +

files renamed in the first run that motivated this skill, in three minutes, with zero loss after the second pass. The first pass cost four files. That's why the safety rules below are written the way they are.

+
+
+
+
+ + +
+
+ +

Four stages, in three minutes.

+

+ The skill does as little as possible, and validates as much as possible. + Subagents handle the work that benefits from parallelism (vision); Python + handles the work that benefits from being correct (filename mutation, + collision detection, the actual os.rename). +

+ +
+
+
Stage 01
+

Prep.

+

Extract the first frame from every .mp4 and .pdf. Resize every image to 1568px max — Read's image cap is real. Build a manifest TSV.

+

ffmpeg · sips · /tmp/screenshot-rename/full-batch-NN

+
+
+
Stage 02
+

Describe.

+

Dispatch one Haiku subagent per batch, in parallel — ten at a time. Each agent reads its 19 images and writes 6–8 word descriptions to desc-full-NN.tsv.

+

model · "haiku" · ~$0.30 / 200 files

+
+
+
Stage 03
+

Plan.

+

Aggregate. Validate every line: 6+ words, alnum only, source exists, target doesn't, no plan-internal collisions. Build the full rename map in memory.

+

plan-full.tsv · zero-error policy

+
+
+
Stage 04
+

Execute.

+

One os.rename per row, with pre-existence check. Audit len(listdir) before and after — it must be equal. That equality is your only proof no overwrites happened.

+

before == after · ok / fail

+
+
+
+
+ + +
+
+ +

Before, a timestamp.
After, a sentence.

+

+ A real rename from the run that motivated this skill. The description + was generated by Haiku in roughly two seconds. +

+ +
+
+
Before
+
CleanShot 2026-03-17 at 22.10.20.mp4
+
+
Length36 chars
+
Searchableby date only
+
Tells youwhen
+
+
+
+
After
+
CleanShot - Claude Conversation About Context Calculator Implementation - 2026-03-17 at 22.10.20.mp4
+
+
Length100 chars
+
Searchableby content + date
+
Tells youwhat, when
+
+
+
+ +

+ The original timestamp survives unchanged. Sorting still works. The description sits between the prefix and the timestamp, set off by spaced hyphens. +

+
+
+ + +
+
+
+
screenshot-rename · run log · 2026-05-04
+
source files196
+
resized to 1568px196
+
frames extracted (mp4 / pdf)9
+
batches dispatched10 · parallel
+
haiku descriptions returned196
+
plan validated189 renames · 0 errors
+
plan collisionsnone
+
file count before195
+
file count after195
+
renames committed189 ✓
+
files lost0 ✓
+
+
+
+ + +
+
+ +

Every rule below was paid for.

+

+ During development, four files were destroyed by a one-line bash mistake. + Each rule names the failure mode that earned its place. None are aspirational. +

+
    +
  1. Resize before vision.Retina screenshots exceed Read's image cap. Use sips -Z 1568 -s format jpeg first. The agent will fail mid-batch otherwise.
  2. +
  3. Frames, not videos.The vision tool can't read .mp4 or multi-page .pdf. Extract a frame with ffmpeg -ss 1 -frames:v 1 and describe that.
  4. +
  5. Never trust bash regex on filenames.zsh's [[ =~ ]] does not populate BASH_REMATCH. Pattern silently fails, target name is empty, multiple mvs collide. Use Python.
  6. +
  7. mv overwrites silently.One off-by-one in target construction destroys data with no error. Use mv -n in shell, or os.rename after an os.path.exists check in Python.
  8. +
  9. Plan the full rename in memory first.Build every (src, dst) tuple. Verify each dst is unique, doesn't exist, and corresponds to a real src. Then mutate disk.
  10. +
  11. File-count audit, every time.len(listdir(DEST)) before and after must be equal. Inequality is the only evidence of silent loss you'll get.
  12. +
  13. iCloud snapshots are stubs, not bytes.Files in a Time Machine local snapshot inside an iCloud-synced tree are file-provider stubs. cat them and the read times out. Real recovery comes from external backups.
  14. +
  15. Run renames foreground.Bash run_in_background with while read may exit early with no progress. Run via Python in the same shell — os.rename is just a syscall.
  16. +
  17. Validate the filename column.Haiku occasionally returns the resized .jpg name instead of the original .png. The plan-builder must try alternate extensions when the claimed source isn't found.
  18. +
  19. Preserve the original extension.The pipeline reads from a resized JPEG but renames the original .mp4 / .pdf. Write the source extension back into the new name.
  20. +
+
+
+ + +
+
+ +

What this looks like in practice.

+

+ The skill earns its keep when "Spotlight will find it" stops being true. Four scenarios where it has. +

+ +
+
+
A · Archive
+

An audit of a year of work.

+

Run the skill on a ~year-old screenshot folder. The output is a chronologically-sorted + narrative of what you were thinking about, week by week — readable from the filename column + in Finder. No app needed.

+
CleanShot - Synqora Audit Context Calculator Discussion Continued - 2026-03-15 at 08.08.29.png
+
+
+
B · Recall
+

"Find the screenshot of the bug from last March."

+

Renaming once buys you free-text search forever. mdfind "synqora session load" + surfaces the right file in a fraction of a second, with no manual tagging.

+
CleanShot - Synqora Session Load Failed Disconnect Reconnecting Error - 2026-04-18 at 13.37.12.png
+
+
+
C · Onboarding
+

A designer joins. Hand them the folder.

+

Instead of curating a deck of "what we've shipped this quarter," point them at the renamed + screenshot folder. The filenames are the deck. Categorize by app, by feature, by + timeline — the descriptions are already there.

+
CleanShot - Xcode Preview Swiftui Render Table Comparison Tools - 2026-03-21 at 10.47.26.png
+
+
+
D · Memory
+

A searchable design memory.

+

Pair with a periodic re-run on new captures. The folder becomes a queryable artifact: + every screenshot you took, with what was in it, in plain text, in the filesystem you + already use. No new tool to adopt.

+
CleanShot - Storyboard Browser With Harry Bridges 1933 Rally Shots - 2026-05-03 at 07.58.27.png
+
+
+
+
+ + +
+
+ +

Three commands, one folder.

+

+ The skill installs as a Claude Code skill. Once cloned into ~/.claude/skills/, it + activates automatically when you ask Claude to rename a screenshot folder. It can also be + driven from the command line. +

+ +
install# clone into your Claude Code skills directory
+git clone https://gitea.tojo.team/cardinale/screenshot-rename.git \
+       ~/.claude/skills/screenshot-rename
+ +
+
+

Driven by Claude Code

+

Open Claude Code in any project and say it conversationally. The skill activates from its description and runs the workflow end to end.

+
claude code> rename all the cleanshots in
+  ~/Documents/Screenshots/
+  based on their content.
+
+
+

Driven directly from the shell

+

For folders too large for a single session, run each stage by hand. Dispatch the Haiku subagents from a Claude Code session in between.

+
clipython3 pipeline.py prep    --src "./shots"
+# dispatch one haiku agent per batch...
+python3 pipeline.py plan    --src "./shots"
+python3 pipeline.py execute --src "./shots"
+
+
+
+
+ +
+ +
+
+ +
+ Set in Fraunces & JetBrains Mono. Written after losing four files to a bash regex bug. +
+
+
#!/usr/bin/env python3
# Reconstructed as plain Python from the collapsed patch hunk
# `diff --git a/pipeline.py b/pipeline.py` (new file mode 100644).
"""Screenshot-rename pipeline.

Three subcommands:
  prep    — extract frames, resize, build manifest, split into batches
  plan    — aggregate desc-*.tsv files, validate, write rename plan
  execute — apply the plan with safety checks

The Haiku-subagent dispatch step happens between `prep` and `plan` and is
performed by Claude Code in-session, not by this script.
"""

import argparse
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path

# Scratch area shared by all three subcommands.
WORK = Path("/tmp/screenshot-rename")
FRAMES = WORK / "frames"
SMALL = WORK / "small"

# Extensions that need a frame extracted before they can be resized.
_VIDEO_EXTS = (".mp4", ".mov")
# Extensions that can be handed to the resizer directly.
_IMAGE_EXTS = (".png", ".gif", ".jpg", ".jpeg", ".webp")
# Extensions tried, in order, when a description names a file that is not
# in the source directory.
_ALT_EXTS = (".png", ".gif", ".mp4", ".pdf", ".jpg", ".jpeg", ".webp")
# Fewest words a description must have (capped by --max-words, see plan()).
_MIN_WORDS = 6


def run(cmd, **kw):
    """Run *cmd* with stdout/stderr captured as text and return the result.

    The exit status is not checked here; callers look at the expected
    output file instead.
    """
    return subprocess.run(cmd, capture_output=True, text=True, **kw)


def title_case(s: str) -> str:
    """Capitalize the first letter of every word and lowercase the rest."""
    return " ".join(w.capitalize() for w in s.split())


# ---------- prep ----------

def _vision_source(f: Path, have_ffmpeg: bool):
    """Return the image to resize for *f*, or None after printing a warning.

    Plain images are returned as-is. Videos and PDFs are converted to
    ``FRAMES/<stem>.jpg`` first; an existing frame is reused.
    """
    ext = f.suffix.lower()
    if ext in _IMAGE_EXTS:
        return f
    is_pdf = ext == ".pdf"
    if not is_pdf and ext not in _VIDEO_EXTS:
        print(f"SKIP unknown ext: {f.name}", file=sys.stderr)
        return None

    frame = FRAMES / f"{f.stem}.jpg"
    if not frame.exists():
        if is_pdf:
            run(["sips", "-s", "format", "jpeg", str(f), "--out", str(frame)])
        elif have_ffmpeg:
            # Seek to 1s first; if that yields nothing (e.g. the clip is
            # shorter than one second) retry from the very first frame.
            for seek in ("1", "0"):
                run(["ffmpeg", "-y", "-ss", seek, "-i", str(f),
                     "-frames:v", "1", "-q:v", "3", str(frame)])
                if frame.exists():
                    break
        else:
            print(f"WARN ffmpeg not found on PATH: {f.name}", file=sys.stderr)
            return None
    if not frame.exists():
        what = "sips failed on pdf" if is_pdf else "ffmpeg failed"
        print(f"WARN {what}: {f.name}", file=sys.stderr)
        return None
    return frame


def prep(src: Path, batch_size: int, prefix: str) -> None:
    """Build resized JPEGs, the manifest and the per-agent batch files.

    Args:
        src: Directory holding the files to rename.
        batch_size: Maximum manifest lines per ``full-batch-NN`` file (>= 1).
        prefix: Filename prefix; only ``"<prefix> YYYY-MM-DD..."`` files match.

    Exits the process with a message when *src* is not a directory, when
    *batch_size* is below 1, when ``sips`` is unavailable, when nothing
    matches, or when no matched file could be prepared.
    """
    if not src.is_dir():
        sys.exit(f"source not a directory: {src}")
    if batch_size < 1:
        # Guards the batching arithmetic below against zero/negative sizes.
        sys.exit(f"--batch-size must be >= 1 (got {batch_size})")
    if shutil.which("sips") is None:
        # Every file goes through sips; fail up front with a readable message
        # rather than a FileNotFoundError traceback from subprocess.
        sys.exit("sips not found on PATH (prep needs it to resize images)")
    have_ffmpeg = shutil.which("ffmpeg") is not None

    WORK.mkdir(parents=True, exist_ok=True)
    FRAMES.mkdir(exist_ok=True)
    SMALL.mkdir(exist_ok=True)

    pattern = re.compile(rf"^{re.escape(prefix)}\s+\d{{4}}-\d{{2}}-\d{{2}}.*$")
    files = sorted(p for p in src.iterdir() if p.is_file() and pattern.match(p.name))
    if not files:
        sys.exit(f"no matching files (prefix='{prefix}') in {src}")
    print(f"found {len(files)} source files")

    lines = []
    stems = {}  # stem -> filename already written to the manifest
    for f in files:
        base = f.stem
        if base in stems:
            # Frames and resized images are cached by stem, so a second file
            # with the same stem would be described from the first one's image.
            print(f"WARN same stem as {stems[base]}, skipped: {f.name}",
                  file=sys.stderr)
            continue
        vision_src = _vision_source(f, have_ffmpeg)
        if vision_src is None:
            continue
        small = SMALL / f"{base}.jpg"
        if not small.exists():
            run(["sips", "-Z", "1568", "-s", "format", "jpeg",
                 str(vision_src), "--out", str(small)])
        if not small.exists():
            print(f"WARN resize failed: {f.name}", file=sys.stderr)
            continue
        stems[base] = f.name
        lines.append(f"{small}\t{f.name}")

    manifest = WORK / "all.tsv"
    manifest.write_text("".join(line + "\n" for line in lines), encoding="utf-8")

    # Old batch files are always replaced, even when this run produced nothing.
    for old in WORK.glob("full-batch-*"):
        old.unlink()
    if not lines:
        sys.exit(f"no files could be prepared from {src} (see warnings above)")

    stale = list(WORK.glob("desc-full-*.tsv"))
    if stale:
        # plan() reads every desc-full-*.tsv it finds, including old ones.
        print(f"WARN {len(stale)} desc-full-*.tsv file(s) already in {WORK}; "
              "`plan` will read them too - delete them if they are from an "
              "earlier run", file=sys.stderr)

    batches = [lines[i:i + batch_size] for i in range(0, len(lines), batch_size)]
    for number, chunk in enumerate(batches, 1):
        (WORK / f"full-batch-{number:02d}").write_text(
            "\n".join(chunk) + "\n", encoding="utf-8")
    n_batches = len(batches)
    print(f"prepped {len(lines)} files into {n_batches} batches in {WORK}")
    print(f"\nDispatch {n_batches} Haiku subagents (one per batch).")
    print(f"After all desc-full-NN.tsv files exist, run: pipeline.py plan --src '{src}'")


# ---------- plan ----------

def _resolve_source(claimed: str, existing) -> str:
    """Return the filename in *existing* that *claimed* refers to, or ""."""
    if claimed in existing:
        return claimed
    # Haiku occasionally returns .jpg instead of .png: retry with every
    # known extension on the same base name.
    base = os.path.splitext(claimed)[0]
    for ext in _ALT_EXTS:
        if base + ext in existing:
            return base + ext
    return ""


def _sanitize_words(desc: str) -> list:
    """Split *desc* on whitespace, keep only alphanumerics, drop empty words."""
    stripped = ("".join(c for c in w if c.isalnum()) for w in desc.split())
    return [w for w in stripped if w]


def _build_plan(lines, existing, prefix: str, max_words: int):
    """Validate description lines and return ``(rows, errors)``.

    *rows* is a list of ``(original_name, new_name)``; *errors* is a list of
    messages, one per rejected line. Nothing on disk is touched.
    """
    # With --max-words below 6 a fixed minimum of 6 could never be met.
    min_words = min(_MIN_WORDS, max_words)
    rows = []
    errors = []
    targets = {}  # new name -> source that claimed it
    sources = {}  # source -> line number that planned it

    for lineno, line in enumerate(lines, 1):
        line = line.rstrip()
        if not line:
            continue
        parts = line.split("\t", 1)
        if len(parts) != 2:
            errors.append(f"L{lineno}: bad split: {line!r}")
            continue
        orig_claimed, desc = parts

        if not orig_claimed.startswith(prefix + " "):
            errors.append(f"L{lineno}: prefix: {orig_claimed!r}")
            continue

        orig = _resolve_source(orig_claimed, existing)
        if not orig:
            errors.append(f"L{lineno}: source not found: {orig_claimed!r}")
            continue

        if len(desc.split()) < min_words:
            errors.append(f"L{lineno}: <{min_words} words: {orig!r} -> {desc!r}")
            continue
        # Sanitize before truncating so punctuation-only tokens ("-", "/")
        # do not use up word slots.
        cleaned = _sanitize_words(desc)
        if len(cleaned) < min_words:
            errors.append(f"L{lineno}: <{min_words} after sanitize: {desc!r}")
            continue
        titled = title_case(" ".join(cleaned[:max_words]))

        rest = orig[len(prefix) + 1:]  # everything after "Prefix "
        new = f"{prefix} - {titled} - {rest}"

        if new == orig:
            errors.append(f"L{lineno}: same: {orig!r}")
            continue
        if orig in sources:
            # A second row for the same file could only fail at execute time.
            errors.append(f"L{lineno}: duplicate source: {orig!r} "
                          f"(already planned at L{sources[orig]})")
            continue
        if new in existing:
            errors.append(f"L{lineno}: target exists in DEST: {new!r}")
            continue
        if new in targets:
            errors.append(f"L{lineno}: plan collision: {new!r} from {orig!r} and {targets[new]!r}")
            continue
        targets[new] = orig
        sources[orig] = lineno
        rows.append((orig, new))

    return rows, errors


def plan(src: Path, prefix: str, max_words: int) -> None:
    """Aggregate ``desc-full-*.tsv``, validate, and write ``plan-full.tsv``.

    Args:
        src: Directory holding the files to rename.
        prefix: Filename prefix every described file must start with.
        max_words: Descriptions are truncated to this many words (>= 1).

    Rejected lines are printed (first 30) and left out of the plan; the plan
    file is written either way. Exits the process when *src* is not a
    directory, *max_words* is below 1, or no description files exist.
    """
    if not src.is_dir():
        sys.exit(f"source not a directory: {src}")
    if max_words < 1:
        sys.exit(f"--max-words must be >= 1 (got {max_words})")
    descs = sorted(WORK.glob("desc-full-*.tsv"))
    if not descs:
        sys.exit(f"no desc-full-*.tsv files found in {WORK}")
    all_lines = []
    for p in descs:
        all_lines.extend(p.read_text(encoding="utf-8").splitlines())
    print(f"aggregated {len(all_lines)} description lines from {len(descs)} batches")

    existing = set(os.listdir(src))
    plan_rows, errors = _build_plan(all_lines, existing, prefix, max_words)

    print(f"plan: {len(plan_rows)} renames, {len(errors)} errors")
    if errors:
        print("\nERRORS:")
        for e in errors[:30]:
            print(f"  {e}")
        if len(errors) > 30:
            print(f"  ... and {len(errors) - 30} more")

    plan_path = WORK / "plan-full.tsv"
    with plan_path.open("w", encoding="utf-8") as f:
        f.writelines(f"{orig}\t{new}\n" for orig, new in plan_rows)
    print(f"\nplan saved: {plan_path}")
    step = max(1, len(plan_rows) // 6)
    print(f"sample (every {step}th row):")
    for orig, new in plan_rows[::step]:
        print(f"  {orig}\n    → {new}\n")
    print(f"if plan looks good: pipeline.py execute --src '{src}'")


# ---------- execute ----------

def _read_plan(plan_path: Path):
    """Parse the plan file into ``(rows, fails)`` without touching *src*.

    Lines lacking a tab, an empty side, or a name that is not a bare
    filename become failure messages instead of raising mid-run.
    """
    rows = []
    fails = []
    with plan_path.open(encoding="utf-8") as f:
        for line in f:
            line = line.rstrip()
            if not line:
                continue
            orig, sep, new = line.partition("\t")
            if not sep or not orig or not new:
                fails.append(f"malformed plan line: {line!r}")
                continue
            if os.path.basename(orig) != orig or os.path.basename(new) != new:
                # A path separator would move the file out of the directory.
                fails.append(f"not a bare filename: {line!r}")
                continue
            rows.append((orig, new))
    return rows, fails


def execute(src: Path) -> None:
    """Apply ``plan-full.tsv`` to *src*, one guarded ``os.rename`` per row.

    A row is skipped and recorded as a failure when its source is missing
    or its target already exists. Failures are written to
    ``rename-fails.txt`` in the work directory. Exits with status 2 when
    the number of directory entries differs before and after.
    """
    if not src.is_dir():
        sys.exit(f"source not a directory: {src}")
    plan_path = WORK / "plan-full.tsv"
    if not plan_path.exists():
        sys.exit(f"no plan: {plan_path} (run `pipeline.py plan` first)")

    # Read the whole plan before the first rename so a bad line cannot stop
    # the run halfway through.
    rows, fails = _read_plan(plan_path)

    before = len(os.listdir(src))
    ok = 0
    for orig, new in rows:
        srcp = src / orig
        dstp = src / new
        if not srcp.exists():
            fails.append(f"src missing: {orig}")
            continue
        # lexists also catches a dangling symlink, which exists() reports as
        # absent but os.rename would still replace.
        if os.path.lexists(dstp):
            fails.append(f"target exists: {new}")
            continue
        try:
            os.rename(srcp, dstp)
        except OSError as e:
            fails.append(f"rename error {orig}: {e}")
            continue
        if dstp.exists() and not srcp.exists():
            ok += 1
        else:
            fails.append(f"post-check failed: {orig}")

    after = len(os.listdir(src))
    print(f"ok={ok} fail={len(fails)} before={before} after={after}")

    # Write the failure log before the audit can abort: a changed file count
    # is exactly when the log is needed.
    if fails:
        fails_path = WORK / "rename-fails.txt"
        fails_path.write_text("\n".join(fails), encoding="utf-8")
        print(f"failures logged: {fails_path}")
        for x in fails[:5]:
            print(f"  {x}")

    if before != after:
        print("⚠ FILE COUNT CHANGED — investigate immediately")
        sys.exit(2)
    print("file count unchanged ✓")


# ---------- main ----------

def main() -> None:
    """Parse the command line and dispatch to prep / plan / execute."""
    p = argparse.ArgumentParser(description=__doc__)
    sub = p.add_subparsers(dest="cmd", required=True)

    p_prep = sub.add_parser("prep", help="extract frames, resize, build batches")
    p_prep.add_argument("--src", type=Path, required=True)
    p_prep.add_argument("--batch-size", type=int, default=19)
    p_prep.add_argument("--prefix", default="CleanShot",
                        help="filename prefix to match (default CleanShot)")

    p_plan = sub.add_parser("plan", help="build & validate rename plan")
    p_plan.add_argument("--src", type=Path, required=True)
    p_plan.add_argument("--prefix", default="CleanShot")
    p_plan.add_argument("--max-words", type=int, default=8)

    p_exec = sub.add_parser("execute", help="apply rename plan with safety checks")
    p_exec.add_argument("--src", type=Path, required=True)

    args = p.parse_args()
    if args.cmd == "prep":
        prep(args.src, args.batch_size, args.prefix)
    elif args.cmd == "plan":
        plan(args.src, args.prefix, args.max_words)
    elif args.cmd == "execute":
        execute(args.src)


if __name__ == "__main__":
    main()