[{"data":1,"prerenderedAt":2162},["ShallowReactive",2],{"navigation":3,"index":39,"home-projects":67,"index-blogs":558},[4,26],{"title":5,"path":6,"stem":7,"children":8,"page":25},"Projects","\u002Fprojects","projects",[9,13,17,21],{"title":10,"path":11,"stem":12},"AntiAgent","\u002Fprojects\u002Fantiagent","projects\u002Fantiagent",{"title":14,"path":15,"stem":16},"Cubist","\u002Fprojects\u002Fcubist","projects\u002Fcubist",{"title":18,"path":19,"stem":20},"Eliane","\u002Fprojects\u002Feliane","projects\u002Feliane",{"title":22,"path":23,"stem":24},"Genario","\u002Fprojects\u002Fgenario","projects\u002Fgenario",false,{"title":27,"path":28,"stem":29,"children":30,"page":25},"Blog","\u002Fblog","blog",[31,35],{"title":32,"path":33,"stem":34},"Symbolic descent: gradient descent, applied to rules instead of weights","\u002Fblog\u002Fsymbolic-descent","blog\u002Fsymbolic-descent",{"title":36,"path":37,"stem":38},"Laws from single experiences: an online symbolic world-model for ARC-AGI-3","\u002Fblog\u002Fsymbolic-world-model","blog\u002Fsymbolic-world-model",{"id":40,"title":41,"blog":42,"body":45,"description":46,"extension":47,"hero":48,"meta":56,"navigation":57,"path":58,"seo":59,"stem":62,"work":63,"__hash__":66},"index\u002Findex.yml","Louis Manhès",{"title":43,"description":44},"Writing","Notes on the systems I build and the research behind them.",null,"Founder and machine-learning engineer. I build applied AI products, and I research symbolic autotelic agents: machines that set their own goals and learn the structure of their world as laws they can explain.","yml",{"links":49},[50],{"label":51,"to":52,"color":53,"variant":54,"icon":55},"Get in touch","mailto:louismanhes@icloud.com","neutral","ghost","i-lucide-mail",{},true,"\u002F",{"title":60,"description":61},"Louis Manhès, Founder & ML engineer","Louis Manhès is a founder and machine-learning engineer. 
He builds applied AI products and researches symbolic autotelic agents: machines that set their own goals and learn their world as readable laws.","index",{"title":64,"description":65},"Selected work","Products I have built or am building, and the research alongside them.","jKiu56QpDiS-Xw8PrKqHqQXcjAxyz5tsWVX1eOFnDr0",[68,162,239,299],{"id":69,"title":22,"body":70,"color":145,"date":146,"description":147,"extension":148,"featured":57,"icon":149,"image":45,"logo":150,"logoDark":45,"meta":151,"navigation":57,"order":152,"path":23,"seo":153,"status":154,"stem":24,"tagline":155,"tags":156,"url":160,"__hash__":161},"projects\u002Fprojects\u002Fgenario.md",{"type":71,"value":72,"toc":140},"minimark",[73,82,85,90,137],[74,75,76,77,81],"p",{},"Genario is AI software and services for screenwriting and audiovisual production, built for screenwriters, producers, and studios working on film, series, and documentary. The French company was founded in 2019 by Louis Manhès, a robotics and AI engineer, and David Defendi, a French screenwriter and novelist whose credits include the series ",[78,79,80],"em",{},"Braquo",". It remains active.",[74,83,84],{},"The work has two parts: the Genario application, a writing and analysis tool, and Genario Studio, a services arm. 
The application is organized as a set of distinct named functions rather than a single general chatbot, and it routes requests to current third-party AI models, swapping them as new ones become available.",[86,87,89],"h2",{"id":88},"what-it-does","What it does",[91,92,93,101,107,113,119,125,131],"ul",{},[94,95,96,100],"li",{},[97,98,99],"strong",{},"Brainstorming"," of ideas and directions.",[94,102,103,106],{},[97,104,105],{},"Script creation"," from an outline or source material, with an adjustable creativity setting.",[94,108,109,112],{},[97,110,111],{},"Adaptation"," of a script into a novel, a synopsis, or a scene-by-scene breakdown.",[94,114,115,118],{},[97,116,117],{},"Reading notes",", producing studio-style coverage on plot, pacing, and characters.",[94,120,121,124],{},[97,122,123],{},"Version comparison"," of two screenplay drafts, returning a change report.",[94,126,127,130],{},[97,128,129],{},"Corrections"," for grammar, style, and structure.",[94,132,133,136],{},[97,134,135],{},"Translation"," that preserves the source file format, so a PDF stays a PDF and an FDX stays an FDX.",[74,138,139],{},"Genario Studio handles custom AI work for producers and studios, spanning development, pre-production, and post-production. 
Reported usage is over 20,000 users and 45,000 projects, with industry ties to Banijay and Pathé Films and an association with the World AI Film Festival in Nice.",{"title":141,"searchDepth":142,"depth":142,"links":143},"",2,[144],{"id":88,"depth":142,"text":89},"#6366f1","2019-01-01","Genario is a French company founded in 2019 building AI software and services for screenwriting and audiovisual production, with a writing and analysis application and a custom-services arm, Genario Studio.","md","i-lucide-clapperboard","\u002Fprojects\u002Fgenario\u002Flogo.png",{},1,{"title":22,"description":147},"Active","AI software and services for screenwriting and audiovisual production",[157,158,159],"Screenwriting","Audiovisual","Script analysis","https:\u002F\u002Fgenario.fr\u002F","t49m3ey2D6aQapn1LTP9bInGHJ_cBGXXU3cb18xhq2E",{"id":163,"title":10,"body":164,"color":224,"date":225,"description":226,"extension":148,"featured":57,"icon":227,"image":45,"logo":228,"logoDark":229,"meta":230,"navigation":57,"order":142,"path":11,"seo":231,"status":154,"stem":12,"tagline":232,"tags":233,"url":237,"__hash__":238},"projects\u002Fprojects\u002Fantiagent.md",{"type":71,"value":165,"toc":220},[166,169,173,180,187,191,194,214,217],[74,167,168],{},"AntiAgent is a web-based learning tool that combines a document editor, an AI writing assistant, and a spaced-repetition scheduler. It is for people who want to study a subject or build a skill while keeping the cognitive work their own. Its premise is that learning should sharpen the user's own thinking rather than delegate it, summarized as \"Learning for humans. In the age of machine learning.\"",[86,170,172],{"id":171},"how-it-works","How it works",[74,174,175,176,179],{},"The unit of work is a ",[97,177,178],{},"page",": an editable document on a chosen subject or skill. 
Pages are written in a block editor that supports formatting, drag-to-reorder, and a slash menu.",[74,181,182,183,186],{},"Material can be ",[97,184,185],{},"imported"," from URLs, PDFs, YouTube, audio, and prior pages. An embedded agent can, on request, stream a researched and cited section directly into the page.",[86,188,190],{"id":189},"the-three-review-modes","The three review modes",[74,192,193],{},"Once a page exists, it generates three recurring review activities:",[91,195,196,202,208],{},[94,197,198,201],{},[97,199,200],{},"Memory",": typed-answer flashcards that the AI grades in plain language.",[94,203,204,207],{},[97,205,206],{},"Thinking",": Socratic dialogues, with modes to debate, rehearse, converse, or explain, and adjustable difficulty.",[94,209,210,213],{},[97,211,212],{},"Skill",": rubric-based exercises graded by the agent, where the rubric is written in the page itself.",[74,215,216],{},"These activities are not reviewed on a fixed cadence. Timing is set by the FSRS spaced-repetition algorithm, which schedules the next return for the day estimated retention is most at risk.",[74,218,219],{},"Pages can be published publicly in one click and forked by others; the creator keeps ownership and progress.",{"title":141,"searchDepth":142,"depth":142,"links":221},[222,223],{"id":171,"depth":142,"text":172},{"id":189,"depth":142,"text":190},"#0ea5e9","2026-01-01","AntiAgent is a web-based learning tool pairing a document editor and AI writing assistant with an FSRS spaced-repetition scheduler that generates memory, thinking, and skill reviews from pages you author.","i-lucide-notebook-pen","\u002Fprojects\u002Fantiagent\u002Flogo.svg","\u002Fprojects\u002Fantiagent\u002Flogo-dark.svg",{},{"title":10,"description":226},"An AI notebook where pages teach you back",[234,235,236],"Learning","Spaced 
repetition","FSRS","https:\u002F\u002Fwww.antiagent.io\u002F","uKrpU_ibVurRXztJGPNWxQPZBKTJV1rgjtpw_HWKrag",{"id":240,"title":18,"body":241,"color":284,"date":225,"description":285,"extension":148,"featured":57,"icon":286,"image":45,"logo":287,"logoDark":45,"meta":288,"navigation":57,"order":289,"path":19,"seo":290,"status":154,"stem":20,"tagline":291,"tags":292,"url":297,"__hash__":298},"projects\u002Fprojects\u002Feliane.md",{"type":71,"value":242,"toc":281},[243,246,249,251,254,260,266,272,278],[74,244,245],{},"Eliane is a French SaaS for independent home-care nurses (infirmiers libéraux, IDELs). It addresses one layer of their work: the coordination and renewal of prescriptions (ordonnances). It is not a billing tool, and it does not handle cotation, télétransmission, or tour scheduling, the functions broader IDEL suites already cover. The scope is deliberately narrow, limited to the prescription and its administrative validity.",[74,247,248],{},"Data is hosted in France and the service is GDPR-compliant. It runs on the web, iOS, and Android. Pricing is a single tier at 29 EUR excl. tax per month per nurse, with a 30-day trial and no card required.",[86,250,172],{"id":171},[74,252,253],{},"The workflow has four steps.",[74,255,256,259],{},[97,257,258],{},"Capture."," The nurse photographs a prescription. 
Eliane reads patient data, medications, prescriber, and special mentions, and detects bizone and secure prescription formats.",[74,261,262,265],{},[97,263,264],{},"Detect."," Eliane applies thirteen business rules in parallel to flag anomalies that could lead to a CPAM (French health-insurance) rejection: missing mandatory mentions such as \"à domicile\", vague or non-compliant durations such as \"jusqu'à cicatrisation\", drug interactions, and drug-class duplicates.",[74,267,268,271],{},[97,269,270],{},"Prepare."," Eliane drafts emails, specialist transmissions, and calendar reminders, with pre-alerts 7 to 10 days before a treatment deadline.",[74,273,274,277],{},[97,275,276],{},"Validate."," The nurse reviews and validates each item before it is sent, which maintains a compliance trail.",[74,279,280],{},"Around a single patient, several parties usually need to stay informed: the treating physician, a specialist, the hospital, the pharmacy, the patient and caregivers, and the PSAD (home health-care provider). Eliane coordinates this communication over email, SMS, and MSSanté; Pro Santé Connect is noted as forthcoming. 
The documentation covers twelve recurring clinical situations, from stable chronic conditions to palliative home care, diabetes, wound care, and controlled-substance prescriptions.",{"title":141,"searchDepth":142,"depth":142,"links":282},[283],{"id":171,"depth":142,"text":172},"#059669","Eliane is a French SaaS for independent home-care nurses that handles prescription coordination and renewal, reading ordonnances, detecting anomalies before CPAM rejection, and coordinating communication among the parties around a patient.","i-lucide-stethoscope","\u002Fprojects\u002Feliane\u002Flogo.png",{},3,{"title":18,"description":285},"Prescription coordination for French home-care nurses",[293,294,295,296],"Healthcare","Nursing","SaaS","Compliance","https:\u002F\u002Feliane.io\u002F","zZckpHuVELthjSk6tBEvvy2boAOlGMC5Nd_WMxaXLBc",{"id":300,"title":14,"body":301,"color":542,"date":225,"description":543,"extension":148,"featured":57,"icon":544,"image":545,"logo":45,"logoDark":45,"meta":546,"navigation":57,"order":547,"path":15,"seo":548,"status":549,"stem":16,"tagline":550,"tags":551,"url":45,"__hash__":557},"projects\u002Fprojects\u002Fcubist.md",{"type":71,"value":302,"toc":534},[303,307,341,348,352,355,361,367,373,376,386,389,392,396,411,415,418,425,436,440,443,469,472,476],[86,304,306],{"id":305},"the-mission","The mission",[74,308,309,310,313,314,317,318,321,322,329,330,329,335,340],{},"Watch a child in a new place. Nobody hands them a goal or a reward function. They poke at things, notice what surprises them, invent little challenges, and out of that self-directed play comes an ever-growing repertoire of skills. 
Developmental AI researchers call such a learner ",[97,311,312],{},"autotelic",", from the Greek ",[78,315,316],{},"auto"," (self) and ",[78,319,320],{},"telos"," (goal): an agent that invents, selects, and pursues its own goals, driven by curiosity rather than external reward (",[323,324,328],"a",{"href":325,"rel":326},"https:\u002F\u002Fwww.frontiersin.org\u002Fjournals\u002Fneurorobotics\u002Farticles\u002F10.3389\u002Fneuro.12.006.2007\u002Ffull",[327],"nofollow","Oudeyer & Kaplan, 2007","; ",[323,331,334],{"href":332,"rel":333},"https:\u002F\u002Fjmlr.org\u002Fpapers\u002Fv23\u002F21-0808.html",[327],"Forestier et al., 2022",[323,336,339],{"href":337,"rel":338},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2012.09830",[327],"Colas et al., 2022",").",[74,342,343,344,347],{},"Cubist's mission is to build one, with a twist. The autotelic agents of Oudeyer, Colas, and colleagues are usually powered by deep reinforcement learning. Cubist is symbolic all the way down. Its model of the world is a set of laws you can read. Its skills are small closed-loop programs. And the algorithm that improves both is a discrete cousin of gradient descent we call ",[97,345,346],{},"symbolic descent",".",[86,349,351],{"id":350},"the-bet-symbolic-not-gradient","The bet: symbolic, not gradient",[74,353,354],{},"Almost everything that learns at scale today learns by gradient descent: nudge billions of opaque weights down a loss. It works spectacularly well when data is plentiful and the world holds still. But an agent dropped into a new world, learning from a single unfolding life, needs three things that gradients make expensive.",[74,356,357,360],{},[97,358,359],{},"Continual learning."," A neural network trained on something new tends to silently overwrite what it knew, the classic problem of catastrophic forgetting (McCloskey & Cohen, 1989). A theory made of discrete laws does not have that failure mode. 
A law that is already correct produces no error, and a learner driven by errors never touches it. One experience is enough to add a law, and adding it breaks nothing.",[74,362,363,366],{},[97,364,365],{},"Reasoning."," An explicit theory is something you can do things with: query it, plan through it, spot the exact situations it does not cover. Every law is a falsifiable claim. The world either behaves as the law says, or the law gets revised. A weight vector predicts; a theory explains.",[74,368,369,372],{},[97,370,371],{},"Interpretability."," When a symbolic model fails, the failure has an address: this condition was too narrow, that rule missed an entity. When it succeeds, the model is its own documentation.",[74,374,375],{},"To make this concrete, here is a real theory Cubist learned on one of the benchmark games, in about eight seconds of play:",[377,378,383],"pre",{"className":379,"code":381,"language":382},[380],"language-text","# up key: the player (the only width-5 entity) moves 5 cells up\nACTION3: (self.width == 5) ⇒ move := (0, -5)\n\n# down key: the same player moves 5 cells down\nACTION4: (self.width == 5) ⇒ move := (0, 5)\n\n# right key: whatever sits on row 43 slides 5 cells right\nACTION2: (self.row == 43)  ⇒ move := (5, 0)\n\n# on every action, the entity with a direct neighbour (the life-bar)\n# shifts its colours one notch: time is running out\nany:     exists(nbrs(d1))  ⇒ recolor := (0,0,0,2, 0,0,0,0, 0,0,0,-2, 0,0,0,0)\n","text",[384,385,381],"code",{"__ignoreMap":141},[74,387,388],{},"Four laws, and you know the game. Each one states when it fires, who it applies to, and what happens. When the world contradicts a law, that law and only that law is revised.",[74,390,391],{},"The honest counterpart: gradients win where perception is raw and data is abundant. The symbolic bet targets the opposite regime, low data, a single life, and a demand for explanations. 
That happens to be the regime children, robots in the field, and scientific discovery all live in.",[86,393,395],{"id":394},"the-benchmark-arc-agi-3","The benchmark: ARC-AGI-3",[74,397,398,399,404,405,410],{},"Betting on a research direction means picking a test you cannot fool. Ours is ",[323,400,403],{"href":401,"rel":402},"https:\u002F\u002Farcprize.org\u002Farc-agi\u002F3",[327],"ARC-AGI-3",", the interactive-reasoning benchmark from the ARC Prize Foundation. The agent is dropped into a grid-world game with no instructions, no stated goal, and a handful of abstract actions, and must figure out what the game is while playing it. Scoring emphasizes sample efficiency: solving with fewer actions beats solving with more. That is ",[323,406,409],{"href":407,"rel":408},"https:\u002F\u002Farxiv.org\u002Fabs\u002F1911.01547",[327],"Chollet's definition of intelligence"," as skill-acquisition efficiency made operational, and a benchmark where the goal is never stated is exactly the right exam for an agent whose whole premise is inventing goals for itself.",[86,412,414],{"id":413},"where-it-stands","Where it stands",[74,416,417],{},"The world-model half is built and measured. Playing each of the 25 public ARC-AGI-3 games for a single 200-step life, with no pretraining, no gradients, and no game-specific tuning, Cubist learns each game's mechanics online as a small theory of laws, predicting every frame before learning from it. Held-out accuracy climbs from 0.46 in the first quarter of a run to 0.72 in the last, and 23 of 25 games end better than they started:",[74,419,420],{},[421,422],"img",{"alt":423,"src":424},"Per-game held-out prediction accuracy (F1), first quartile of the run versus final quartile. 
23 of 25 games improve within a single 200-step life.","\u002Fprojects\u002Fcubist\u002Flearning-progress.png",[74,426,427,428,431,432,435],{},"The ",[323,429,430],{"href":37},"results post"," covers the representation, the learning algorithm, every metric, and where the model breaks. The ",[323,433,434],{"href":33},"method post"," works through symbolic descent itself and the full parallel with gradient descent.",[86,437,439],{"id":438},"whats-next-the-autotelic-loop","What's next: the autotelic loop",[74,441,442],{},"A model that predicts well is necessary but not sufficient. Prediction is not control, and on its own the world-model solves no levels. The second half of the program closes the loop:",[91,444,445,451,463],{},[94,446,447,450],{},[97,448,449],{},"Gaps become goals."," The model is honest: it never asserts a change it cannot justify, so its coverage gaps are a precise map of what is still unknown. That map is the agent's intrinsic motivation. Act where the theory is blind.",[94,452,453,456,457,462],{},[97,454,455],{},"Skills as programs."," Behaviours are grown as closed-loop programs over the theory's own vocabulary, scored by imagining them forward through the world-model before spending a real action, and kept in an open-ended repertoire the way ",[323,458,461],{"href":459,"rel":460},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2006.08381",[327],"DreamCoder"," grows a library of reusable abstractions.",[94,464,465,468],{},[97,466,467],{},"The loop."," Model the world, find what you cannot explain, invent a skill to probe it, and let the sharper model expose new gaps. Open-ended, self-directed, and readable at every step.",[74,470,471],{},"This is the part under active construction.",[86,473,475],{"id":474},"references","References",[91,477,478,489,499,509,516,524],{},[94,479,480,481,485,486,347],{},"Oudeyer & Kaplan (2007). ",[323,482,484],{"href":325,"rel":483},[327],"What is intrinsic motivation? A typology of computational approaches",". 
",[78,487,488],{},"Frontiers in Neurorobotics",[94,490,491,492,485,496,347],{},"Colas, Karch, Sigaud & Oudeyer (2022). ",[323,493,495],{"href":337,"rel":494},[327],"Autotelic agents with intrinsically motivated goal-conditioned reinforcement learning: a short survey",[78,497,498],{},"JAIR 74",[94,500,501,502,485,506,347],{},"Forestier, Portelas, Mollard & Oudeyer (2022). ",[323,503,505],{"href":332,"rel":504},[327],"Intrinsically motivated goal exploration processes with automatic curriculum learning",[78,507,508],{},"JMLR 23(152)",[94,510,511,512,347],{},"Chollet (2019). ",[323,513,515],{"href":407,"rel":514},[327],"On the measure of intelligence",[94,517,518,519,347],{},"ARC Prize Foundation (2026). ",[323,520,523],{"href":521,"rel":522},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2603.24621",[327],"ARC-AGI-3: a new challenge for frontier agentic intelligence",[94,525,526,527,485,531,347],{},"Ellis et al. (2021). ",[323,528,530],{"href":459,"rel":529},[327],"DreamCoder: bootstrapping inductive program synthesis with wake-sleep library learning",[78,532,533],{},"PLDI",{"title":141,"searchDepth":142,"depth":142,"links":535},[536,537,538,539,540,541],{"id":305,"depth":142,"text":306},{"id":350,"depth":142,"text":351},{"id":394,"depth":142,"text":395},{"id":413,"depth":142,"text":414},{"id":438,"depth":142,"text":439},{"id":474,"depth":142,"text":475},"#d97706","Cubist is a research program on symbolic autotelic agents, agents that teach themselves an open-ended repertoire of skills. Gradient descent is replaced by symbolic descent. 
World-models are learned as readable programs, skills as closed-loop programs, and everything is measured on ARC-AGI-3.","i-lucide-box","\u002Fprojects\u002Fcubist\u002Fperception.png",{},4,{"title":14,"description":543},"Research","Building a symbolic autotelic agent: an AI that sets its own goals and learns its world as readable laws",[552,553,554,555,403,556],"Autotelic agents","Symbolic descent","World-models","Open-endedness","Interpretability","8CmPek1aJkK0sokgX8TS8knzVuEB1afssi4xS9PwJNs",[559,1741],{"id":560,"title":36,"author":561,"body":563,"date":1734,"description":1735,"extension":148,"image":424,"meta":1736,"minRead":1737,"navigation":57,"path":37,"project":1738,"seo":1739,"stem":38,"__hash__":1740},"blog\u002Fblog\u002Fsymbolic-world-model.md",{"name":41,"description":562},"Founder & ML engineer",{"type":71,"value":564,"toc":1724},[565,577,580,585,592,595,615,619,622,642,645,649,710,714,735,740,746,752,767,781,791,797,801,804,836,840,1487,1493,1525,1531,1546,1552,1558,1570,1576,1597,1603,1607,1617,1623,1629,1635,1638,1640],[74,566,567,569,570,573,574,347],{},[323,568,14],{"href":15}," is building an agent for ARC-AGI-3 that learns each game's mechanics as a small ",[97,571,572],{},"theory of symbolic laws",", online, from single experiences, with no pretraining and no gradients. This post presents the world-model half of that effort: the representation, the learning algorithm, and an evaluation across all 25 public games. The learning algorithm itself, symbolic descent, has ",[323,575,576],{"href":33},"its own post",[74,578,579],{},"The theory a game produces is human-readable. 
Here is a real one, learned in eight seconds of play:",[377,581,583],{"className":582,"code":381,"language":382},[380],[384,584,381],{"__ignoreMap":141},[74,586,587,588,591],{},"That is the movement system of the game ",[384,589,590],{},"ls20",", inferred from about thirty frames, and every law in it is a falsifiable claim the model keeps revising as evidence arrives.",[86,593,403],{"id":594},"arc-agi-3",[74,596,597,600,601,605,606,609,610,614],{},[323,598,403],{"href":401,"rel":599},[327]," is the interactive-reasoning benchmark from the ARC Prize Foundation (",[323,602,604],{"href":521,"rel":603},[327],"technical report","). An agent is dropped into a grid-world game of 16-colour frames, with an action space of up to five abstract keys plus a pixel click, and must figure out what the game is while playing it. The goal is never stated; the mechanics must be inferred, tested, and exploited, level by level. The public release ships 25 games spanning three interface families: keyboard-driven movement puzzles, click-driven games of buttons and cascades, and mixed ones. Scoring emphasizes ",[97,607,608],{},"sample efficiency",": solving with fewer actions beats solving with more, which is ",[323,611,613],{"href":407,"rel":612},[327],"Chollet's skill-acquisition-efficiency view of intelligence"," made operational. It is precisely the regime where learning explicit structure should pay.",[86,616,618],{"id":617},"why-a-symbolic-world-model","Why a symbolic world-model",[74,620,621],{},"Three properties we want that gradient world-models make expensive:",[91,623,624,630,636],{},[94,625,626,629],{},[97,627,628],{},"One-shot revision."," A game reveals a mechanic once; the model should account for it now, not after a thousand replays. Our learner revises its theory on every prediction error and is exact on the revised frame immediately.",[94,631,632,635],{},[97,633,634],{},"Verifiable exactness."," A symbolic law either reproduces an observed transition or it does not. 
There is no soft loss to hide behind, so the evaluation can demand that the theory explain every frame it was fit on, and can measure regression when an edit forgets the past.",[94,637,638,641],{},[97,639,640],{},"Legibility."," The theory is the explanation. When the model fails, the failure can be attributed to the component that caused it, and that attribution points to the next improvement.",[74,643,644],{},"We are explicit about what a world-model does not buy: prediction is not control. A model can predict a game well and still not solve it. Reward and planning are separate problems, and this post is about the modeling half.",[86,646,648],{"id":647},"related-work","Related work",[74,650,651,652,655,656,661,662,665,666,671,672,677,678,683,684,689,690,695,696,699,700,703,704,709],{},"Our approach sits at the intersection of several traditions. ",[97,653,654],{},"Inductive logic programming"," supplies the bones: our per-entity most-specific descriptions are Progol-style bottom clauses (",[323,657,660],{"href":658,"rel":659},"https:\u002F\u002Flink.springer.com\u002Farticle\u002F10.1007\u002FBF03037227",[327],"Muggleton, 1995","), generalized by contrast against negatives. ",[97,663,664],{},"Theory induction"," framings share our laws-as-programs stance: the Apperception Engine (",[323,667,670],{"href":668,"rel":669},"https:\u002F\u002Farxiv.org\u002Fabs\u002F1910.02227",[327],"Evans et al., 2020","), Schema Networks (",[323,673,676],{"href":674,"rel":675},"https:\u002F\u002Farxiv.org\u002Fabs\u002F1706.04317",[327],"Kansky et al., 2017","), and the recent program-synthesis world-models ",[323,679,682],{"href":680,"rel":681},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2505.10819",[327],"PoE-World"," and ",[323,685,688],{"href":686,"rel":687},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2402.12275",[327],"WorldCoder",". 
We differ in learning fully online, without an LLM proposer, from single transitions, a contrast that also holds against LLM coding-agent systems for ARC-AGI-3 itself (",[323,691,694],{"href":692,"rel":693},"https:\u002F\u002Farxiv.org\u002Fabs\u002F2605.05138",[327],"Rodionov, 2026","). ",[97,697,698],{},"State-merging automata induction"," (RPNI: Oncina & García, 1992; ALERGIA: Carrasco & Oncina, 1994) is the direct ancestor of our condensation step: start from total memorization, merge while a description-length criterion pays. And ",[97,701,702],{},"MDL"," (",[323,705,708],{"href":706,"rel":707},"https:\u002F\u002Fdoi.org\u002F10.1016\u002F0005-1098%2878%2990005-5",[327],"Rissanen, 1978",") is the single objective throughout. A law earns its bits or it does not exist.",[86,711,713],{"id":712},"method","Method",[74,715,716,719,720,723,724,727,728,727,731,734],{},[97,717,718],{},"Perception."," Each frame is segmented into tracked, object-centric entities carrying position, size, a colour histogram, their own velocity, and a local relation graph to their neighbours. Consecutive frames yield a ",[78,721,722],{},"transition",": per-entity changes on three axes (",[384,725,726],{},"move",", ",[384,729,730],{},"resize",[384,732,733],{},"recolor","), each change a fact the model must explain.",[74,736,737],{},[421,738],{"alt":739,"src":545},"What the agent perceives: a raw ARC-AGI-3 grid on the left, segmented into tracked, object-centric entities on the right. Those entities are the units the laws are written over.",[74,741,742,745],{},[97,743,744],{},"Laws."," Laws are written in a typed expression language over the scene graph: attribute reads, neighbour traversal, aggregation, arithmetic, comparison, and the click. 
A law has four parts:",[377,747,750],{"className":748,"code":749,"language":382},[380],"(action?, focus?, selector, transforms)\n",[384,751,749],{"__ignoreMap":141},[74,753,754,755,758,759,762,763,766],{},"Under a given action, if the clicked entity matches the ",[384,756,757],{},"focus"," condition, then every entity matching the ",[384,760,761],{},"selector"," (the who) undergoes every ",[384,764,765],{},"transform"," (the what). One law can speak to several axes at once, because entities overwhelmingly change several axes together.",[74,768,769,772,773,776,777,780],{},[97,770,771],{},"Abduction."," For every observed change, the learner enumerates every small expression that exactly computes it: constants, attribute reads off the entity, off the clicked entity, or off a unique neighbour, and one or two arithmetic combinations of these. This ",[78,774,775],{},"abduction set"," is the change's space of possible explanations, and it converts learning into set operations. Two changes obey the same law when their abduction sets intersect. Two laws generalize by anti-unification, keeping their shared structure and lifting clashing constants into computed expressions such as ",[384,778,779],{},"colour == the(touching).colour",". And the literal change is always present as a fallback, so the model can memorize what it cannot yet explain.",[74,782,783,786,787,790],{},[97,784,785],{},"Fitting."," The learner groups changes by shared explanation, takes the largest group still unexplained, and names it: it searches for the cheapest selector that admits the group and excludes every entity that stayed still or changed differently, growing the condition step by step and validating against counterexamples. Whatever no selector can name is patched with per-entity memorized laws. Finally, ",[78,788,789],{},"condensation"," merges laws that share an explanation whenever the merge saves description length, so patches dissolve into general laws as evidence accumulates. 
At every stage the theory remains exact on the data it was fit on.",[74,792,793,796],{},[97,794,795],{},"The online loop."," At every step the model predicts the incoming transition before learning from it. That prediction is the honest held-out score, one point of a learning curve. On any error, the model revises exactly the laws implicated by the error, refitting them over every past transition they answer for. Laws that remain general and consistent pass through untouched. The revision must explain the new transition while keeping the past, and both halves are measured at every step.",[86,798,800],{"id":799},"evaluation-protocol","Evaluation protocol",[74,802,803],{},"Each of the 25 public games is played for a single 200-step life under a uniformly random policy, with one shared configuration and no per-game tuning. We report:",[91,805,806,812,818,824,830],{},[94,807,808,811],{},[97,809,810],{},"F1",", the prediction quality. Every transition is predicted by the theory fit on transitions 1 to t−1, before the model learns from it (prequential scoring), so every number is held out by construction. We summarize by run quartile, because prediction is a curve, not a number.",[94,813,814,817],{},[97,815,816],{},"Revisions",", how often the theory had to change. A revision happens only when a prediction erred.",[94,819,820,823],{},[97,821,822],{},"Regressions",", revisions that lost accuracy on past transitions the edited laws covered. This is the continual-learning claim under test.",[94,825,826,829],{},[97,827,828],{},"Compression",", bits of data the theory explains per bit of theory. Above 1, the theory is smaller than the data it reproduces and has genuinely generalized; below 1, it is staying exact by memorizing.",[94,831,832,835],{},[97,833,834],{},"Coverage",", the fraction of all past changes the final theory reproduces exactly. 
The distance between coverage and F1 is the generalization gap, measured rather than assumed.",[86,837,839],{"id":838},"results","Results",[841,842,843,873],"table",{},[844,845,846],"thead",{},[847,848,849,853,855,858,861,864,867,870],"tr",{},[850,851,852],"th",{},"game",[850,854,810],{},[850,856,857],{},"F1 (Q1→Q4)",[850,859,860],{},"revisions",[850,862,863],{},"regressions",[850,865,866],{},"laws",[850,868,869],{},"compression",[850,871,872],{},"coverage",[874,875,876,907,937,962,988,1011,1036,1060,1083,1109,1133,1158,1181,1206,1228,1253,1276,1301,1323,1348,1372,1395,1418,1440,1463],"tbody",{},[847,877,878,882,887,893,896,899,902,905],{},[879,880,881],"td",{},"tn36",[879,883,884],{},[97,885,886],{},"0.93",[879,888,889,890],{},"0.77→",[97,891,892],{},"1.00",[879,894,895],{},"63",[879,897,898],{},"1",[879,900,901],{},"93",[879,903,904],{},"2.0",[879,906,892],{},[847,908,909,912,917,920,923,926,929,934],{},[879,910,911],{},"sp80",[879,913,914],{},[97,915,916],{},"0.89",[879,918,919],{},"0.72→0.93",[879,921,922],{},"34",[879,924,925],{},"0",[879,927,928],{},"23",[879,930,931],{},[97,932,933],{},"10.7",[879,935,936],{},"0.92",[847,938,939,942,947,950,953,955,957,960],{},[879,940,941],{},"sb26",[879,943,944],{},[97,945,946],{},"0.84",[879,948,949],{},"0.73→0.87",[879,951,952],{},"43",[879,954,925],{},[879,956,952],{},[879,958,959],{},"1.6",[879,961,892],{},[847,963,964,967,972,975,978,980,983,986],{},[879,965,966],{},"lf52",[879,968,969],{},[97,970,971],{},"0.83",[879,973,974],{},"0.83→0.83",[879,976,977],{},"134",[879,979,925],{},[879,981,982],{},"98",[879,984,985],{},"1.5",[879,987,892],{},[847,989,990,992,995,998,1001,1003,1006,1009],{},[879,991,590],{},[879,993,994],{},"0.81",[879,996,997],{},"0.84→0.82",[879,999,1000],{},"79",[879,1002,925],{},[879,1004,1005],{},"80",[879,1007,1008],{},"1.1",[879,1010,886],{},[847,1012,1013,1016,1019,1022,1025,1027,1030,1033],{},[879,1014,1015],{},"ar25",[879,1017,1018],{},"0.76",[879,1020,1021],{},"0.70→0.79",[879,1023,1024],{},"1
11",[879,1026,925],{},[879,1028,1029],{},"120",[879,1031,1032],{},"0.8",[879,1034,1035],{},"0.99",[847,1037,1038,1041,1044,1047,1050,1052,1055,1058],{},[879,1039,1040],{},"bp35",[879,1042,1043],{},"0.75",[879,1045,1046],{},"0.72→0.76",[879,1048,1049],{},"154",[879,1051,925],{},[879,1053,1054],{},"302",[879,1056,1057],{},"0.5",[879,1059,892],{},[847,1061,1062,1065,1068,1071,1073,1075,1078,1081],{},[879,1063,1064],{},"ft09",[879,1066,1067],{},"0.73",[879,1069,1070],{},"0.34→0.82",[879,1072,928],{},[879,1074,925],{},[879,1076,1077],{},"24",[879,1079,1080],{},"1.0",[879,1082,892],{},[847,1084,1085,1088,1091,1097,1100,1102,1104,1106],{},[879,1086,1087],{},"s5i5",[879,1089,1090],{},"0.72",[879,1092,1093,1094],{},"0.20→",[97,1095,1096],{},"0.96",[879,1098,1099],{},"75",[879,1101,898],{},[879,1103,895],{},[879,1105,959],{},[879,1107,1108],{},"0.98",[847,1110,1111,1114,1117,1120,1123,1125,1128,1131],{},[879,1112,1113],{},"g50t",[879,1115,1116],{},"0.71",[879,1118,1119],{},"0.50→0.80",[879,1121,1122],{},"112",[879,1124,925],{},[879,1126,1127],{},"122",[879,1129,1130],{},"0.6",[879,1132,1096],{},[847,1134,1135,1138,1141,1146,1148,1150,1153,1156],{},[879,1136,1137],{},"sc25",[879,1139,1140],{},"0.66",[879,1142,1143,1144],{},"0.39→",[97,1145,946],{},[879,1147,1029],{},[879,1149,925],{},[879,1151,1152],{},"130",[879,1154,1155],{},"1.4",[879,1157,1035],{},[847,1159,1160,1163,1166,1169,1171,1173,1176,1179],{},[879,1161,1162],{},"tu93",[879,1164,1165],{},"0.65",[879,1167,1168],{},"0.41→0.83",[879,1170,1127],{},[879,1172,925],{},[879,1174,1175],{},"148",[879,1177,1178],{},"0.9",[879,1180,886],{},[847,1182,1183,1186,1189,1192,1195,1198,1200,1203],{},[879,1184,1185],{},"tr87",[879,1187,1188],{},"0.64",[879,1190,1191],{},"0.49→0.79",[879,1193,1194],{},"140",[879,1196,1197],{},"2",[879,1199,1122],{},[879,1201,1202],{},"0.7",[879,1204,1205],{},"0.95",[847,1207,1208,1211,1214,1217,1219,1221,1224,1226],{},[879,1209,1210],{},"cd82",[879,1212,1213],{},"0.63",[879,1215,1216],{},"0.46→0.74",[87
9,1218,1122],{},[879,1220,925],{},[879,1222,1223],{},"116",[879,1225,1178],{},[879,1227,892],{},[847,1229,1230,1233,1236,1242,1245,1247,1249,1251],{},[879,1231,1232],{},"dc22",[879,1234,1235],{},"0.59",[879,1237,1238,1239],{},"0.49→",[97,1240,1241],{},"0.77",[879,1243,1244],{},"127",[879,1246,925],{},[879,1248,1194],{},[879,1250,1130],{},[879,1252,1205],{},[847,1254,1255,1258,1261,1264,1267,1269,1272,1274],{},[879,1256,1257],{},"sk48",[879,1259,1260],{},"0.57",[879,1262,1263],{},"0.33→0.66",[879,1265,1266],{},"139",[879,1268,898],{},[879,1270,1271],{},"306",[879,1273,1202],{},[879,1275,1035],{},[847,1277,1278,1281,1284,1289,1292,1294,1297,1299],{},[879,1279,1280],{},"su15",[879,1282,1283],{},"0.56",[879,1285,1286],{},[97,1287,1288],{},"0.07→0.91",[879,1290,1291],{},"37",[879,1293,925],{},[879,1295,1296],{},"25",[879,1298,1057],{},[879,1300,892],{},[847,1302,1303,1306,1309,1312,1314,1316,1319,1321],{},[879,1304,1305],{},"m0r0",[879,1307,1308],{},"0.53",[879,1310,1311],{},"0.41→0.63",[879,1313,1127],{},[879,1315,925],{},[879,1317,1318],{},"160",[879,1320,1032],{},[879,1322,1108],{},[847,1324,1325,1328,1331,1334,1337,1340,1343,1346],{},[879,1326,1327],{},"lp85",[879,1329,1330],{},"0.50",[879,1332,1333],{},"0.30→0.37",[879,1335,1336],{},"14",[879,1338,1339],{},"3",[879,1341,1342],{},"85",[879,1344,1345],{},"0.4",[879,1347,1116],{},[847,1349,1350,1353,1356,1359,1362,1364,1367,1369],{},[879,1351,1352],{},"wa30",[879,1354,1355],{},"0.47",[879,1357,1358],{},"0.23→0.55",[879,1360,1361],{},"125",[879,1363,925],{},[879,1365,1366],{},"119",[879,1368,1057],{},[879,1370,1371],{},"0.97",[847,1373,1374,1377,1379,1382,1384,1387,1390,1392],{},[879,1375,1376],{},"ka59",[879,1378,1355],{},[879,1380,1381],{},"0.38→0.60",[879,1383,1049],{},[879,1385,1386],{},"4",[879,1388,1389],{},"193",[879,1391,1057],{},[879,1393,1394],{},"0.94",[847,1396,1397,1400,1403,1406,1409,1411,1414,1416],{},[879,1398,1399],{},"re86",[879,1401,1402],{},"0.45",[879,1404,1405],{},"0.37→0.46",[879,1407,1408],{},"18
7",[879,1410,925],{},[879,1412,1413],{},"236",[879,1415,1130],{},[879,1417,1035],{},[847,1419,1420,1423,1426,1429,1431,1433,1436,1438],{},[879,1421,1422],{},"cn04",[879,1424,1425],{},"0.44",[879,1427,1428],{},"0.36→0.49",[879,1430,1266],{},[879,1432,925],{},[879,1434,1435],{},"144",[879,1437,1032],{},[879,1439,1371],{},[847,1441,1442,1445,1448,1451,1454,1456,1459,1461],{},[879,1443,1444],{},"vc33",[879,1446,1447],{},"0.40",[879,1449,1450],{},"0.26→0.50",[879,1452,1453],{},"175",[879,1455,1339],{},[879,1457,1458],{},"146",[879,1460,1178],{},[879,1462,936],{},[847,1464,1465,1468,1471,1474,1477,1479,1482,1485],{},[879,1466,1467],{},"r11l",[879,1469,1470],{},"0.37",[879,1472,1473],{},"0.26→0.36",[879,1475,1476],{},"183",[879,1478,1339],{},[879,1480,1481],{},"470",[879,1483,1484],{},"0.3",[879,1486,1096],{},[74,1488,1489,1492],{},[97,1490,1491],{},"The theory learns without forgetting."," Across roughly 2,700 revisions in 25 games, the theory always ends up explaining the frame it just erred on, and revisions that lost accuracy on the past total 18, a 0.7% rate. Generalization is not aspirational either: over 900 revisions, roughly one in three, ended with fewer and broader laws than they started with.",[74,1494,1495,1498,1499,1502,1503,1506,1507,1509,1510,1512,1513,1515,1516,1518,1519,1521,1522,1524],{},[97,1496,1497],{},"The headline is the curve, not the mean."," A whole-run average penalizes the model for having once been ignorant: at step one it knows nothing by construction, and every early error drags the mean down. Since every point is held out, the honest estimate of converged skill is the final quartile. There, the aggregate reads ",[97,1500,1501],{},"0.46 in the first quartile against 0.72 in the last",", with ",[97,1504,1505],{},"23 of 25 games ending better than they started"," (one flat; the one decline, ",[384,1508,590],{},", starts at its ceiling of 0.84). 
The standouts read like learning should: ",[384,1511,1280],{}," climbs from 0.07 to 0.91 within a single 200-step run, ",[384,1514,1087],{}," from 0.20 to 0.96, and ",[384,1517,881],{}," reaches a perfect 1.00 held-out F1 over its final hundred steps. Where a curve dips mid-run (",[384,1520,1327],{},", late ",[384,1523,1257],{},"), the cause is visible in the data: a level transition changed the mechanics, and the theory paid to re-learn them.",[74,1526,1527],{},[421,1528],{"alt":1529,"src":1530},"Held-out F1 by run quartile for all 25 public games, every point predicted before the model learned from it.","\u002Fprojects\u002Fcubist\u002Ff1-curves.png",[74,1532,1533,1536,1537,1539,1540,1542,1543,1545],{},[97,1534,1535],{},"The theory has a metabolism."," Theory size mostly grows as experience accumulates, but watch ",[384,1538,1064],{}," (15 laws down to 14) and ",[384,1541,1087],{}," (64 down to 62): the count falling while F1 rises is condensation digesting its own memorized patches into broader laws. ",[384,1544,911],{}," is the other extreme, holding about twenty laws essentially from the start with a compression of 10.7×. The game's whole visible dynamics fit in twenty falsifiable sentences.",[74,1547,1548],{},[421,1549],{"alt":1550,"src":1551},"Laws in the theory (bars) against held-out F1 (line) by run quartile, for ft09, s5i5 and sp80.","\u002Fprojects\u002Fcubist\u002Ftheory-metabolism.png",[74,1553,1554,1557],{},[97,1555,1556],{},"Prediction is held out; coverage is not."," Read the two columns together: a final-quartile F1 of 0.72 against a coverage around 0.96. 
The theory almost always explains its past, and the remaining distance to its one-step-ahead predictions is the generalization gap, measured per game rather than assumed.",[74,1559,1560,1563,1564,1566,1567,1569],{},[97,1561,1562],{},"Where the structure is real."," Eight games end with compression at or above 1.0, meaning the theory is smaller than the data it reproduces, topping out at 10.7× (",[384,1565,911],{},") and 2.0× (",[384,1568,881],{},"). Games below 1.0 are memorization-heavy: the model stays exact by patching what it cannot yet name, and says so.",[74,1571,1572],{},[421,1573],{"alt":1574,"src":1575},"Compression per game: bits of data explained per bit of theory. Above 1.0 the theory has genuinely generalized; below, it is staying exact by memorizing.","\u002Fprojects\u002Fcubist\u002Fcompression.png",[74,1577,1578,1581,1582,1584,1585,1587,1588,1590,1591,1593,1594,1596],{},[97,1579,1580],{},"Where it struggles."," Because every miss can be attributed to the component that blocked it, the diagnosis is unambiguous: on every hard game the blocking component is the ",[97,1583,761],{},", the part of a law that decides who it applies to (950 of ",[384,1586,1467],{},"'s misses, 703 of ",[384,1589,1257],{},"'s). The laws that would predict these changes exist; their selectors fail to extend to entities they have not seen. That is the single sharpest open problem this evaluation isolates. A smaller, separate residue is genuine ambiguity: on a few games (105 cells on ",[384,1592,1444],{},", 46 on ",[384,1595,1327],{},"), entities with identical observable context behave differently, which no selector over observable attributes can separate. The model reports these rather than averaging them away.",[74,1598,1599],{},[421,1600],{"alt":1601,"src":1602},"Every missed prediction attributed to the component that blocked it. 
The selector is the bottleneck on every hard game.","\u002Fprojects\u002Fcubist\u002Fmiss-kinds.png",[86,1604,1606],{"id":1605},"limitations","Limitations",[74,1608,1609,1612,1613,1616],{},[97,1610,1611],{},"Prediction is not control."," Nothing here selects actions; the driver is uniformly random, and when it clears a level anyway that is luck, not competence. A policy that exploits the theory is the next milestone, the ",[323,1614,1615],{"href":15},"autotelic half of the program",", and prior experiments warn us the coupling is not free.",[74,1618,1619,1622],{},[97,1620,1621],{},"Existence is unmodeled."," Entities that appear or vanish fall outside the three axes. This is a known, deferred fourth axis.",[74,1624,1625,1628],{},[97,1626,1627],{},"Ambiguity is real."," On some games, entities with identical observable context behave differently. No selector over observable attributes can separate them, and the model counts them honestly rather than averaging them away.",[74,1630,1631,1634],{},[97,1632,1633],{},"Cost grows with history."," Revising a law over everything it has ever explained is what keeps the no-forgetting guarantee, but it makes the busiest games expensive. Bounding that growth without giving up the guarantee is the main open question on the engineering side.",[74,1636,1637],{},"We would rather publish a small model whose every claim is checkable than a large one whose failures are invisible. Every number in this post comes from one shared configuration, and every prediction was made before the model saw the answer.",[86,1639,475],{"id":474},[91,1641,1642,1652,1657,1667,1675,1685,1695,1704,1711,1721],{},[94,1643,518,1644,1647,1648,347],{},[323,1645,523],{"href":521,"rel":1646},[327],". Benchmark: ",[323,1649,1651],{"href":401,"rel":1650},[327],"arcprize.org\u002Farc-agi\u002F3",[94,1653,511,1654,347],{},[323,1655,515],{"href":407,"rel":1656},[327],[94,1658,1659,1660,485,1664,347],{},"Muggleton (1995). 
",[323,1661,1663],{"href":658,"rel":1662},[327],"Inverse entailment and Progol",[78,1665,1666],{},"New Generation Computing 13",[94,1668,1669,1670,1674],{},"Evans et al. (2020). ",[323,1671,1673],{"href":668,"rel":1672},[327],"Making sense of sensory input"," (the Apperception Engine).",[94,1676,1677,1678,485,1682,347],{},"Kansky et al. (2017). ",[323,1679,1681],{"href":674,"rel":1680},[327],"Schema Networks: zero-shot transfer with a generative causal model of intuitive physics",[78,1683,1684],{},"ICML",[94,1686,1687,1688,485,1692,347],{},"Piriyakulkij et al. (2025). ",[323,1689,1691],{"href":680,"rel":1690},[327],"PoE-World: compositional world modeling with products of programmatic experts",[78,1693,1694],{},"NeurIPS",[94,1696,1697,1698,485,1702,347],{},"Tang, Key & Ellis (2024). ",[323,1699,1701],{"href":686,"rel":1700},[327],"WorldCoder: building world models by writing code and interacting with the environment",[78,1703,1694],{},[94,1705,1706,1707,347],{},"Rodionov (2026). ",[323,1708,1710],{"href":692,"rel":1709},[327],"Executable world models for ARC-AGI-3 in the era of coding agents",[94,1712,1713,1714,485,1718,347],{},"Rissanen (1978). ",[323,1715,1717],{"href":706,"rel":1716},[327],"Modeling by shortest data description",[78,1719,1720],{},"Automatica 14",[94,1722,1723],{},"Oncina & García (1992). Inferring regular languages in polynomial updated time (RPNI). Carrasco & Oncina (1994). 
Learning stochastic regular grammars by means of a state merging method (ALERGIA).",{"title":141,"searchDepth":142,"depth":142,"links":1725},[1726,1727,1728,1729,1730,1731,1732,1733],{"id":594,"depth":142,"text":403},{"id":617,"depth":142,"text":618},{"id":647,"depth":142,"text":648},{"id":712,"depth":142,"text":713},{"id":799,"depth":142,"text":800},{"id":838,"depth":142,"text":839},{"id":1605,"depth":142,"text":1606},{"id":474,"depth":142,"text":475},"2026-07-03","Cubist's world-model learns each ARC-AGI-3 game's mechanics as a small theory of symbolic laws, online, from single experiences, with no pretraining and no gradients. This post presents the representation, the learning algorithm, and an evaluation across all 25 public games.",{},12,"cubist",{"title":36,"description":1735},"sVX5nyNR3Ylmf2z6Hq9oWm5NvzEuR_agtFfZnK6PaBQ",{"id":1742,"title":32,"author":1743,"body":1744,"date":2156,"description":2157,"extension":148,"image":545,"meta":2158,"minRead":2159,"navigation":57,"path":33,"project":1738,"seo":2160,"stem":34,"__hash__":2161},"blog\u002Fblog\u002Fsymbolic-descent.md",{"name":41,"description":562},{"type":71,"value":1745,"toc":2147},[1746,1759,1762,1771,1775,1793,1886,1897,1901,1908,1918,1956,1963,1967,1981,1986,1990,1995,2004,2010,2016,2022,2026,2029,2032,2034,2069,2076,2078],[74,1747,1748,1750,1751,1754,1755,1758],{},[323,1749,14],{"href":15}," is an attempt to build a ",[97,1752,1753],{},"symbolic autotelic agent",": one that sets its own goals and grows an open-ended repertoire of skills, in the tradition of intrinsically motivated agents (",[323,1756,339],{"href":337,"rel":1757},[327],"), but with every moving part readable. An agent like that needs a very particular kind of learner. It must learn from single experiences, because a one-life world grants no replays. It must not forget, because every skill it builds tomorrow stands on what it learned today. 
And it must know what it does not know, because its own ignorance is the map it explores by.",[74,1760,1761],{},"Gradient descent, which nudges a vector of weights down a loss one small step at a time, is a poor fit for all three. It needs many passes to fit, it overwrites old competence when trained on new data, and a weight vector cannot point to the place where it is ignorant.",[74,1763,1764,1766,1767,1770],{},[97,1765,553],{}," keeps the shape of gradient descent and changes the object. Instead of a vector of weights, the thing being optimized is a small, readable ",[97,1768,1769],{},"theory",": a set of typed laws. The claim of this post is that you can keep the whole machinery of gradient descent (loss, regularizer, gradient, mini-batch) and get something interpretable, stable under continual learning, and far more data-efficient in return. Cubist uses this to learn a world-model for ARC-AGI-3, but the method is general.",[86,1772,1774],{"id":1773},"the-correspondence","The correspondence",[74,1776,1777,1778,1781,1782,1785,1786,1789,1790,1792],{},"In parametric learning you minimize a loss ",[384,1779,1780],{},"L(θ)"," by stepping ",[384,1783,1784],{},"θ ← θ - η ∇L(θ)",". A regularizer biases toward simpler ",[384,1787,1788],{},"θ",", a mini-batch estimates the gradient from a sample, and early stopping watches a held-out loss. Symbolic descent minimizes the same kind of objective, but ",[384,1791,1788],{}," is a structured discrete theory rather than a vector. 
Every familiar piece has a counterpart.",[841,1794,1795,1804],{},[844,1796,1797],{},[847,1798,1799,1802],{},[850,1800,1801],{},"gradient descent",[850,1803,346],{},[874,1805,1806,1814,1822,1830,1838,1846,1854,1862,1870,1878],{},[847,1807,1808,1811],{},[879,1809,1810],{},"parameters: a weight vector",[879,1812,1813],{},"a theory: a set of typed laws",[847,1815,1816,1819],{},[879,1817,1818],{},"loss",[879,1820,1821],{},"the data cost (bits to encode the residual)",[847,1823,1824,1827],{},[879,1825,1826],{},"regularizer",[879,1828,1829],{},"the model cost (bits to write the laws; Occam, built in)",[847,1831,1832,1835],{},[879,1833,1834],{},"the gradient",[879,1836,1837],{},"the labelled errors (miss, over-fire, true delta)",[847,1839,1840,1843],{},[879,1841,1842],{},"the space of descent directions",[879,1844,1845],{},"the abduction set: every small program that explains the error",[847,1847,1848,1851],{},[879,1849,1850],{},"a gradient step",[879,1852,1853],{},"a directed edit (generalize, specialize, or patch)",[847,1855,1856,1859],{},[879,1857,1858],{},"a mini-batch",[879,1860,1861],{},"the revised laws' own support: every past transition they answer for",[847,1863,1864,1867],{},[879,1865,1866],{},"early stopping on validation loss",[879,1868,1869],{},"held-out, predict-before-learn scoring",[847,1871,1872,1875],{},[879,1873,1874],{},"weight decay",[879,1876,1877],{},"condensation: merging laws whenever the merged theory costs fewer bits",[847,1879,1880,1883],{},[879,1881,1882],{},"a convex bowl",[879,1884,1885],{},"a combinatorial lattice with real local minima",[74,1887,1888,1889,1892,1893,1896],{},"The objective is a two-part description length, ",[384,1890,1891],{},"L(T) + L(D | T)",": the bits to write the theory down, plus the bits to encode the data given it, paid wherever the theory mispredicts (",[323,1894,708],{"href":706,"rel":1895},[327],"). The two terms are loss and regularizer at once, which is the quiet but important part. 
A law earns its place if and only if it saves more bits in the residual than it costs to write. There is no precision threshold, no minimum-support count, nothing to tune. The bits decide.",[86,1898,1900],{"id":1899},"errors-are-the-gradient-abduction-is-the-direction-set","Errors are the gradient, abduction is the direction set",[74,1902,1903,1904,1907],{},"This is the load-bearing row of the table. A gradient tells you the direction in which a continuous loss falls fastest. In symbolic descent, the ",[97,1905,1906],{},"labelled prediction errors"," tell you exactly which edits can lower the description length. They do more than a gradient does, because they are labelled.",[74,1909,1910,1911,1914,1915,1917],{},"The mechanism is ",[97,1912,1913],{},"abduction",". For every observed change the model failed to explain, it enumerates every small expression that exactly computes that change: constants, attribute reads off the entity, reads off a unique neighbour, and one or two arithmetic combinations of these. This ",[78,1916,775],{}," is the space of possible explanations for the change, the discrete counterpart of the candidate descent directions. It turns learning into set algebra:",[91,1919,1920,1930,1950],{},[94,1921,1922,1925,1926,1929],{},[97,1923,1924],{},"Same regime = intersection."," Two changes belong to the same law when their abduction sets overlap. There is no clustering metric and no ",[384,1927,1928],{},"k"," to choose. If a small program explains both, they are one behaviour.",[94,1931,1932,1935,1936,683,1939,1942,1943,1945,1946,1949],{},[97,1933,1934],{},"Generalize = anti-unify."," Two laws merge by keeping their shared structure and lifting clashing constants into computed expressions. ",[384,1937,1938],{},"colour == 3",[384,1940,1941],{},"colour == 5"," become ",[384,1944,779],{}," when that expression sits in both abduction sets. 
This is the bottom-clause generalization of inductive logic programming (",[323,1947,660],{"href":658,"rel":1948},[327],"), run online.",[94,1951,1952,1955],{},[97,1953,1954],{},"Memorize = the guaranteed floor."," The literal change is always in the abduction set, so the model can always patch a change it cannot yet name. This is deliberate memorization, honestly accounted: a patch is expensive in bits, which is precisely the pressure to replace it with structure later.",[74,1957,1958,1959,1962],{},"A worked step makes it concrete. Suppose a law moves every red entity three cells up, and it fires correctly. Now a green entity moves the same way under the same action, and no law covers it: one miss. The green entity's abduction set contains ",[384,1960,1961],{},"move := (0, -3)","; so does the red law's. Non-empty intersection: same regime. Anti-unification drops the colour condition, and the merged law covers both. The model cost falls because one condition is gone, the data cost falls because the green move is now explained, and no red entity is disturbed. One labelled miss has moved the theory exactly one step downhill.",[86,1964,1966],{"id":1965},"weight-decay-that-actually-deletes","Weight decay that actually deletes",[74,1968,1969,1970,1973,1974,1977,1978,1980],{},"Weight decay shrinks weights toward zero; symbolic descent has something better. ",[97,1971,1972],{},"Condensation"," sweeps the theory for pairs of laws that share an explanation and merges them whenever the merged theory costs fewer bits, best saving first, until nothing improves. Patches dissolve into laws as evidence accumulates, and the theory loses laws while gaining accuracy. 
In our ",[323,1975,1976],{"href":37},"benchmark"," you can watch it happen: on game ",[384,1979,1064],{}," the law count falls from 15 to 14 across the run while held-out accuracy rises, and roughly one revision in three across all 25 games ends with fewer, broader laws than it started with.",[74,1982,1983],{},[421,1984],{"alt":1985,"src":1551},"Laws in the theory (bars) against held-out F1 (line) across the run, for three games. The count falling while accuracy rises is condensation digesting its own patches.",[86,1987,1989],{"id":1988},"what-the-method-buys","What the method buys",[74,1991,1992,1993,340],{},"The parallel is pretty, but the reason to care is what comes out of it. Each claim below is a measured number, not an aspiration (full protocol in the ",[323,1994,430],{"href":37},[74,1996,1997,2000,2001,347],{},[97,1998,1999],{},"Continual learning without catastrophic forgetting."," Each revision touches exactly the laws implicated by the error, re-fit over their own support: every past transition they answer for. A law that is already correct produces no error, and the descent never disturbs it. Learning something new therefore has no way to silently break something old, the failure mode that has haunted connectionist learners since McCloskey & Cohen (1989). Measured: across ~2,700 revisions over 25 games, only 18 revisions lost any accuracy on their laws' own past, a ",[97,2002,2003],{},"0.7% regression rate",[74,2005,2006,2009],{},[97,2007,2008],{},"Generalization instead of memorization, and you can read the margin."," Because the model cost is real bits, a law that merely stores what it has seen is expensive and loses to one that compresses. The objective actively prefers the general law over the lookup table, and the margin is a single number: the compression ratio, bits of data explained per bit of theory. 
Eight of 25 games end at or above 1.0, meaning genuine structure, topping out at 10.7×: a game's whole visible dynamics in about twenty falsifiable sentences. Games below 1.0 are memorization-heavy, and the number says so rather than hiding it.",[74,2011,2012,2015],{},[97,2013,2014],{},"Data efficiency."," A single informative experience justifies a single edit, applied immediately: the model is exact on the frame it just erred on. There is no batch to accumulate and no epoch to re-run, which is exactly the resource a one-life, learn-while-playing setting denies a network.",[74,2017,2018,2021],{},[97,2019,2020],{},"Interpretability and honest uncertainty."," The parameter is a list of laws you can read, so every prediction traces to the law that produced it, and every failure decomposes into the component that blocked it: a gate, a selector, a transform. And what no law explains is left unchanged rather than guessed, so the model's silence is a real coverage gap. That is the very signal an autotelic agent turns into a goal.",[86,2023,2025],{"id":2024},"where-the-analogy-bites","Where the analogy bites",[74,2027,2028],{},"It is a lens, not an identity. There is no continuous gradient: the space is discrete, so \"the gradient\" is really the finite set of abducible edits and their cost differences, and the step is the argmin over that set. The terrain is a combinatorial lattice with real local minima, where a smooth convex loss has none, so a misleading early edit can trap the descent. The honest framing is directed discrete local search, but one whose direction set is constructed from the data by abduction, not sampled blindly the way evolutionary methods probe fitness.",[74,2030,2031],{},"The parametric toolbox still transfers. The model cost is already a complexity penalty, and richer priors slot in the same way. Beams, restarts, and annealing are drop-in optimizers. 
The replay reservoir is the mini-batch, and held-out predict-before-learn scoring is early stopping.",[86,2033,648],{"id":647},[74,2035,2036,2037,2039,2040,661,2043,2045,2046,671,2049,677,2052,683,2055,2058,2059,695,2062,2064,2065,2068],{},"Symbolic descent sits at the intersection of several traditions. ",[97,2038,654],{}," supplies the bones: per-entity most-specific descriptions are Progol-style bottom clauses (",[323,2041,660],{"href":658,"rel":2042},[327],[97,2044,664],{}," framings share the laws-as-programs stance: the Apperception Engine (",[323,2047,670],{"href":668,"rel":2048},[327],[323,2050,676],{"href":674,"rel":2051},[327],[323,2053,688],{"href":686,"rel":2054},[327],[323,2056,682],{"href":680,"rel":2057},[327],". The salient difference: those recent systems use an LLM to propose the programs, where symbolic descent learns fully online, without an LLM proposer, from single transitions. The same contrast holds against LLM coding-agent approaches to ARC-AGI-3 (",[323,2060,694],{"href":692,"rel":2061},[327],[97,2063,698],{}," (RPNI: Oncina & García, 1992; ALERGIA: Carrasco & Oncina, 1994) is the direct ancestor of condensation: start from total memorization, merge while a description-length criterion pays. And ",[323,2066,461],{"href":459,"rel":2067},[327],"'s wake-sleep library learning is the closest relative of what comes next for Cubist: growing a repertoire of skills by the same compression pressure that grows the laws.",[74,2070,2071,2072,2075],{},"The point of all this is not a clever analogy. It is that an agent meant to learn continually, and to set its own goals, needs a learner that does not forget, generalizes from little data, and knows what it does not know. Symbolic descent is an attempt at exactly that learner. 
The ",[323,2073,2074],{"href":37},"companion post"," puts it to work as a world-model across all 25 public ARC-AGI-3 games and reports how well it does, curves, tables, and failures included.",[86,2077,475],{"id":474},[91,2079,2080,2087,2094,2100,2102,2107,2114,2121,2128,2133,2140],{},[94,2081,1659,2082,485,2085,347],{},[323,2083,1663],{"href":658,"rel":2084},[327],[78,2086,1666],{},[94,2088,1713,2089,485,2092,347],{},[323,2090,1717],{"href":706,"rel":2091},[327],[78,2093,1720],{},[94,2095,2096,2097,347],{},"McCloskey & Cohen (1989). Catastrophic interference in connectionist networks. ",[78,2098,2099],{},"Psychology of Learning and Motivation 24",[94,2101,1723],{},[94,2103,1669,2104,1674],{},[323,2105,1673],{"href":668,"rel":2106},[327],[94,2108,1677,2109,485,2112,347],{},[323,2110,1681],{"href":674,"rel":2111},[327],[78,2113,1684],{},[94,2115,1697,2116,485,2119,347],{},[323,2117,1701],{"href":686,"rel":2118},[327],[78,2120,1694],{},[94,2122,1687,2123,485,2126,347],{},[323,2124,1691],{"href":680,"rel":2125},[327],[78,2127,1694],{},[94,2129,1706,2130,347],{},[323,2131,1710],{"href":692,"rel":2132},[327],[94,2134,526,2135,485,2138,347],{},[323,2136,530],{"href":459,"rel":2137},[327],[78,2139,533],{},[94,2141,491,2142,485,2145,347],{},[323,2143,495],{"href":337,"rel":2144},[327],[78,2146,498],{},{"title":141,"searchDepth":142,"depth":142,"links":2148},[2149,2150,2151,2152,2153,2154,2155],{"id":1773,"depth":142,"text":1774},{"id":1899,"depth":142,"text":1900},{"id":1965,"depth":142,"text":1966},{"id":1988,"depth":142,"text":1989},{"id":2024,"depth":142,"text":2025},{"id":647,"depth":142,"text":648},{"id":474,"depth":142,"text":475},"2026-07-01","Symbolic descent keeps the shape of gradient descent but changes the object it optimizes, from a weight vector to a readable theory of laws. 
This post explains the parallel, the machinery that makes it work, and what it buys for continual learning, reasoning, and interpretability.",{},10,{"title":32,"description":2157},"Zu328ZPnzw_FV2NtHRhZGsYuVFdpK6XClxjbnH7ZWMA",1783074437287]