diff --git a/.github/workflows/pr-validation.yml b/.github/workflows/pr-validation.yml index 039b5b2..c4da95b 100644 --- a/.github/workflows/pr-validation.yml +++ b/.github/workflows/pr-validation.yml @@ -30,6 +30,8 @@ jobs: - name: Report Status uses: actions/github-script@v7 with: + retries: 3 + retry-exempt-status-codes: 400,401,403,404,422 script: | const status = '${{ needs.validate.result }}'; const icon = status === 'success' ? '✅' : '❌'; diff --git a/.gitignore b/.gitignore index 503256c..a242a98 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ dist/ # generated types .astro/ +# conductor build artifacts +.conductor/ + # dependencies node_modules/ diff --git a/cspell.json b/cspell.json index a040542..32e183b 100644 --- a/cspell.json +++ b/cspell.json @@ -16,10 +16,13 @@ "Claude", "Codegen", "cplusplus", + "cppreference", + "decltype", "devcontainer", "doesn", "expressibility", "frontmatter", + "genericity", "HDXLXC", "IIRC", "imread", @@ -47,11 +50,14 @@ "subclassing", "Tailgraph", "Tailwindcss", + "textlint", "todolist", "Typesafe", "uncategorized", "Uncategorized", "webfetch", + "worktree", + "worktrees", "WWDC", "xctest" ], @@ -122,4 +128,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/eslint.config.js b/eslint.config.js index ef5c903..764f873 100644 --- a/eslint.config.js +++ b/eslint.config.js @@ -10,7 +10,7 @@ export default [ // Global ignores { - ignores: ["dist/", "node_modules/", ".astro/"] + ignores: ["dist/", "node_modules/", ".astro/", ".conductor/"] }, // JavaScript and TypeScript files diff --git a/public/default-og-image.jpg b/public/default-og-image.jpg new file mode 100644 index 0000000..4251ce9 Binary files /dev/null and b/public/default-og-image.jpg differ diff --git a/public/favicon-dark.svg b/public/favicon-dark.svg deleted file mode 100644 index f6ac691..0000000 --- a/public/favicon-dark.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/public/favicon-light.svg b/public/favicon-light.svg deleted 
file mode 100644 index 58d82e3..0000000 --- a/public/favicon-light.svg +++ /dev/null @@ -1,11 +0,0 @@ - - - - - - - - - - - diff --git a/src/content/blog/claude-code-skills/index.md b/src/content/blog/claude-code-skills/index.md new file mode 100644 index 0000000..d788db4 --- /dev/null +++ b/src/content/blog/claude-code-skills/index.md @@ -0,0 +1,75 @@ +--- +title: "Claude Code Skills" +cardTitle: "Claude Code Skills" +description: "On the performance work that actually makes your app feel fast (but nobody talks about)" +date: 2025-10-18 +draft: true +--- + +## Claude Code Skills + +### Jointly Maximizing Context Efficiency and Predictability + +1. Keep CLAUDE.md extremely *lightweight*: + - brief project overview + - project-structure +2. Use project-level skills for project-specific tasks: + - "Building $Project": skill explaining how to build project + - "Testing $Project": skill explaining how to test the project + - "Opening a PR for $Project": skill explaining how to open a PR for the project + - Language style guides, file organization guides, etc. +3. Create +4. Use slash commands to compress delegation to subagents: + - Protect main agent context by letting it use slash commands when dispatching sub-agents "run the /foo:bar command on $quux"). + + +Here's something that's been bothering me: we have great terminology for performance work at the extremes, but there's no agreed-upon shorthand for all the little details that actually makes apps feel fast. + +At one end, we have **micro-optimization**—the close-to-the-metal stuff. SIMD instructions, cache line optimization, atomic operations. The kind of work where you stare at assembly output and mutter about pipeline stalls. + +At the other end, there's **macro-optimization**—the big architectural decisions. Choosing the right data structure, adding database indices, implementing caching layers. The stuff that shows up in system design interviews. 
+ +In between the extremes, there's a whole world of performance work that doesn't have a name. The stuff that's too high-level to be micro-optimization but too implementation-focused to be macro-optimization. The work that makes your app feel snappy, but doesn't show up in any benchmarks. Things like: + +- Making sure you don't trigger unnecessary UI updates +- Avoiding those temporary arrays that Swift loves to create +- Prefetching content before users need it +- Not accidentally retaining objects longer than necessary + +I'm calling this **meso-optimization**, and yes, I made that term up. Will it catch on? To be frank, I hope not—I'd find it mortifying if I went to a conference and heard someone using it in earnest. But, well, I need a shorthand term—and a tag, once this blog gains tags—and "that middle-ground stuff" just wasn't cutting it. So, yeah, there we are: time for some real talk about **meso-optimization**. + +## Why Meso-Optimization Matters (And Why It's Hard) + +Meso-optimization has a different focus area from its smaller and larger brethren, but it's not just about the substance—it's also about the process, and the process of meso-optimization is different from the other two: + +**Micro and macro-optimization are reactive.** Your profiler screams at you about a hot function. Your database queries are taking 30 seconds. You have a specific villain to defeat. You optimize it, you measure the improvement, you ship it, you're a hero. + +**Meso-optimization is proactive.** It's about consistently applying patterns that prevent performance problems from emerging in the first place. It's playing defense across your entire codebase, not just fixing the parts that are currently on fire. + +**Micro and macro-optimization are targeted.** That one method is slow. That one component uses too much memory. You focus your energy like a laser on the problem spot. + +**Meso-optimization is diffused.** It's everywhere and nowhere. 
It's making sure every view controller releases properly. It's using lazy sequences consistently. It's death by a thousand cuts, except in reverse—life by a thousand tiny good decisions. + +**Micro and macro-optimization are measurable.** "This function is now 70% faster." "Memory usage dropped by 2GB." "Time to first byte improved by 400ms." This is good, I like numbers, graphs, and things you can measure—when you can, you should! + +**Meso-optimization is...vibes (laudatory).** The app feels snappier, scrolling feels smoother, users say "it feels better." Can you quantify any of that? *Kinda*, via proxies like TTI and slow frames, but they're just the fingers (metrics) pointing at the moon (immaculate vibes). + +## The Craftsmanship Connection + +Here's what I've come to believe: micro and macro-optimization are situational skills you deploy when needed. Meso-optimization, on the other hand—that's craftsmanship. It's the accumulated effect of hundreds of small decisions made right. It's what bridges the gap between code that happens to work and code that *feels good to use*. + +When someone says an app feels "native" or "polished," they're often responding to good meso-optimization[^1]. It's not that the app has some genius algorithm or hand-tuned assembly, it's that the team cared enough to dot every i, cross every t, and let no cycle go wasted. + +[^1]: Many of the best "native apps" aren't even native—there's lots of world-class, incredibly-polished React Native out there. + +The frustrating part? This stuff is hard to teach, hard to measure, and incredibly easy to break. One well-meaning refactor can silently undo a meso-optimization, or even several. [Your beautiful lazy sequence chains suddenly become eager](/briefs/swift-warts/lazy-sequences-decay-easily). Your carefully-managed view updates start firing twice. The app still works, all your tests pass, but somehow it doesn't feel quite as good anymore. 
+ +## The Point of All This + +Why invent terminology for something that's hard to define, harder to measure, and—bluntly—cringe-worthy to use? Because I think we need to talk about it more. + +Too much performance discussion focuses on the extremes—either we're debating whether to use `structs` vs `classes` (micro) or whether to use REST vs GraphQL (macro). Meanwhile, the actual day-to-day performance characteristics of most apps are determined by this unnamed middle layer of decisions. + +Meso-optimization isn't the only thing I'll be writing about on this blog, but expect it to be a recurring topic: expect deep dives on patterns that work, traps to avoid, and ways to maintain these optimizations as your codebase evolves. + +None of this stuff is individually earth-shattering, and none of it is going to make anything "10x faster"...but they might just make your apps vibes 10x better. diff --git a/src/content/blog/generic-testing/index.md b/src/content/blog/generic-testing/index.md new file mode 100644 index 0000000..66e58a5 --- /dev/null +++ b/src/content/blog/generic-testing/index.md @@ -0,0 +1,476 @@ +--- +title: "*Generic* Testing For Generic Swift Code" +cardTitle: "Testing Generic Swift Code, *Generically*" +description: "A practical approach to writing generic tests for generic Swift code." +date: "2025-10-18" +draft: true +--- + +## Introduction + +This article explores the concept of "generic testing"—writing test suites that are *themselves* generic, and can thus be evaluated against multiple concrete types. This is particularly important when testing generic code whose behavior subtly depends on the specific types being used, even when those dependencies aren't fully captured by the generic constraints. + +### What Is Generic Testing? + +Generic testing is about writing tests that mirror the genericity of the code being tested. 
Instead of manually writing separate test functions for each concrete type you want to test, you write the test logic *once* as generic code, then arrange for that logic to be executed against each concrete type of interest. + +Consider a generic type like `LinearSpan`: + +```swift +struct LinearSpan where Representation: BinaryFloatingPoint { + + var lowerBound: Representation + var length: Representation + var upperBound: Representation { lowerBound + length } + + var center: Representation { lowerBound + (length / 2) } + + func contains(_ coordinate: Representation) -> Bool { + lowerBound <= coordinate && coordinate <= upperBound + } + + func translated(by offset: Representation) -> LinearSpan { + LinearSpan(lowerBound: lowerBound + offset, length: length) + } +} +``` + +This type relies on generic floating-point arithmetic, which can be surprisingly subtle. With 16-bit floats (`Float16`), numerical surprises arise even at typical UI scales—for instance: + +- ✅ `Float16(2048) - Float16(1) == Float16(2047)`: this works as-expected +- ❌ `Float16(2048) + Float16(1) == Float16(2049)`: this fails b/c the next `Float16` after 2048 is `2050` + +...which can, in turn, lead to otherwise-correct looking code producing unexpected results. + +Consider the following test, which verifies a seemingly-trivial property of `LinearSpan`: "the center of a span with a non-zero length is not an endpoint": + +```swift +import Testing + +@Test +func `LinearSpan.center is not the endpoint`() { + let span = LinearSpan(lowerBound: 2048.0, length: 1) // inferred to be `LinearSpan` + // all of these pass for `Double`, but not for `Float16`: + #expect(span.lowerBound < span.upperBound) + #expect(span.lowerBound < span.center) + #expect(span.center < span.upperBound) +} +``` + +Since this is an article about generic testing, we'll focus on the testing aspects rather than the numerics. 
Since this isn't an article about floating-point numerics, I'm not going to dwell on the numerical aspects much further. + +Instead, I'm going to focus on how to structure tests so that type-specific surprises like this get discovered. + +Without generic testing, we'd be stuck writing repetitive boilerplate like: + +```swift +@Test(arguments: [0.0, 0.01, 1.0]) +func nonZeroLengthImpliesDistinctBounds_Double(length: Double) { + let span = LinearSpan(lowerBound: 0, length: length) + #expect(span.lowerBound < span.upperBound || length == 0.0) +} + +@Test(arguments: [0.0, 0.01, 1.0] as [Float]) +func nonZeroLengthImpliesDistinctBounds_Float(length: Float) { + let span = LinearSpan(lowerBound: 0, length: length) + #expect(span.lowerBound < span.upperBound || length == 0.0) +} + +@Test(arguments: [0.0, 0.01, 1.0] as [Float16]) +func nonZeroLengthImpliesDistinctBounds_Float16(length: Float16) { + let span = LinearSpan(lowerBound: 0, length: length) + #expect(span.lowerBound < span.upperBound || length == 0.0) +} +``` + +This approach doesn't scale: it's tedious to write, difficult to maintain, and the duplication makes it easy for tests to drift out of sync when requirements change. + +### Desired Properties for Generic Testing + +An ideal generic testing strategy should have several key properties: + +**Write tests generically:** Following the famous "M + N instead of M × N" principle from generic programming, we want to write M test functions that can work with N types, not M × N separate test implementations. + +**Minimal invocation boilerplate:** While classic generic programming is about *writing* M algorithms for N types, testing requires actually *invoking* each of the M × N test-type pairs. Our case deviates from the classic formulation because we need some mechanism to trigger execution of each combination, but we want this overhead to be as lightweight as possible. + +**Standard framework integration:** The solution should work within vanilla XCTest or Swift Testing, not require a complex custom framework layered on top. 
Each test-type pair should be individually runnable and debuggable from both Xcode's GUI and the command line. + +### Overview of Topics + +We'll explore three interconnected topics in this article: + +1. **The XCTest strategy:** Using generic test-case base classes that get subclassed for each concrete type—a solution that satisfies all our requirements +2. **Improvement techniques:** Ways to enhance the XCTest approach through better value provisioning and validation helper functions +3. **Swift Testing limitations:** Why there's currently no equivalently satisfactory approach for Swift Testing, despite it being more modern + +## The XCTest Strategy + +XCTest provides an elegant solution to generic testing through class inheritance. The approach involves creating a generic base test class containing all test methods, then creating lightweight concrete subclasses for each type you want to test. + +### Basic Implementation + +Let's start with a simple example testing `LinearSpan`. First, we create a generic base class: + +```swift +class LinearSpanTests: XCTestCase where Representation: BinaryFloatingPoint { + + // Abstract method for subclasses to provide test values + func representativeSpans() -> [LinearSpan] { + // Base implementation returns empty array + // Subclasses must override to provide actual test data + return [] + } + + // Generic test checking point containment + func testPointContainment() { + let spans = representativeSpans() + XCTAssertFalse(spans.isEmpty, "Subclass must provide test spans") + + for span in spans { + // Points that should be inside + XCTAssertTrue(span.contains(span.lowerBound), "Lower bound should be contained") + XCTAssertTrue(span.contains(span.upperBound), "Upper bound should be contained") + + // Calculate midpoint (being careful about overflow) + let center = span.lowerBound + (span.length / 2) + if span.lowerBound < span.upperBound { + XCTAssertTrue(span.contains(center), "Center should be contained") + } + + // Points 
that should be outside + let before = span.lowerBound - abs(span.length) + let after = span.upperBound + abs(span.length) + XCTAssertFalse(span.contains(before), "Point before span shouldn't be contained") + XCTAssertFalse(span.contains(after), "Point after span shouldn't be contained") + } + } +} +``` + +Then we create concrete subclasses for each type we want to test: + +```swift +final class DoubleLinearSpanTests: LinearSpanTests { + override func representativeSpans() -> [LinearSpan] { + return [ + LinearSpan(lowerBound: 0, length: 1), + LinearSpan(lowerBound: -100, length: 200), + LinearSpan(lowerBound: 1e-10, length: 1e-8) + ] + } +} + +final class FloatLinearSpanTests: LinearSpanTests { + override func representativeSpans() -> [LinearSpan] { + return [ + LinearSpan(lowerBound: 0, length: 1), + LinearSpan(lowerBound: -100, length: 200), + LinearSpan(lowerBound: 1e-6, length: 1e-4) // Adjusted for Float precision + ] + } +} + +final class Float16LinearSpanTests: LinearSpanTests { + override func representativeSpans() -> [LinearSpan] { + return [ + LinearSpan(lowerBound: 0, length: 1), + LinearSpan(lowerBound: -100, length: 200), + LinearSpan(lowerBound: 0.001, length: 0.1) // Much coarser due to Float16 limits + ] + } +} +``` + +### Xcode Quirks and Caveats + +While this technique works well, Xcode sometimes exhibits quirky behavior with generic test classes: + +- The test navigator may occasionally show the generic base class as runnable (it shouldn't be) +- Test discovery might briefly fail to recognize new concrete subclasses until you build +- Error messages in failed assertions sometimes show the base class name rather than the concrete subclass + +None of these issues affect the actual execution of tests, but they can be momentarily confusing during development. 
+ +### Why This Approach Is Satisfactory + +The XCTest strategy meets all our desired criteria: + +**Truly generic test logic:** The test methods in the base class are written once and contain no type-specific code. All type-specific behavior is isolated to the concrete subclasses. + +**Minimal dispatch overhead:** Creating a new test target requires only: +- Declaring a subclass (one line of code) +- Overriding methods to provide test values (typically just a few lines) + +Each concrete subclass automatically inherits all test methods from the base class, and XCTest's runtime handles test discovery and execution. The result is that adding a new type to test requires minimal boilerplate while maintaining full integration with Xcode's test runner. + +## Improving the XCTest Approach + +While the basic XCTest strategy works well, we can enhance it in two key ways: using protocols for systematic value provisioning and extracting validation logic into helper functions. + +### Generic Protocols for Test Values + +Rather than having each subclass independently implement `representativeSpans()`, we can use protocols to systematize how test values are provided: + +```swift +protocol LinearSpanTestValueProviding: BinaryFloatingPoint { + static var representativeSpanParameters: [(lowerBound: Self, length: Self)] { get } + static var boundaryCases: [Self] { get } + static var typicalValues: [Self] { get } +} + +// Provide conformances for our test types +extension Double: LinearSpanTestValueProviding { + static let representativeSpanParameters = [ + (lowerBound: 0.0, length: 1.0), + (lowerBound: -100.0, length: 200.0), + (lowerBound: 1e-10, length: 1e-8), + (lowerBound: .leastNormalMagnitude, length: .ulpOfOne) + ] + + static let boundaryCases = [0.0, .infinity, -.infinity, .nan] + static let typicalValues = [0.0, 1.0, -1.0, 42.0, 1e10, 1e-10] +} + +extension Float16: LinearSpanTestValueProviding { + static let representativeSpanParameters = [ + (lowerBound: Float16(0), 
length: Float16(1)), + (lowerBound: Float16(-100), length: Float16(200)), + (lowerBound: Float16(0.001), length: Float16(0.1)) // Coarser values + ] + + static let boundaryCases = [Float16(0), .infinity, -.infinity, .nan] + static let typicalValues = [Float16(0), Float16(1), Float16(-1), Float16(42)] +} +``` + +Now our base test class can be even more generic: + +```swift +class LinearSpanTests: XCTestCase + where Representation: BinaryFloatingPoint & LinearSpanTestValueProviding { + + func testPointContainment() { + for (lowerBound, length) in Representation.representativeSpanParameters { + let span = LinearSpan(lowerBound: lowerBound, length: length) + // ... test logic using span + } + } + + func testBoundaryBehavior() { + for value in Representation.boundaryCases { + // Test behavior with boundary values + let span = LinearSpan(lowerBound: value, length: 1) + // ... assertions about boundary behavior + } + } +} +``` + +This approach starts to resemble property-based testing, where we're testing properties that should hold across a range of inputs, but with more control over the specific values used. + +### Validation Helper Functions + +Extracting test logic into validation helpers provides two major benefits: increased semantic clarity in tests and reusability across similar test contexts. + +Consider this validation helper for span ordering: + +```swift +func verify( + span: LinearSpan, + isStrictlyBefore other: LinearSpan, + sourceLocation: StaticString = #filePath, + line: UInt = #line +) { + XCTAssertLessThan( + span.upperBound, other.lowerBound, + "Span \(span) should be strictly before \(other)", + file: sourceLocation, line: line + ) + + // Additional semantic checks + XCTAssertFalse( + span.overlaps(with: other), + "Strictly ordered spans should not overlap", + file: sourceLocation, line: line + ) +} + +func verifyConsistentOrdering( + _ values: [T], + sourceLocation: StaticString = #filePath, + line: UInt = #line +) { + for i in 0.. 
j { + XCTAssertGreaterThanOrEqual(vi, vj, file: sourceLocation, line: line) + } + + // Verify Comparable laws + if vi < vj { + XCTAssertFalse(vj < vi, "Comparable antisymmetry violated", + file: sourceLocation, line: line) + } + } + } +} +``` + +These helpers can be reused across different test contexts. For example, `verifyConsistentOrdering` is useful for testing any custom `Comparable` conformance, while `verify(span:isStrictlyBefore:)` encapsulates domain-specific invariants about span relationships. + +Here's a more complete example showing validation helpers in action: + +```swift +func validateTranslatedBy( + original: LinearSpan, + offset: R, + sourceLocation: StaticString = #filePath, + line: UInt = #line +) { + let translated = original.translated(by: offset) + + // Length should be preserved + XCTAssertEqual( + translated.length, original.length, + "Translation should preserve span length", + file: sourceLocation, line: line + ) + + // Bounds should be shifted by offset + let expectedLower = original.lowerBound + offset + let expectedUpper = original.upperBound + offset + + // Use appropriate comparison for floating point + if offset.isFinite && original.lowerBound.isFinite { + XCTAssertEqual( + translated.lowerBound, expectedLower, + accuracy: R.ulpOfOne * max(abs(expectedLower), 1), + "Lower bound should be translated by offset", + file: sourceLocation, line: line + ) + } + + // Verify containment relationships are preserved + let testPoint = original.lowerBound + (original.length / 2) + if original.contains(testPoint) && offset.isFinite { + XCTAssertTrue( + translated.contains(testPoint + offset), + "Translated span should contain translated points", + file: sourceLocation, line: line + ) + } +} +``` + +Validation helpers make tests more semantic and help identify exactly what property is being tested. They're particularly valuable when testing numerical code where the same mathematical properties need to be verified across multiple scenarios. 
+ +## Swift Testing Limitations + +Despite Swift Testing being the more modern framework with better Swift integration in many ways, it currently lacks any satisfactory approach for generic testing comparable to what we've achieved with XCTest. + +### No Generic Test-Case Classes + +Swift Testing doesn't have test-case classes at all—tests are just functions, potentially organized within structs annotated with `@Suite`. This fundamental architectural difference means the XCTest inheritance strategy has no direct equivalent. + +### Test Functions Cannot Be Generic + +You might hope to write something like: + +```swift +@Test +func testSpanTranslation() { + let span = LinearSpan(lowerBound: 0, length: 1) + // ... test logic +} +``` + +But this isn't supported—test functions in Swift Testing cannot have generic parameters. The framework needs to know all test functions at compile time with concrete signatures. + +### Failed Approach: Metatypes and Parameter Packs + +One might attempt to use Swift Testing's parameterized test feature with metatypes: + +```swift +@Test( + arguments: [ + Float16.self as any BinaryFloatingPoint.Type, + Float.self as any BinaryFloatingPoint.Type, + Double.self as any BinaryFloatingPoint.Type + ] +) +func testWithMetatype(type: any BinaryFloatingPoint.Type) { + // Attempt to use 'type' to perform generic testing... +} +``` + +Unfortunately, this approach quickly hits fundamental limitations. What you can do with protocol-metatype values is extremely limited—you can't use them to instantiate generic types or call generic functions in any useful way. Parameter packs don't help here either, as they solve a different problem (variadic generic parameters) and still require compile-time resolution. + +### Macro-Based Solutions: A Future Possibility? + +The most promising future direction appears to be custom macros that could generate the necessary boilerplate. 
Imagine something like: + +```swift +@Suite("LinearSpan") +@GenerateTestSpecializations(types: Float16.self, Float.self, Double.self) +struct LinearSpanTests { + + @GenericTestTemplate("Translation preserves length ({{typename}})") + private func _testTranslation(type: T.Type) { + let span = LinearSpan(lowerBound: 0, length: 1) + let translated = span.translated(by: 10) + #expect(translated.length == span.length) + } +} +``` + +This would expand to create individual test functions for each type. However, this remains speculative—at time of writing, the necessary macro capabilities are either unavailable or still behind feature flags. Additionally, designing such a system well would require careful consideration of parameterized tests, multiple generic parameters, and how to specify type-dependent test metadata. + +### Current Recommendation: Stick with XCTest + +Given these limitations, if you need to write generic tests for generic Swift code at any non-trivial scale, XCTest remains the better choice. While you could write validation helpers and copy-paste concrete test functions in Swift Testing, this approach only works for very small test suites. + +For simple cases with just a few tests and types, the copy-paste approach with validation helpers is acceptable: + +```swift +// Validation helper +func validateStackBehavior(type: T.Type, values: [T]) { + var stack = Stack() + for value in values { + stack.push(value) + } + for value in values.reversed() { + #expect(stack.pop() == value) + } +} + +// Concrete tests (copy-pasted) +@Test func testStack_Int() { + validateStackBehavior(type: Int.self, values: [1, 2, 3]) +} + +@Test func testStack_String() { + validateStackBehavior(type: String.self, values: ["a", "b", "c"]) +} +``` + +But this doesn't scale—once you have dozens of tests across multiple types, the maintenance burden becomes untenable. 
+ +## Conclusion + +Generic testing—writing test suites that are themselves generic—is a powerful technique for validating generic Swift code, particularly when that code's correctness depends on subtle properties of the concrete types being used. This is especially important for numerical and algorithmic code, where behaviors can vary significantly between types like `Float16`, `Float`, and `Double`. + +The XCTest-based approach using generic base classes provides an excellent solution that meets all our requirements: truly generic test logic, minimal invocation overhead, and full integration with standard tooling. Combined with validation helpers and systematic value provisioning through protocols, it creates a robust testing strategy that scales well. + +While it's somewhat ironic that the older XCTest framework handles this advanced use case better than the more modern Swift Testing, the current reality is clear: if you need generic testing capabilities, XCTest is the way to go. Swift Testing may eventually grow to support these use cases—perhaps through macros or other language features—but for now, the situation is what it is. + +The good news is that the XCTest solution works well. It's battle-tested, integrates perfectly with Xcode, and provides all the flexibility needed to thoroughly test generic code. For those working on libraries with complex generic algorithms or numerical code with multiple floating-point types, mastering this pattern is well worth the investment. 
diff --git a/src/content/briefs/swift-warts/no-decltype-equivalent.md b/src/content/briefs/swift-warts/no-decltype-equivalent.md new file mode 100644 index 0000000..c2c7708 --- /dev/null +++ b/src/content/briefs/swift-warts/no-decltype-equivalent.md @@ -0,0 +1,42 @@ +--- +title: "Swift lacks a `decltype` Equivalent" +cardTitle: "Lack of a `decltype` Equivalent" +description: "In the absence of a `decltype`-like mechanism, some nice-to-have macros wind up unimplementable" +date: "2025-08-15" +--- + +**Update:** the specific issue prompting this brief disappeared between when I drafted it (beta season) and when I finished it (Xcode 26 public release). This doesn't invalidate the broader potential benefit of a `decltype`-like construct, but does significantly lessen its salience—I've shortened the brief appropriately. + +In C++, the `decltype($expression)` construct works like a "magic preprocessor macro" that gets replaced by the compiler-inferred type of `$expression`: + +```c++ +int32_t a = 1; +decltype(a) b = 2; // b is inferred to be `int32_t` +int64_t c = 3; +decltype(a + c) d = 4; // d is inferred to be `int64_t`, due to numeric promotion rules +``` + +In C++ this capability is *necessary* because there are often situations wherein you *must* provide an explicit type declaration, but *writing it* falls somewhere between *difficult* to *impossible*; here's an example taken from [cppreference](https://en.cppreference.com/w/cpp/language/decltype.html), wherein we need to use `decltype` to declare the a-priori *unknowable* return type of a function template: + +```c++ +template +auto add(T t, U u) -> decltype(t + u) { return t + u; } +``` + +Swift (thankfully) avoids the need for such a construct: + +- Swift's type inference largely allows us to omit type annotations for variables and closures (etc.) 
+- Swift's generic system ensures that even the types of operations are always knowable[^1] + +[^1]: In C++, that `add` function works like a text template: `add(x,y)` behaves as-if you (1) naively replaced it with `x + y` in the source and only then (2) tried to compile it; this is very different from Swift's generic system, which directly compiles generic methods against explicitly-specified protocol APIs. + +As such, (a) we mostly don't need to write annotations but (b) we *can* write them when we need to do so, because we have all the information we need. + +Or so I thought, but there's one exception: sometimes Swift *macros* can generate code that falls into a trap: + +- the generated code is syntactically and semantically correct +- the code is too complex to rely purely on type inference (e.g. complex/nested closures) +- the ordinary fix is to add explicit type annotations +- this is unavailable within a macro because it cannot "see" the type parameters during expansion + +As such, in this one specific case it'd *potentially* be helpful to have a `decltype` equivalent within the language. Given how awkward a fit it is with the rest of the language, however, it'd seem reasonable to limit it to only being valid within macro expansions. diff --git a/src/content/briefs/testing/category.yaml b/src/content/briefs/testing/category.yaml new file mode 100644 index 0000000..aa9f902 --- /dev/null +++ b/src/content/briefs/testing/category.yaml @@ -0,0 +1,4 @@ +displayName: "Testing" +titlePrefix: "Testing" +description: "Briefs about testing strategies, tools, and gotchas." 
+sortPriority: 21 diff --git a/src/content/briefs/testing/decision-execution-pattern.md b/src/content/briefs/testing/decision-execution-pattern.md new file mode 100644 index 0000000..bdbe6a2 --- /dev/null +++ b/src/content/briefs/testing/decision-execution-pattern.md @@ -0,0 +1,28 @@ +--- +title: "The Decision-Execution Pattern" +cardTitle: "The Decision-Execution Pattern" +description: "Dividing your code into separate \"decision\" and \"execution\" phases clarifies intent and improves testability." +date: "2025-08-31" +draft: true +--- + +One design pattern I find *tremendously* helpful for improving testability is what I call the "decision-execution" pattern; consider this a smaller-scale version of the ["plan-execute" pattern](https://mmapped.blog/posts/29-plan-execute). + +The basic idea is to divide the operation you want to test into two phases: + +1. a "decision" phase that determines what needs to be done, and returns some kind of data item describing what to do +2. an "execution" phase that actually does whatever work was decided-upon in the "decision" phase + +TODO: provide a *motivated*, *concrete* example. 
+ +*Postscript:* another way to interpret this pattern is as an informal, private, delegate-like design pattern: + + + + + + + + + +The code in the "decision" phase should *generally* be structured as a pure function that receives all relevant information via parameters and returns a data item, e.g.: diff --git a/src/content/projects/agentic-navigation-guide/index.md b/src/content/projects/agentic-navigation-guide/index.md index 749692c..7f00498 100644 --- a/src/content/projects/agentic-navigation-guide/index.md +++ b/src/content/projects/agentic-navigation-guide/index.md @@ -44,7 +44,7 @@ The *tool*'s primary job is to check the guide against the state of the file-sys - it parses the content of the `` tag - it checks if each listed path actually exists -- if the guide mentions non-existant files, it reports useful errors +- if the guide mentions non-existent files, it reports useful errors You can use it yourself (e.g. as a pre-commit hook, etc.), if you like. You can also set it up as a "hook" for Claude Code, in which case: diff --git a/src/content/projects/hdxl-xctest-retrofit/index.md b/src/content/projects/hdxl-xctest-retrofit/index.md index c0c64bd..f91c648 100644 --- a/src/content/projects/hdxl-xctest-retrofit/index.md +++ b/src/content/projects/hdxl-xctest-retrofit/index.md @@ -10,7 +10,7 @@ repoURL: "https://github.com/plx/hdxl-xctest-retrofit/" [`HDXLXCTestRetrofit`](https://github.com/plx/hdxl-xctest-retrofit/) is a small library of *macros* that you can use to adapt *most*[^1] existing [`XCTest`](https://developer.apple.com/documentation/xctest) unit tests to [Swift Testing](https://developer.apple.com/documentation/testing/) without having to substantively rewrite them: 1. migrate from `XCTestCase` subclasses to `@Suite` structs -2. apply `@Test` annotation to test functions[^1] +2. apply `@Test` annotation to test functions[^2] 3.
prepend `#` to `XCTAssert*` calls [^1]: The primary gaps are around expectations, expected failures, and attachments—IMHO those don't map cleanly to Swift Testing's APIs, so they're currently unsupported. diff --git a/src/content/projects/trop/index.md b/src/content/projects/trop/index.md new file mode 100644 index 0000000..7aeef9b --- /dev/null +++ b/src/content/projects/trop/index.md @@ -0,0 +1,110 @@ +--- +title: "trop" +description: "Idempotent local port-reservation tool." +date: 2025-10-18 +demoURL: "https://plx.github.io" +repoURL: "https://github.com/plx/trop/" +draft: false +--- + +`trop` is a CLI tool that acts as a "reservation manager" for port numbers. +Reservations are tied to file-system paths, which has several benefits: + +- `trop` is idempotent, and will return sticky/stable ports when invoked with the same path +- `trop` can automatically cleanup reservations tied to no-longer-existent paths + +The intended use case was as a drop-in replacement for hard-coded port numbers in project automation scripts, e.g.: + +```bash +# before +my-server --port 8080 + +# after +my-server --port $(trop reserve) +``` + +## Origin Story + +The *motivation* for this tool was to streamline the "simultaneous agents in multiple worktrees"-style workflows, e.g. wherein: + +- you have multiple claude code instances operating concurrently +- each instance is working on a distinct task +- each instance is working within a distinct worktree + +`trop` exists because using that workflow with *small-and-simple* projects can easily lead to port collisions. +For example, envision this scenario: + +- you're working on a static website +- the project has a "preview" command that launches a local server +- for convenience, the "preview" command hardcodes a specific port number (e.g. 
`4040`) +- you start using "simultaneous agents in multiple worktrees": + - agent 1 is working on adding tagging support + - agent 2 is fixing a layout bug + - agent 3 is adding no-follow links + - agent 4 is improving the CSS adaptivity +- each agent is trying to assess its own work by: + - launching the preview server + - using the playwright-mcp to QA its changes + +Easy to see how things could go sideways here: in the best outcome, an agent burns tokens figuring out there's a collision then successfully working around it; in the worst outcome, the agents *don't* notice the issue, and proceed to get each other very, very confused. + +`trop` exists to address this specific problem, and its design choices reflect that: + +- the path-based system plays nicely with worktrees and multi-agent workflows +- it has a concept of "reservation groups" for recurring reservation-patterns, suitable for use in worktree-setup scripts +- its built-in defaults for the optional `project` and `task` metadata map to the "repo" and "worktree", respectively +- the path-based cleanup makes it easy to (eventually) prune reservations for finished worktrees—no need for custom scripts or hooks + +Note that `trop` is purely a local system, and is very much *not* meant to handle large-and-complex scenarios—you won't need `trop` if you're already using kubernetes. +It's also not meant to handle the even-more-advanced practice of having multiple concurrent agents in the same worktree—`trop`'s design is only intended for the "one agent per worktree" strategy. + +## Implementation Remarks + +The CLI is implemented in Rust, and uses SQLite for two distinct purposes: + +- persistence: a sqlite db contains the central "reservation store" +- synchronization: `trop` uses SQLite's built-in support for cross-process locking to synchronize between concurrent invocations + +Internally the project is structured as a library with a (thin) CLI wrapper.
It also extensively uses the plan-execute design pattern internally, wherein most operations are structured as a two-step process: + +- a "plan" phase, wherein it prepares a "plan" *describing* the actions to be taken +- an "execute" phase, wherein it executes the "plan" prepared during the previous phase + +This pattern *greatly* increases testability, and has additional side benefits (e.g. easy, robust, and consistent support for "dry-run" mode). On that note, the test suite is *quite* extensive, and includes a mix of unit tests, integration tests, end-to-end tests, and property-based tests. + +## Implementation Strategy + +I treated this project as another "vibe-coding experiment", with a couple of meta-goals; my desired workflow was: + +- spend a lot of time on the specification +- have Claude decompose it into a phased implementation plan +- have Claude Code write *all* the code +- operate Claude Code in a "high-autonomy, hands-off" mode + +In other words, I would *ideally* have been able to tell Claude "go implement phase 07" and then let Claude run unattended until it opened a PR for "phase 07"; for this approach to work, the code Claude wrote would obviously also need to adhere to the spec and be fit-for-purpose. + +I'll save a detailed summary of my strategies for a separate article, but I'll give you some early takeaways now: + +1. The workflow I came up with worked surprisingly well, but "go do phase 07" was too large for a single session. As such, I had to manually "puppet" Claude through the major workflow steps, although that was thankfully low-effort for me since each step was encapsulated into a custom slash command. To get that truly hands-off experience I would need to run Claude through an orchestration wrapper—will explore that another time. +2.
Relying on Claude to review Claude's work worked reasonably well, but with one troubling pattern: in some cases where the task seemed *too complex* for Claude to understand while implementing, it'd still pass "review", too. My *suspicion* here is that "if it's too complex for Claude to code, it's also too complex for the *reviewer* to understand"; this deserves a dedicated article. +3. Towards the end I had to bring in ChatGPT's Codex, which proved *invaluable*. Another "needs an article" topic, but the highlights are: + - Claude is like an artificial, highly-steerable "team member" + - Codex is more like an "oracle": consistently provides correct answers to challenging tasks, but isn't as (obviously) steerable (TBD) + +Despite having to undertake a few significant interventions, my overall experience is very positive! + +Overall, I'd say that we're closer than I thought to being able to write a detailed spec and have coding agents diligently implement it (and implement it *correctly*, at that). +We may even be at that point, in fact, if your skill level is high enough and your strategy is sufficiently sophisticated. + +As a final remark—just to put some concrete measurements on the table—I'd ballpark this project as just about 1 week of end-to-end, full-time work. +It's hard to be too precise because I was doing this in-between other things, but to the best of my recollection: + +- 2 full days spent writing-and-revising the specification +- 1 full day spent on getting the "Claude infrastructure in place" +- 2 full days of actual Claude Code "coding time" (if I just let it run in yolo mode) + +Worst-case, add another day (total) to account for the various times I had to intervene and correct Claude's direction. + +## Future Directions + +Currently, none other than bug fixes: the project is "done" in the sense that it solves the problem I set out to solve, and feels feature-complete vis-a-vis my actual-and-foreseeable needs. 
diff --git a/src/lib/opengraph.ts b/src/lib/opengraph.ts index 4bf5987..2364438 100644 --- a/src/lib/opengraph.ts +++ b/src/lib/opengraph.ts @@ -83,7 +83,7 @@ export function getPostOGData( author: "plx", theme: "dark", backgroundImage: "gradient", - logo: `${siteUrl}/favicon-light.svg` + logo: `${siteUrl}/default-og-image.jpg` }); } @@ -127,7 +127,7 @@ export function getBriefOGData( author: "plx", theme: "dark", backgroundImage: "gradient", - logo: `${siteUrl}/favicon-light.svg` + logo: `${siteUrl}/default-og-image.jpg` }); } @@ -170,7 +170,7 @@ export function getProjectOGData( author: "plx", theme: "dark", backgroundImage: "gradient", - logo: `${siteUrl}/favicon-light.svg` + logo: `${siteUrl}/default-og-image.jpg` }); } @@ -206,7 +206,7 @@ export function getListOGData( subtitle, theme: "dark", backgroundImage: "gradient", - logo: `${siteUrl}/favicon-light.svg` + logo: `${siteUrl}/default-og-image.jpg` }); return { @@ -235,7 +235,7 @@ export function getHomeOGData( subtitle: "Technical writing and projects", theme: "dark", backgroundImage: "gradient", - logo: `${siteUrl}/favicon-light.svg` + logo: `${siteUrl}/default-og-image.jpg` }); return { diff --git a/src/pages/index.astro b/src/pages/index.astro index d2b9f6d..e4b3e5d 100644 --- a/src/pages/index.astro +++ b/src/pages/index.astro @@ -20,6 +20,7 @@ const projects = (await getCollection("projects")) .slice(0,SITE.NUM_PROJECTS_ON_HOMEPAGE); const allwork = (await getCollection("briefs")) + .filter(brief => !brief.data.draft) .sort((a, b) => b.data.date.valueOf() - a.data.date.valueOf()) .slice(0,SITE.NUM_BRIEFS_ON_HOMEPAGE);