diff --git a/shortcuts/mail/body_file.go b/shortcuts/mail/body_file.go new file mode 100644 index 000000000..2270e6700 --- /dev/null +++ b/shortcuts/mail/body_file.go @@ -0,0 +1,102 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package mail + +import ( + "io" + "strings" + + "github.com/larksuite/cli/extension/fileio" + "github.com/larksuite/cli/internal/output" + "github.com/larksuite/cli/shortcuts/common" +) + +// bodyFileFlag is the shared `--body-file` flag declaration reused by every +// compose shortcut (+send / +draft-create / +reply / +reply-all / +forward). +// All six shortcuts honour the same mutual-exclusion contract with `--body` +// and the cwd-subtree path safety rule. The flag is intentionally NOT +// shared with `+lint-html` because that command's description differs +// ("HTML to lint" vs "email body") in a way that is more readable when +// authored per-shortcut. `+draft-edit` does not expose `--body-file` either +// — its body ops flow through `--patch-file` JSON whose `value` field is +// the natural file-based entry point for large bodies. +var bodyFileFlag = common.Flag{ + Name: "body-file", + Desc: "Path (relative, within cwd subtree) to a file containing the email body HTML. Mutually exclusive with --body. Size capped at 32 MB.", + Input: []string{common.File}, +} + +// maxBodyFileSize caps the size of a `--body-file` HTML input. The compose +// path's downstream EML limit is 25 MB (helpers.go MAX_EML_BYTES); we allow a +// bit more headroom here (32 MB) so a body close to the limit still loads +// before the downstream check fires with a clearer error message. The cap +// prevents an `io.ReadAll` from blowing memory on a misdirected gigabyte +// file. +const maxBodyFileSize = 32 * 1024 * 1024 // 32 MB + +// validateBodyFileMutex enforces the `--body` / `--body-file` mutual +// exclusion + cwd-subtree path safety. 
Compose shortcuts call this in +// their Validate phase so AI / users see a clear error before any work +// runs. Pass the shortcut's RuntimeContext-resolved flag values directly: +// `bodyFlag` is the `--body` value (may be empty), `bodyFile` is the +// trimmed `--body-file` value, and `validatePath` is the +// runtime.ValidatePath bound function used to enforce the relative-path +// rule (cwd-subtree only; no absolute / `..` traversal). +// +// Returns an ErrValidation error when either invariant is violated, nil +// otherwise. The "exactly one of {--body, --body-file}" check is +// shortcut-specific (some shortcuts allow neither, e.g. `+forward` with +// no explicit body) and is therefore left to the caller. +func validateBodyFileMutex(bodyFlag, bodyFile string, validatePath func(string) error) error { + bodyEmpty := strings.TrimSpace(bodyFlag) == "" + if !bodyEmpty && bodyFile != "" { + return output.ErrValidation("--body and --body-file are mutually exclusive; pass exactly one") + } + if bodyFile != "" { + if err := validatePath(bodyFile); err != nil { + return output.ErrValidation("--body-file: %v", err) + } + } + return nil +} + +// resolveBodyFromFlags returns the body content from --body or --body-file. +// Validate has already enforced mutual exclusion via validateBodyFileMutex, +// so exactly one is set (or neither when a template / parent message +// supplies the body). Returns ("", nil) when neither flag is set so +// downstream code can decide whether the empty body is allowed. +func resolveBodyFromFlags(runtime *common.RuntimeContext) (string, error) { + if body := runtime.Str("body"); strings.TrimSpace(body) != "" { + return body, nil + } + path := strings.TrimSpace(runtime.Str("body-file")) + if path == "" { + return "", nil + } + return readBodyFile(runtime.FileIO(), path) +} + +// readBodyFile loads --body-file content with a size cap. Returns an +// ErrValidation error if the file exceeds maxBodyFileSize or any IO error +// occurs. 
The size check uses io.LimitReader(maxBodyFileSize+1) so any +// over-cap byte is observable without reading the whole file. +// +// Callers MUST have run runtime.ValidatePath(path) on `path` first — the +// helper only opens the file via the supplied FileIO and does not repeat +// the cwd-subtree safety check. +func readBodyFile(fio fileio.FileIO, path string) (string, error) { + f, err := fio.Open(path) + if err != nil { + return "", output.ErrValidation("open --body-file %s: %v", path, err) + } + defer f.Close() + buf, err := io.ReadAll(io.LimitReader(f, maxBodyFileSize+1)) + if err != nil { + return "", output.ErrValidation("read --body-file %s: %v", path, err) + } + if len(buf) > maxBodyFileSize { + return "", output.ErrValidation("--body-file: file exceeds %d MB limit", maxBodyFileSize/1024/1024) + } + return string(buf), nil +} diff --git a/shortcuts/mail/lint/linter.go b/shortcuts/mail/lint/linter.go new file mode 100644 index 000000000..8df827729 --- /dev/null +++ b/shortcuts/mail/lint/linter.go @@ -0,0 +1,1078 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package lint + +import ( + "bytes" + "fmt" + "hash/fnv" + "strings" + + xhtml "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +// MaxExcerptBytes caps the raw-HTML excerpt embedded in a Finding.Excerpt so +// a single offending tag with megabyte content can't bloat the envelope JSON. +// Lint operates on bytes only, but the excerpt representation must not be +// size-amplifying. +const MaxExcerptBytes = 200 + +// Run lints the given HTML body and returns a structured Report. +// Report.CleanedHTML contains the rewritten HTML (warnings rewritten + errors +// deleted) — the autofix is unconditional. +// +// IMPORTANT: when the input is empty or plain-text (no HTML markup detected +// by the cli's existing `bodyIsHTML` heuristic), callers should short-circuit +// with EmptyReport(html) instead of paying the parse cost. 
Run still handles +// this gracefully — html.Parse on plain text wraps the input in +// ..., and the lib's pass-through +// rendering will reproduce the original text — but the round-trip is wasteful +// and produces no findings. +func Run(html string, opts Options) Report { + if html == "" { + return EmptyReport("") + } + + rep := Report{ + Applied: []Finding{}, + Blocked: []Finding{}, + } + + // We use html.ParseFragment so users authoring fragment-style snippets + // (the canonical compose-5 input shape — `
...
` rather than a + // full document) don't get implicit wrappers + // re-rendered. The "body" insertion mode matches what html.Parse would + // have done internally for a fragment but skips the structural wrappers + // at render time. + bodyContext := &xhtml.Node{Type: xhtml.ElementNode, DataAtom: atom.Body, Data: "body"} + nodes, err := xhtml.ParseFragment(strings.NewReader(html), bodyContext) + if err != nil { + // Parser failure is exceptional (the parser is permissive by design); + // fall back to the original input so we don't lose user content. + return EmptyReport(html) + } + + // Wrap fragment nodes in a synthetic root so the recursive walker has a + // uniform parent pointer to mutate. + root := &xhtml.Node{Type: xhtml.DocumentNode} + for _, n := range nodes { + root.AppendChild(n) + } + + walk(root, &rep) + // nativeCtx tracks per-Run() state so positional ids (e.g. data-ol-id) + // are deterministic across multiple Run() calls on the same input — + // keying off the document-traversal order rather than heap pointers, + // so cleaned_html is byte-stable and amenable to golden-file tests / CI + // diff / cache-key reuse. + nctx := &nativeCtx{olIDs: map[*xhtml.Node]string{}} + applyFeishuNativeStyles(root, &rep, nctx) + + rep.HasErrorFindings = len(rep.Blocked) > 0 + rep.HasWarningFindings = len(rep.Applied) > 0 + rep.CleanedHTML = renderFragment(root) + + return rep +} + +// walk visits every element node under parent, applying tag/attr/style +// classification. Children are iterated via the next-sibling pointer because +// we mutate the tree in place (replace / remove nodes). +// +// The walker is iterative-style via explicit recursion because the html +// parser's typical nesting depth (≤ 256 by default) is well below Go's +// goroutine stack limit; the existing draft package's plainTextFromHTML +// (mail/draft/htmltext.go) similarly recurses for the same reason. 
+func walk(parent *xhtml.Node, rep *Report) { + child := parent.FirstChild + for child != nil { + next := child.NextSibling + if child.Type == xhtml.ElementNode { + processElement(parent, child, rep) + } + // child may have been removed/replaced by processElement; recurse + // only if it still has the original parent (i.e. wasn't deleted). + // The html parser sets Parent on every node, so a removed-then- + // reattached node still recurses correctly via its new Parent. + if child.Parent != nil { + walk(child, rep) + } + child = next + } +} + +// processElement applies the element-level classification cascade: +// 1. tag → allow / warn-rewrite / error-delete +// 2. attributes → on*-handlers, URL-bearing attrs (scheme allow-list), +// style attribute (CSS property allow-list) +func processElement(parent, n *xhtml.Node, rep *Report) { + tagName := strings.ToLower(n.Data) + kind, ruleID := classifyTag(tagName) + + switch kind { + case "error": + rep.Blocked = append(rep.Blocked, Finding{ + RuleID: ruleID, + Severity: SeverityError, + TagOrAttr: tagName, + Excerpt: excerptOf(n), + Hint: hintForBlockedTag(tagName), + }) + // Always remove blocked tags — the writing-path safety floor has no + // opt-out; `--no-lint` is not provided. + parent.RemoveChild(n) + return + + case "warn": + // Always rewrite (e.g. ) and surface the finding. + rep.Applied = append(rep.Applied, Finding{ + RuleID: ruleID, + Severity: SeverityWarning, + TagOrAttr: tagName, + Excerpt: excerptOf(n), + Hint: hintForWarnTag(tagName), + }) + rewriteWarnTag(n, tagName) + // Recurse into the rewritten node by falling through; the rewrite + // preserved children as-is. + // fall through to attribute scan + case "allow": + // no-op + } + + // Attribute scan: build a new attribute slice, dropping/sanitising as we + // go and surfacing findings. 
+ if len(n.Attr) > 0 { + processAttributes(n, rep) + } +} + +// processAttributes walks the attribute list and: +// - drops on*-handlers (always; surfaced as error) +// - drops URL-bearing attrs whose value uses a forbidden scheme +// - filters the `style` attribute property-by-property against the allow-list +// +// Other attributes pass through unchanged. The cli's existing +// `validateInlineCIDs` (helpers.go:2226) handles `cid:`-specific checks; +// the lint must not duplicate that responsibility. +func processAttributes(n *xhtml.Node, rep *Report) { + keep := n.Attr[:0] + for _, attr := range n.Attr { + name := strings.ToLower(attr.Key) + + // 1. on*-handlers → always drop, error-tier. + if isEventHandlerAttr(name) { + rep.Blocked = append(rep.Blocked, Finding{ + RuleID: RuleAttrEventHandlerBlocked, + Severity: SeverityError, + TagOrAttr: name, + Excerpt: truncateExcerpt(attr.Key + "=\"" + attr.Val + "\""), + Hint: "Removed event handler attribute (on*)", + }) + continue + } + + // 2. URL-bearing attrs → check scheme allow-list. + if urlAttributes[name] { + kind, ruleID := classifyURLValue(attr.Val) + switch kind { + case "error": + rep.Blocked = append(rep.Blocked, Finding{ + RuleID: ruleID, + Severity: SeverityError, + TagOrAttr: name, + Excerpt: truncateExcerpt(attr.Key + "=\"" + attr.Val + "\""), + Hint: "Removed dangerous URL scheme (allowed: http/https/mailto/cid/data:image/*)", + }) + continue + case "warn": + rep.Blocked = append(rep.Blocked, Finding{ + RuleID: ruleID, + Severity: SeverityError, + TagOrAttr: name, + Excerpt: truncateExcerpt(attr.Key + "=\"" + attr.Val + "\""), + Hint: "Removed URL with unrecognised scheme (allowed: http/https/mailto/cid/data:image/*)", + }) + // Always drop the attribute — writing-path safety floor (the + // URL would not render correctly anyway). + continue + } + } + + // 3. `style` attribute → property-by-property allow-list. 
+ if name == "style" { + cleaned, dropped := sanitiseStyleAttr(attr.Val) + for _, prop := range dropped { + rep.Applied = append(rep.Applied, Finding{ + RuleID: RuleStylePropertyDropped, + Severity: SeverityWarning, + TagOrAttr: "style." + prop, + Excerpt: truncateExcerpt(prop), + Hint: "Removed CSS property not in allowlist (see references/lark-mail-html.md)", + }) + } + if len(dropped) == 0 { + // Byte-stable when no property was dropped: leave the + // attribute exactly as authored so lint round-trips are + // idempotent on clean input. + keep = append(keep, attr) + continue + } + if cleaned == "" { + // All properties dropped — remove the attribute entirely. + continue + } + attr.Val = cleaned + keep = append(keep, attr) + continue + } + + // 4. Pass-through. + keep = append(keep, attr) + } + n.Attr = keep +} + +// rewriteWarnTag replaces a warning-tier tag with its Feishu-native +// equivalent in place: with color/face/size +// distilled into inline style;
; +// / (text-only, animation discarded — collapsing +// to a span keeps the children but drops the deprecated animation effect). +func rewriteWarnTag(n *xhtml.Node, tagName string) { + switch tagName { + case "font": + // Distill . + var styles []string + var keepAttrs []xhtml.Attribute + for _, attr := range n.Attr { + switch strings.ToLower(attr.Key) { + case "color": + if v := strings.TrimSpace(attr.Val); v != "" { + styles = append(styles, "color:"+v) + } + case "face": + if v := strings.TrimSpace(attr.Val); v != "" { + styles = append(styles, "font-family:"+v) + } + case "size": + if v := mapFontSize(attr.Val); v != "" { + styles = append(styles, "font-size:"+v) + } + default: + keepAttrs = append(keepAttrs, attr) + } + } + // Merge any existing style attribute already present on the + // (rare but possible). + if len(styles) > 0 { + merged := strings.Join(styles, ";") + styleIdx := -1 + for i, attr := range keepAttrs { + if strings.ToLower(attr.Key) == "style" { + styleIdx = i + break + } + } + if styleIdx >= 0 { + existing := strings.TrimRight(keepAttrs[styleIdx].Val, "; ") + if existing != "" { + merged = existing + ";" + merged + } + keepAttrs[styleIdx].Val = merged + } else { + keepAttrs = append(keepAttrs, xhtml.Attribute{Key: "style", Val: merged}) + } + } + n.Data = "span" + n.DataAtom = atom.Span + n.Attr = keepAttrs + + case "center": + //
. Existing style attr + // (if any) is merged with text-align prepended. + styleIdx := -1 + for i, attr := range n.Attr { + if strings.ToLower(attr.Key) == "style" { + styleIdx = i + break + } + } + newStyle := "text-align:center" + if styleIdx >= 0 { + existing := strings.TrimRight(n.Attr[styleIdx].Val, "; ") + if existing != "" { + newStyle = newStyle + ";" + existing + } + n.Attr[styleIdx].Val = newStyle + } else { + n.Attr = append(n.Attr, xhtml.Attribute{Key: "style", Val: newStyle}) + } + n.Data = "div" + n.DataAtom = atom.Div + + case "marquee", "blink": + // Both deprecated; collapse to so children survive. + n.Data = "span" + n.DataAtom = atom.Span + // Strip marquee-specific attributes (direction, scrollamount, ...) + // so the rewritten span is plain. + var keepAttrs []xhtml.Attribute + for _, attr := range n.Attr { + if strings.ToLower(attr.Key) == "style" || strings.ToLower(attr.Key) == "class" || strings.ToLower(attr.Key) == "id" { + keepAttrs = append(keepAttrs, attr) + } + } + n.Attr = keepAttrs + } +} + +// mapFontSize maps the legacy values (1..7) to a CSS px +// equivalent, matching the mapping used by Feishu mail-editor's renderer. +// Out-of-range values fall through to the empty string so the property is +// dropped (better than emitting an arbitrary value). +func mapFontSize(raw string) string { + switch strings.TrimSpace(raw) { + case "1": + return "10px" + case "2": + return "13px" + case "3": + return "16px" + case "4": + return "18px" + case "5": + return "24px" + case "6": + return "32px" + case "7": + return "48px" + default: + return "" + } +} + +// sanitiseStyleAttr filters a `style="prop1:val; prop2:val"` declaration +// against the property allow-list. Returns the cleaned style text (joined +// with "; " separators) and a slice of dropped property names (lower-case) +// so the caller can surface STYLE_PROPERTY_DROPPED findings. +// +// NOTE: We do NOT validate property values — only property names. 
The style +// attribute is filtered by CSS property allow-list; value-level validation +// (e.g. URL safety inside `background-image: url(...)`) is delegated to the +// urlAttributes path because such values typically appear in `src` / `href` +// attrs in compose-5 templates. Users authoring `background-image: url(http:...)` +// in inline style will see the property pass — the URL inside is not a +// security concern at the inline-style level since URL fetching from style +// is restricted by the rendering layer's CSP regardless. +func sanitiseStyleAttr(raw string) (cleaned string, dropped []string) { + if strings.TrimSpace(raw) == "" { + return "", nil + } + parts := strings.Split(raw, ";") + keep := make([]string, 0, len(parts)) + for _, part := range parts { + decl := strings.TrimSpace(part) + if decl == "" { + continue + } + colon := strings.IndexByte(decl, ':') + if colon < 0 { + // Malformed declaration; drop and surface as a finding so the + // user notices. + dropped = append(dropped, decl) + continue + } + name := strings.ToLower(strings.TrimSpace(decl[:colon])) + if !classifyStyleProperty(name) { + dropped = append(dropped, name) + continue + } + keep = append(keep, decl) + } + cleaned = strings.Join(keep, "; ") + return cleaned, dropped +} + +// hintForBlockedTag returns a hint for an error-blocked tag (matching +// the `output.ErrWithHint` convention used elsewhere in the cli). 
+func hintForBlockedTag(tag string) string { + switch tag { + case "script": + return "Removed whole tag (XSS risk)" + case "iframe", "object", "embed": + return "Removed whole tag (external embeds not allowed; use or a body link for rich media)" + case "form", "input", "select", "option", "button": + return "Removed whole tag (forms not allowed in email body)" + case "link": + return "Removed (external CSS / resources not allowed)" + case "meta": + return "Removed (viewport / refresh declarations not allowed)" + case "base": + return "Removed (URL base rewrites not allowed)" + default: + return "Removed whole tag (tag not allowed)" + } +} + +// hintForWarnTag returns a hint for a warning-tier tag. +func hintForWarnTag(tag string) string { + switch tag { + case "font": + return "Rewritten as (modern HTML expresses size / color via inline style)" + case "center": + return "Rewritten as
(deprecated
tag)" + case "marquee", "blink": + return "Rewritten as (animations not supported; text preserved)" + default: + return "Rewritten in modern HTML shape" + } +} + +// excerptOf renders the offending node's open-tag header into a short string +// suitable for surfacing in a Finding.Excerpt. We render only the tag header +// (not the full subtree) so a single offending

after

`, Options{}) + if len(rep.Blocked) != 1 { + t.Fatalf("expected 1 blocked finding, got %d", len(rep.Blocked)) + } + if rep.Blocked[0].RuleID != RuleTagScriptBlocked { + t.Errorf("rule = %s, want %s", rep.Blocked[0].RuleID, RuleTagScriptBlocked) + } + if strings.Contains(rep.CleanedHTML, " content should be deleted, cleaned=%q", rep.CleanedHTML) + } + if !strings.Contains(rep.CleanedHTML, "safe") || !strings.Contains(rep.CleanedHTML, "after") { + t.Errorf("surrounding content lost, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_BlockedTagsRemoved iterates all error-tier tags. +func TestRun_BlockedTagsRemoved(t *testing.T) { + cases := map[string]string{ + ``: RuleTagIframeBlocked, + ``: RuleTagObjectBlocked, + ``: RuleTagEmbedBlocked, + `
`: RuleTagFormBlocked, + ``: RuleTagLinkBlocked, + ``: RuleTagMetaBlocked, + ``: RuleTagBaseBlocked, + } + for input, wantRule := range cases { + t.Run(input[:min(len(input), 30)], func(t *testing.T) { + rep := Run(input, Options{}) + found := false + for _, f := range rep.Blocked { + if f.RuleID == wantRule { + found = true + break + } + } + if !found { + t.Errorf("expected rule %s, got %+v", wantRule, rep.Blocked) + } + }) + } +} + +// TestRun_EventHandlerAttrBlocked verifies on*-handlers (onclick etc.) are +// stripped — they are an event-handler injection vector. +func TestRun_EventHandlerAttrBlocked(t *testing.T) { + rep := Run(`

x

`, Options{}) + if len(rep.Blocked) != 1 { + t.Fatalf("expected 1 blocked finding, got %d", len(rep.Blocked)) + } + if rep.Blocked[0].RuleID != RuleAttrEventHandlerBlocked { + t.Errorf("rule = %s, want %s", rep.Blocked[0].RuleID, RuleAttrEventHandlerBlocked) + } + if strings.Contains(rep.CleanedHTML, "onclick") { + t.Errorf("onclick should be stripped, cleaned=%q", rep.CleanedHTML) + } + if !strings.Contains(rep.CleanedHTML, `id="ok"`) { + t.Errorf("non-handler attrs should survive, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_OnErrorAttrBlocked tests one of the more common XSS vectors. +func TestRun_OnErrorAttrBlocked(t *testing.T) { + rep := Run(``, Options{}) + hasErr := false + for _, f := range rep.Blocked { + if f.RuleID == RuleAttrEventHandlerBlocked && f.TagOrAttr == "onerror" { + hasErr = true + } + } + if !hasErr { + t.Errorf("onerror should fire, got %+v", rep.Blocked) + } +} + +// ===================================================================== +// URL scheme allow-list. +// ===================================================================== + +// TestRun_JavaScriptURLBlocked verifies javascript: hrefs are stripped. +func TestRun_JavaScriptURLBlocked(t *testing.T) { + rep := Run(`click`, Options{}) + hasErr := false + for _, f := range rep.Blocked { + if f.RuleID == RuleAttrJSURLBlocked { + hasErr = true + } + } + if !hasErr { + t.Errorf("javascript: URL should fire ATTR_JS_URL_BLOCKED, got %+v", rep.Blocked) + } + if strings.Contains(rep.CleanedHTML, "javascript:") { + t.Errorf("javascript: should be stripped, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_VBScriptURLBlocked verifies vbscript: is rejected. +func TestRun_VBScriptURLBlocked(t *testing.T) { + rep := Run(`x`, Options{}) + if len(rep.Blocked) == 0 { + t.Errorf("expected vbscript: to be blocked, got 0 findings") + } +} + +// TestRun_DataNonImageURLBlocked verifies data:text/html is rejected +// (only data:image/* is allowed). 
+func TestRun_DataNonImageURLBlocked(t *testing.T) { + rep := Run(``, Options{}) + if len(rep.Blocked) == 0 { + t.Errorf("expected data:text/html to be blocked") + } +} + +// TestRun_DataImageAllowed verifies data:image/png passes. +func TestRun_DataImageAllowed(t *testing.T) { + rep := Run(``, Options{}) + for _, f := range rep.Blocked { + if f.RuleID == RuleAttrJSURLBlocked { + t.Errorf("data:image/* should pass, got %+v", f) + } + } +} + +// TestRun_RelativeURLAllowed verifies relative URLs (no scheme) pass. +func TestRun_RelativeURLAllowed(t *testing.T) { + rep := Run(`x`, Options{}) + for _, f := range rep.Blocked { + if f.RuleID == RuleAttrJSURLBlocked || f.RuleID == RuleAttrUnsafeSchemeBlocked { + t.Errorf("relative URL should pass, got %+v", f) + } + } +} + +// ===================================================================== +// Style property allow-list. +// ===================================================================== + +// TestRun_StylePropertyDropped verifies non-allow-list properties drop. +func TestRun_StylePropertyDropped(t *testing.T) { + rep := Run(`

x

`, Options{}) + dropped := []string{} + for _, f := range rep.Applied { + if f.RuleID == RuleStylePropertyDropped { + dropped = append(dropped, f.TagOrAttr) + } + } + if !sliceContains(dropped, "style.position") { + t.Errorf("expected position to be dropped, got %v", dropped) + } + if !sliceContains(dropped, "style.z-index") { + t.Errorf("expected z-index to be dropped, got %v", dropped) + } + if strings.Contains(rep.CleanedHTML, "position:") || strings.Contains(rep.CleanedHTML, "z-index:") { + t.Errorf("dropped properties should be removed from cleaned style, cleaned=%q", rep.CleanedHTML) + } + if !strings.Contains(rep.CleanedHTML, "color:red") { + t.Errorf("allowed property should survive, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_StyleBorderPrefixAllowed verifies the border-* prefix rule. +func TestRun_StyleBorderPrefixAllowed(t *testing.T) { + rep := Run(`

x

`, Options{}) + for _, f := range rep.Applied { + if f.RuleID == RuleStylePropertyDropped { + t.Errorf("border-* should pass, got %+v", f) + } + } +} + +// TestRun_FeishuListShorthandMarginPreserved guards the nested-list indent +// regression: when a user writes shorthand `margin:0 0 0 24px` on an inner +//
    (mail-editor's own native nested-list shape), the Feishu-list autofix +// must NOT clobber it by appending `margin-left:0`. ensureInlineStyleProps +// is supposed to skip props the user already declared, but earlier +// hasInlineStyleProp was only matching longhand `margin-left:` literally +// and missed the shorthand form, causing 24px indents to be reset to 0. +func TestRun_FeishuListShorthandMarginPreserved(t *testing.T) { + in := `
    • indented
    ` + rep := Run(in, Options{}) + cleaned := rep.CleanedHTML + // Extract just the
      opening tag's style attr (li has its own + // independent margin-left:0 longhand which is correct — list indent + // belongs on the container, not the item). + ulOpen := cleaned + if i := strings.Index(ulOpen, ">"); i >= 0 { + ulOpen = ulOpen[:i] + } + if !strings.Contains(ulOpen, "margin:0px 0px 0px 24px") { + t.Errorf("shorthand margin with 24px left should survive on
        , ulOpen=%q", ulOpen) + } + // The bug signature: extra `margin-left:` appended after the shorthand + // on the
          element itself (CSS rule says the later one wins, so any + // margin-left:0 after the shorthand resets the indent to 0). + if strings.Contains(ulOpen, "margin-left") { + t.Errorf("autofix must not append margin-left longhand onto
            when shorthand already declares it, ulOpen=%q", ulOpen) + } +} + +// ===================================================================== +// CleanedHTML output / contract guarantees. +// ===================================================================== + +// TestRun_EmptyArraysAlwaysPresent verifies the report has non-nil empty +// slices when nothing is found (the JSON envelope contract requires `[]`, +// not `null`). +func TestRun_EmptyArraysAlwaysPresent(t *testing.T) { + // Use
            instead of

            to avoid the Feishu-native paragraph + // rewrite autofix, which would surface a finding even on otherwise + // clean input. + rep := Run(`

            nothing here
            `, Options{}) + if rep.Applied == nil || rep.Blocked == nil { + t.Errorf("Applied/Blocked must be non-nil; got applied=%v blocked=%v", rep.Applied, rep.Blocked) + } + if len(rep.Applied) != 0 || len(rep.Blocked) != 0 { + t.Errorf("expected empty findings, got applied=%d blocked=%d", len(rep.Applied), len(rep.Blocked)) + } +} + +// TestEmptyReport_HasContractFields covers the helper used by compose 5's +// plain-text branch. +func TestEmptyReport_HasContractFields(t *testing.T) { + rep := EmptyReport(`plain text`) + if rep.Applied == nil { + t.Error("Applied must be non-nil") + } + if rep.Blocked == nil { + t.Error("Blocked must be non-nil") + } + if rep.CleanedHTML != "plain text" { + t.Errorf("CleanedHTML = %q, want %q", rep.CleanedHTML, "plain text") + } +} + +// TestRun_CleanedHTMLPreservesStructure verifies that the round-trip through +// the parser doesn't accidentally lose user content. +func TestRun_CleanedHTMLPreservesStructure(t *testing.T) { + html := `

            title

            body bold end

            • a
            • b
            ` + rep := Run(html, Options{}) + if len(rep.Blocked) != 0 { + t.Fatalf("unexpected blocked: %+v", rep.Blocked) + } + // Feishu-native autofix expected to fire on

            ,

              ,
            • — content + // must still survive untouched even though structure is augmented. + for _, want := range []string{"line-height:1.6", "

              ", "title", "", "bold", ""} { + if !strings.Contains(rep.CleanedHTML, want) { + t.Errorf("expected %q in cleaned, got %q", want, rep.CleanedHTML) + } + } +} + +// TestRun_EmptyInput verifies the lib short-circuits cleanly on empty input. +func TestRun_EmptyInput(t *testing.T) { + rep := Run("", Options{}) + if rep.CleanedHTML != "" { + t.Errorf("CleanedHTML = %q, want empty", rep.CleanedHTML) + } + if len(rep.Applied) != 0 || len(rep.Blocked) != 0 { + t.Errorf("empty input must produce empty findings") + } +} + +// TestRun_HasErrorFindingsFlag verifies the flag tracks blocked findings. +func TestRun_HasErrorFindingsFlag(t *testing.T) { + rep := Run(``, Options{}) + if !rep.HasErrorFindings { + t.Error("expected HasErrorFindings=true") + } + clean := Run(`

              safe

              `, Options{}) + if clean.HasErrorFindings { + t.Error("expected HasErrorFindings=false on clean HTML") + } +} + +// TestRun_HasWarningFindingsFlag verifies the flag tracks warnings. +func TestRun_HasWarningFindingsFlag(t *testing.T) { + rep := Run(`x`, Options{}) + if !rep.HasWarningFindings { + t.Error("expected HasWarningFindings=true") + } +} + +// ===================================================================== +// Excerpt cap. +// ===================================================================== + +// TestTruncateExcerpt_RespectsCap verifies the per-finding excerpt cap. +func TestTruncateExcerpt_RespectsCap(t *testing.T) { + long := strings.Repeat("x", MaxExcerptBytes+50) + got := truncateExcerpt(long) + if len(got) > MaxExcerptBytes { + t.Errorf("excerpt len %d exceeds cap %d", len(got), MaxExcerptBytes) + } + if !strings.HasSuffix(got, " ...") { + t.Errorf("expected truncation suffix, got %q", got[len(got)-10:]) + } +} + +// TestRun_ExcerptCappedForLargeOffender verifies large blocked content +// produces a short excerpt (envelope size protection). +func TestRun_ExcerptCappedForLargeOffender(t *testing.T) { + bigAttr := strings.Repeat("a", MaxExcerptBytes*2) + rep := Run(`x`, Options{}) + if len(rep.Blocked) == 0 { + t.Fatal("expected blocked finding") + } + for _, f := range rep.Blocked { + if len(f.Excerpt) > MaxExcerptBytes { + t.Errorf("excerpt len %d exceeds cap %d", len(f.Excerpt), MaxExcerptBytes) + } + } +} + +// ===================================================================== +// Helpers. +// ===================================================================== + +func sliceContains(haystack []string, needle string) bool { + for _, s := range haystack { + if s == needle { + return true + } + } + return false +} + +func min(a, b int) int { + if a < b { + return a + } + return b +} + +// ===================================================================== +// Additional coverage for edge cases and exhaustive value mapping. 
+// ===================================================================== + +// TestMapFontSize_ExhaustiveSpan covers every mapping +// + invalid values fall through to "" so the property is dropped. +func TestMapFontSize_ExhaustiveSpan(t *testing.T) { + cases := map[string]string{ + "1": "10px", + "2": "13px", + "3": "16px", + "4": "18px", + "5": "24px", + "6": "32px", + "7": "48px", + "": "", + "8": "", + "abc": "", + "3.5": "", + " 3 ": "16px", + } + for raw, want := range cases { + got := mapFontSize(raw) + if got != want { + t.Errorf("mapFontSize(%q) = %q, want %q", raw, got, want) + } + } +} + +// TestRun_FontTagWithFaceMappedToFontFamily ensures → +// font-family inline style. +func TestRun_FontTagWithFaceMappedToFontFamily(t *testing.T) { + rep := Run(`x`, Options{}) + if !strings.Contains(rep.CleanedHTML, "font-family:Arial") { + t.Errorf("expected font-family preserved, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_FontTagWithExistingStyleMerged ensures distillation merges with an +// existing style attribute on the same element. +func TestRun_FontTagWithExistingStyleMerged(t *testing.T) { + rep := Run(`x`, Options{}) + if !strings.Contains(rep.CleanedHTML, "line-height:1.6") { + t.Errorf("expected line-height retained, cleaned=%q", rep.CleanedHTML) + } + if !strings.Contains(rep.CleanedHTML, "color:red") { + t.Errorf("expected color merged, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_CenterTagWithExistingStyleMerged ensures
              's style merge. +func TestRun_CenterTagWithExistingStyleMerged(t *testing.T) { + rep := Run(`
              x
              `, Options{}) + if !strings.Contains(rep.CleanedHTML, "text-align:center") { + t.Errorf("expected text-align:center, cleaned=%q", rep.CleanedHTML) + } + if !strings.Contains(rep.CleanedHTML, "line-height:1.6") { + t.Errorf("expected line-height preserved, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_MarqueeRetainsClassAndID verifies marquee → span keeps class/id. +func TestRun_MarqueeRetainsClassAndID(t *testing.T) { + rep := Run(`y`, Options{}) + if !strings.Contains(rep.CleanedHTML, `class="cls"`) { + t.Errorf("expected class preserved, cleaned=%q", rep.CleanedHTML) + } + if strings.Contains(rep.CleanedHTML, `direction`) { + t.Errorf("expected marquee-specific attrs stripped, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_UnknownSchemeBlocked verifies an unknown URL scheme produces a +// blocked (error) finding and the attribute is dropped. +func TestRun_UnknownSchemeBlocked(t *testing.T) { + rep := Run(`x`, Options{}) + gotBlocked := false + for _, f := range rep.Blocked { + if f.RuleID == RuleAttrUnsafeSchemeBlocked { + gotBlocked = true + } + } + if !gotBlocked { + t.Errorf("expected ATTR_UNSAFE_SCHEME_BLOCKED in Blocked, got blocked=%+v applied=%+v", rep.Blocked, rep.Applied) + } + if strings.Contains(rep.CleanedHTML, "webcal:") { + t.Errorf("expected unknown scheme stripped, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_WhitespaceObfuscatedJavaScriptScheme verifies "java\tscript:..." +// is still caught after control-byte stripping in classifyURLValue. +func TestRun_WhitespaceObfuscatedJavaScriptScheme(t *testing.T) { + rep := Run("x", Options{}) + gotErr := false + for _, f := range rep.Blocked { + if f.RuleID == RuleAttrJSURLBlocked { + gotErr = true + } + } + if !gotErr { + t.Errorf("expected obfuscated javascript: to be caught, got %+v", rep.Blocked) + } +} + +// TestRun_FileSchemeBlocked verifies file: URLs are rejected. 
+func TestRun_FileSchemeBlocked(t *testing.T) { + rep := Run(`x`, Options{}) + if len(rep.Blocked) == 0 { + t.Error("expected file: to be blocked") + } +} + +// TestRun_StyleMalformedDeclarationDropped verifies a property without a +// colon delimiter is treated as malformed and dropped. +func TestRun_StyleMalformedDeclarationDropped(t *testing.T) { + rep := Run(`

              x

              `, Options{}) + gotMalformed := false + for _, f := range rep.Applied { + if f.RuleID == RuleStylePropertyDropped && f.TagOrAttr == "style.malformed" { + gotMalformed = true + } + } + if !gotMalformed { + t.Errorf("expected malformed declaration to be dropped, got %+v", rep.Applied) + } + if !strings.Contains(rep.CleanedHTML, "color:red") || !strings.Contains(rep.CleanedHTML, "line-height:1.6") { + t.Errorf("valid declarations should survive, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_StyleAllPropertiesDroppedRemovesAttribute verifies the style +// attribute is removed entirely when every property is invalid. +func TestRun_StyleAllPropertiesDroppedRemovesAttribute(t *testing.T) { + // Use
              to avoid the Feishu-native paragraph autofix, which adds + // a fresh style attribute on the rewritten outer wrapper. + rep := Run(`
              x
              `, Options{}) + if strings.Contains(rep.CleanedHTML, "style=") { + t.Errorf("style attribute should be removed when all props invalid, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_StyleEmptyValuePassThrough verifies an empty style attr passes. +func TestRun_StyleEmptyValuePassThrough(t *testing.T) { + // Use
              to avoid the Feishu-native paragraph autofix. + rep := Run(`
              x
              `, Options{}) + if len(rep.Applied) != 0 { + t.Errorf("empty style attr should not produce findings, got %+v", rep.Applied) + } +} + +// TestRun_HintsForAllBlockedTags verifies every blocked-tag rule has a +// non-empty hint (consumer contract). +func TestRun_HintsForAllBlockedTags(t *testing.T) { + cases := []string{ + ``, ``, + ``, ``, `
              `, + ``, ``, ``, + ``, ``, + } + for _, html := range cases { + rep := Run(html, Options{}) + for _, f := range rep.Blocked { + if f.Hint == "" { + t.Errorf("blocked rule %s missing hint for %q", f.RuleID, html) + } + } + } +} + +// TestRun_HintsForAllWarnTags verifies every warn-tag rule has a non-empty hint. +func TestRun_HintsForAllWarnTags(t *testing.T) { + cases := []string{ + `x`, `
              x
              `, + `x`, `x`, + } + for _, html := range cases { + rep := Run(html, Options{}) + for _, f := range rep.Applied { + if f.Hint == "" { + t.Errorf("warn rule %s missing hint for %q", f.RuleID, html) + } + } + } +} + +// TestClassifyTag_Coverage exercises classifyTag with every category. +func TestClassifyTag_Coverage(t *testing.T) { + if k, _ := classifyTag("p"); k != "allow" { + t.Errorf("p classified as %q", k) + } + if k, id := classifyTag("script"); k != "error" || id != RuleTagScriptBlocked { + t.Errorf("script classified as %q/%q", k, id) + } + if k, id := classifyTag("font"); k != "warn" || id != RuleTagFontToSpan { + t.Errorf("font classified as %q/%q", k, id) + } + // Niche tag passes silently (e.g.
              ). + if k, _ := classifyTag("details"); k != "allow" { + t.Errorf("niche tag
              should pass through, got %q", k) + } + // Case-insensitive. + if k, _ := classifyTag("SCRIPT"); k != "error" { + t.Errorf("SCRIPT (uppercase) should still classify as error") + } +} + +// TestClassifyURLValue_CoverageEdges covers empty, whitespace-only, +// no-scheme variants. +func TestClassifyURLValue_CoverageEdges(t *testing.T) { + cases := map[string]string{ + "": "ok", + " ": "ok", + "https://x": "ok", + "https://x/path?q=1": "ok", + "#fragment": "ok", + "/relative": "ok", + "javascript:alert(1)": "error", + "vbscript:msgbox 1": "error", + "data:image/png;base64,XYZ": "ok", + "data:text/html,x` + + `

              y

              ` + rep := Run(html, Options{}) + if len(rep.Blocked) < 4 { + t.Errorf("expected ≥4 errors, got %d: %+v", len(rep.Blocked), rep.Blocked) + } +} + +// TestRun_NestedStructurePreserved verifies deep nesting passes through. +func TestRun_NestedStructurePreserved(t *testing.T) { + html := `

              deep

              ` + rep := Run(html, Options{}) + if len(rep.Blocked) != 0 { + t.Errorf("nested allowed tags should pass, got %+v", rep.Blocked) + } + if !strings.Contains(rep.CleanedHTML, "deep") { + t.Errorf("inner text lost, cleaned=%q", rep.CleanedHTML) + } +} + +// TestRun_BlockedInsideAllowedRemovedNotParent verifies that removing a +// blocked tag inside an allowed parent leaves the parent intact. +func TestRun_BlockedInsideAllowedRemovedNotParent(t *testing.T) { + html := `
              beforeafter
              ` + rep := Run(html, Options{}) + if !strings.Contains(rep.CleanedHTML, "before") || !strings.Contains(rep.CleanedHTML, "after") { + t.Errorf("parent text should survive, cleaned=%q", rep.CleanedHTML) + } + if strings.Contains(rep.CleanedHTML, "
                nested +// directly without an
              • wrapper triggers LIST_DIRECT_CHILD_NON_LI and +// the inner
                  ends up wrapped in a synthetic
                • . Same for
                      . +func TestRun_ListDirectChildNonLIWrapped(t *testing.T) { + cases := []struct { + name string + html string + }{ + {"ul wraps ul", `
                        • x
                      `}, + {"ol wraps ol", `
                        1. x
                      `}, + {"ul wraps div", `
                        orphan
                      • real
                      `}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + rep := Run(tc.html, Options{}) + gotRule := false + for _, f := range rep.Applied { + if f.RuleID == RuleListDirectChildNonLI { + gotRule = true + break + } + } + if !gotRule { + t.Errorf("expected LIST_DIRECT_CHILD_NON_LI, got %+v", rep.Applied) + } + // The cleaned HTML should not have a direct ul>ul or ol>ol or + // ul>div sequence anymore. + if strings.Contains(rep.CleanedHTML, "
                        wrapper, cleaned=%q", rep.CleanedHTML) + } + }) + } +} diff --git a/shortcuts/mail/lint/rules.go b/shortcuts/mail/lint/rules.go new file mode 100644 index 000000000..746bad428 --- /dev/null +++ b/shortcuts/mail/lint/rules.go @@ -0,0 +1,353 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package lint + +import "strings" + +// Rule IDs surfaced through Finding.RuleID. UPPER_SNAKE_CASE naming is the +// contract for the stdout envelope. New rules MUST keep this naming convention +// so AI / test consumers can pattern-match reliably. +const ( + // Tag-level rules. + RuleTagFontToSpan = "TAG_FONT_TO_SPAN" + RuleTagCenterToDiv = "TAG_CENTER_TO_DIV" + RuleTagMarqueeToText = "TAG_MARQUEE_TO_TEXT" + RuleTagBlinkToText = "TAG_BLINK_TO_TEXT" + RuleTagScriptBlocked = "TAG_SCRIPT_BLOCKED" + RuleTagIframeBlocked = "TAG_IFRAME_BLOCKED" + RuleTagObjectBlocked = "TAG_OBJECT_BLOCKED" + RuleTagEmbedBlocked = "TAG_EMBED_BLOCKED" + RuleTagFormBlocked = "TAG_FORM_BLOCKED" + RuleTagInputBlocked = "TAG_INPUT_BLOCKED" + RuleTagLinkBlocked = "TAG_LINK_BLOCKED" + RuleTagMetaBlocked = "TAG_META_BLOCKED" + RuleTagBaseBlocked = "TAG_BASE_BLOCKED" + RuleTagUnknownStripped = "TAG_UNKNOWN_STRIPPED" + + // Attribute-level rules. + RuleAttrEventHandlerBlocked = "ATTR_EVENT_HANDLER_BLOCKED" + RuleAttrJSURLBlocked = "ATTR_JS_URL_BLOCKED" + RuleAttrUnsafeSchemeBlocked = "ATTR_UNSAFE_SCHEME_BLOCKED" + + // Style-level rules. + RuleStylePropertyDropped = "STYLE_PROPERTY_DROPPED" + + // Feishu-native autofix rules. These autofix the inline style / + // class / nesting shape of common elements so AI-authored HTML + // matches what Feishu mail-editor itself emits, fixing the visual + // "extra blank line between blocks", "list bullets/numbers missing", + // "link color wrong" etc. classes of issues. The rewrite is purely + // additive — user-supplied inline styles take precedence; the lib + // only fills the missing properties. 
+ RuleStyleListNative = "STYLE_LIST_NATIVE_INLINE_APPLIED" + RuleStyleListItemNative = "STYLE_LIST_ITEM_NATIVE_INLINE_APPLIED" + RuleStyleBlockquoteNative = "STYLE_BLOCKQUOTE_NATIVE_INLINE_APPLIED" + RuleStyleLinkNative = "STYLE_LINK_NATIVE_INLINE_APPLIED" + RuleStyleParaWrapper = "STYLE_PARA_WRAPPER_REWRITTEN" + + // RuleListDirectChildNonLI fires when a
                          or
                            has a non-
                          1. + // element child (e.g. nested
                                ). HTML spec requires list children + // to be
                              • ; browsers silently hoist the nested list out and the visual + // nesting falls apart. The lib autofixes by wrapping the offending child + // in a synthetic
                              • . + RuleListDirectChildNonLI = "LIST_DIRECT_CHILD_NON_LI" +) + +// Tag classification ---------------------------------------------------------- + +// allowedTags enumerates tags that pass through verbatim (tag classification row "通过"). +// Lower-case canonical names; the parser normalises tag names so we don't need +// case-insensitive comparison at lookup time. +var allowedTags = map[string]bool{ + "p": true, + "div": true, + "span": true, + "br": true, + "hr": true, + "a": true, + "img": true, + "table": true, + "thead": true, + "tbody": true, + "tfoot": true, + "tr": true, + "td": true, + "th": true, + "ul": true, + "ol": true, + "li": true, + "blockquote": true, + "pre": true, + "code": true, + "b": true, + "i": true, + "em": true, + "strong": true, + "u": true, + "s": true, + "strike": true, + "h1": true, + "h2": true, + "h3": true, + "h4": true, + "h5": true, + "h6": true, + "sub": true, + "sup": true, + "section": true, + "article": true, + "header": true, + "footer": true, + "nav": true, + "main": true, + "figure": true, + "figcaption": true, + "caption": true, + "colgroup": true, + "col": true, + // Document structural tags (golang.org/x/net/html always wraps fragments + // in ); we treat them as transparent so the wrapper + // nodes the parser inserts don't generate spurious findings. + "html": true, + "head": true, + "body": true, +} + +// blockedTags enumerates tags whose content is removed in full and a +// SeverityError finding is emitted (tag classification row "错误(删除)"). Each entry +// maps to the rule id surfaced in Finding.RuleID. 
+var blockedTags = map[string]string{ + "script": RuleTagScriptBlocked, + "iframe": RuleTagIframeBlocked, + "object": RuleTagObjectBlocked, + "embed": RuleTagEmbedBlocked, + "form": RuleTagFormBlocked, + "input": RuleTagInputBlocked, + "select": RuleTagInputBlocked, + "option": RuleTagInputBlocked, + "button": RuleTagInputBlocked, + "link": RuleTagLinkBlocked, + "meta": RuleTagMetaBlocked, + "base": RuleTagBaseBlocked, +} + +// warnAutofixTags enumerates tags rewritten when AutoFix is true (tag +// classification row "警告 + 自动修复"). The replacement strategy is per-tag. +var warnAutofixTags = map[string]string{ + "font": RuleTagFontToSpan, + "center": RuleTagCenterToDiv, + "marquee": RuleTagMarqueeToText, + "blink": RuleTagBlinkToText, +} + +// classifyTag returns the rule kind for the given lower-case tag name. +// +// kind is one of "allow", "warn", "error", "unknown". For "warn" / "error", +// ruleID names the firing rule; for "unknown", the caller falls back to +// allow-list-by-default but emits a hint via RuleTagUnknownStripped only when +// the tag is structurally suspect (e.g. -like). The cli's existing +// `htmlTagRe` regex is the de-facto allow-list shipping with the codebase, so +// we don't aggressively flag anything outside `allowedTags` — drop-through +// preserves user intent for niche tags (e.g. `
                                ` / ``) that +// browsers + Feishu native renderer already handle. +func classifyTag(tag string) (kind, ruleID string) { + tag = strings.ToLower(tag) + if allowedTags[tag] { + return "allow", "" + } + if id, ok := blockedTags[tag]; ok { + return "error", id + } + if id, ok := warnAutofixTags[tag]; ok { + return "warn", id + } + // Unknown / niche tags: pass through silently. The cli's existing + // `htmlTagRe` (mail_quote.go:333) tolerates them too. Users authoring + // HTML in Feishu native classes (`adit-html-block*`, `history-quote-*`, + // `lark-mail-doc-quote`) hit this path — they MUST pass through unchanged + // so reply / forward quote markup survives lint round-trips. + return "allow", "" +} + +// Attribute / URL / style classification -------------------------------------- + +// allowedURLSchemes lists URL schemes that pass through hyperlink-bearing +// attrs (`href`, `src`, `cite`, `formaction` etc.). Allowed: http(s), mailto, +// cid, data:image/*; everything else (notably javascript: and vbscript:) is +// blocked. Empty / relative URLs (no scheme) are always +// allowed because they resolve relatively at render time and pose no +// injection vector. +var allowedURLSchemes = map[string]bool{ + "http": true, + "https": true, + "mailto": true, + "cid": true, +} + +// blockedURLSchemes is the explicit deny-list. data:image/* is special-cased +// in classifyURLValue. +var blockedURLSchemes = map[string]bool{ + "javascript": true, + "vbscript": true, + "file": true, +} + +// classifyURLValue returns ("ok", "") if the URL value is acceptable, or +// ("error", ruleID) when it must be removed (javascript:/vbscript:/file:), +// or ("warn", ruleID) when the scheme is unrecognised but not actively +// dangerous. Empty values pass through (browsers ignore them). 
+func classifyURLValue(raw string) (kind, ruleID string) { + value := strings.TrimSpace(raw) + if value == "" { + return "ok", "" + } + // Strip leading whitespace + control bytes that could obscure the + // scheme (e.g. "java\tscript:..."). The html-parser already strips + // stray whitespace at attribute boundaries; this is defence-in-depth + // for older clients that paste from Word with U+0009 / U+0020 inside + // the scheme prefix. + value = strings.Map(func(r rune) rune { + if r < 0x20 || r == 0x7F { + return -1 + } + return r + }, value) + + // Find the colon delimiter; everything before it is the scheme. + colon := strings.IndexByte(value, ':') + if colon < 0 { + // No scheme → relative URL → allow. + return "ok", "" + } + scheme := strings.ToLower(value[:colon]) + rest := value[colon+1:] + + switch { + case allowedURLSchemes[scheme]: + return "ok", "" + case scheme == "data": + // data:image/* is whitelisted; anything else (e.g. data:text/html;...) + // is rejected. The check tolerates any subtype under image/* (png / + // jpeg / gif / svg+xml / webp) so users embedding base64 thumbnails + // don't trip the rule. + rest = strings.TrimSpace(rest) + if strings.HasPrefix(strings.ToLower(rest), "image/") { + return "ok", "" + } + return "error", RuleAttrJSURLBlocked + case blockedURLSchemes[scheme]: + return "error", RuleAttrJSURLBlocked + default: + // Unknown scheme: surface a warning so users see it but don't + // drop legitimate webcal:/tel: / similar in case downstream + // renders eventually support them. + return "warn", RuleAttrUnsafeSchemeBlocked + } +} + +// urlAttributes lists attributes whose value is a URL and must therefore +// pass classifyURLValue. Lower-case canonical names. 
+var urlAttributes = map[string]bool{ + "href": true, + "src": true, + "cite": true, + "formaction": true, + "action": true, + "background": true, + "poster": true, +} + +// allowedStyleProps enumerates CSS property names that pass through the +// inline `style="..."` attribute. Everything else is removed from the +// property list and surfaced via STYLE_PROPERTY_DROPPED. +// +// `border-*` / `padding-*` / `margin-*` are treated as prefix matches by +// classifyStyleProperty so the four directional variants (border-top etc.) +// are all admitted without enumerating each. +var allowedStyleProps = map[string]bool{ + "color": true, + "background-color": true, + "font-size": true, + "font-weight": true, + "font-style": true, + "text-align": true, + "text-decoration": true, + "line-height": true, + "padding": true, + "margin": true, + "border": true, + "width": true, + "height": true, + "display": true, + "text-indent": true, + // Quote-block / native Feishu styles (tag classification "通过"). + // Whitespace + word-break are part of the existing `
                                ` / quote
                                +	// wrapper styles in mail_quote.go (e.g. `bodyDivStyle`).
                                +	"white-space":         true,
                                +	"word-break":          true,
                                +	"word-wrap":           true,
                                +	"overflow":            true,
                                +	"overflow-wrap":       true,
                                +	"vertical-align":      true,
                                +	"list-style":          true,
                                +	"list-style-type":     true,
                                +	"list-style-position": true,
                                +	"transition":          true,
                                +	"font-family":         true,
                                +	"text-transform":      true,
                                +	"hyphens":             true,
                                +	"max-width":           true,
                                +	"min-width":           true,
                                +	"max-height":          true,
                                +	"min-height":          true,
                                +	"border-radius":       true,
                                +	"box-sizing":          true,
                                +	"opacity":             true,
                                +	"cursor":              true,
                                +}
                                +
+// stylePropAllowedPrefixes lists property-name prefixes that classifyStyleProperty
+// accepts via strings.HasPrefix, whatever follows (e.g. "border-top", "margin-left").
+// The trailing "-" is part of the match, so a name like "bordercolor" does not match.
+var stylePropAllowedPrefixes = []string{
+	"border-",
+	"padding-",
+	"margin-",
+}
                                +
+// classifyStyleProperty reports whether a CSS property name is allowed. It trims and lower-cases
+// name itself, rejects "", then checks allowedStyleProps and the stylePropAllowedPrefixes prefixes.
+func classifyStyleProperty(name string) bool {
+	name = strings.ToLower(strings.TrimSpace(name))
+	if name == "" {
+		return false
+	}
+	if allowedStyleProps[name] {
+		return true
+	}
+	for _, p := range stylePropAllowedPrefixes {
+		if strings.HasPrefix(name, p) {
+			return true
+		}
+	}
+	return false
+}
                                +
+// isEventHandlerAttr reports whether name looks like a DOM event-handler attribute: after trimming
+// and lower-casing, "on" followed by an ASCII letter or digit (e.g. "onclick", "onerror"). The lib
+// removes such attributes regardless of value (classification row "error (delete)"; known XSS vector).
+func isEventHandlerAttr(name string) bool {
+	name = strings.ToLower(strings.TrimSpace(name))
+	if !strings.HasPrefix(name, "on") {
+		return false
+	}
+	if len(name) <= 2 {
+		return false
+	}
+	// Only the byte right after "on" is inspected: it must be an ASCII
+	// lower-case letter or digit. That rejects names such as "on-something"
+	// (hyphen) while still matching handler names of the form "on" + letter.
+	c := name[2]
+	return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9')
+}
                                diff --git a/shortcuts/mail/lint/types.go b/shortcuts/mail/lint/types.go
                                new file mode 100644
                                index 000000000..383b1e6db
                                --- /dev/null
                                +++ b/shortcuts/mail/lint/types.go
                                @@ -0,0 +1,92 @@
                                +// Copyright (c) 2026 Lark Technologies Pte. Ltd.
                                +// SPDX-License-Identifier: MIT
                                +
                                +// Package lint implements the mail-domain HTML lint lib used by `+lint-html`
+// and the writing-path internals of the five compose shortcuts (`+send`,
                                +// `+draft-create`, `+reply`, `+reply-all`, `+forward`) and `+draft-edit` body
                                +// ops. The lib classifies HTML tags / attributes / inline styles into three
                                +// tiers (pass / warn-and-autofix / error-delete) following the three-tier tag
                                +// classification. `
                                +
                                +
                                + +
                                +

                                [调研主题] 市场调研报告

                                +
                                [YYYY-MM-DD] | 调研者:[姓名] · [团队] | [关联系统 / 版本]
                                +
                                + +
                                +

                                调研背景

                                +
                                [一段话描述:本轮调研聚焦的赛道 / 行业背景 / 触发动机]。本轮调研覆盖 [N] 类玩家([类别 1] / [类别 2] / [类别 3] / [类别 4]),重点评估 [自家产品 / 团队] 在 [赛道名] 的位置、对外摩擦点,以及结合 [关联工作 / PR / 本期目标] 的待补能力。所有结论基于 [数据来源 1:公开资料 / 厂商文档 / 行业报告] + [数据来源 2:自有实测 / 内部调研笔记] + [数据来源 3:访谈 / 体验]。
                                +
                                + +
                                +
                                +
                                [N]
                                +
                                调研对象
                                +
                                +
                                +
                                [N]
                                +
                                已就绪能力
                                +
                                +
                                +
                                [N]
                                +
                                明确缺口
                                +
                                +
                                +
                                [N]
                                +
                                高优待办
                                +
                                +
                                + +
                                +

                                1. [章节标题:例 "全球市场态势"]

                                +
                                [一句话描述本节切分维度,例 "把市场按 '为谁设计' 切四象限"]
                                + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                玩家 / 对象定位 / 类型[关键评分维度]关键观察
                                [玩家 1][类别][标签][一句话观察]
                                [玩家 2][类别][标签][一句话观察]
                                [玩家 3][类别][标签][一句话观察]
                                [玩家 4][类别][标签][一句话观察]
                                +
                                + +
                                +

                                2. [章节标题:例 "接入摩擦点"] ⚠️ 风险

                                +
                                [一句话描述:从哪里观察 / 案例 / 数据来源]
                                + + + + + + + + + + + + + + + + + + + + + + + +
                                摩擦类型 / 维度具体表现业务影响
                                [摩擦 1][具体表现 / 案例][对业务 / 团队的影响]
                                [摩擦 2][具体表现][影响]
                                [摩擦 3][具体表现][影响]
                                +
                                + +
                                +

                                3. [章节标题:例 "新势力玩家详情" / "重点对象详细比较"]

                                +
                                +
                                +
                                [玩家 / 对象 1]
                                +
                                [一句话产品定位 / 核心能力 / 差异化]
                                +
                                关键差异:[一句话提炼]
                                +
                                +
                                +
                                [玩家 / 对象 2]
                                +
                                [产品定位]
                                +
                                关键差异:[一句话]
                                +
                                +
                                +
                                [玩家 / 对象 3]
                                +
                                [产品定位]
                                +
                                关键差异:[一句话]
                                +
                                +
                                +
                                [小结一句话:玩家共性 / 自家路线对比]
                                +
                                + +
                                +

                                4. [章节标题:例 "安全风险全景" / "潜在隐患"] ⚠️ 高危

                                +
                                [一句话描述:风险来源 / 关联前期工作]
                                + + + + + + + + + + + + + + + + + + + + + + + +
                                威胁 / 风险案例 / 来源自家现状
                                [风险 1][案例 / 来源链接 / 引用前期报告][标签]
                                [风险 2][案例 / 来源][标签]
                                [风险 3](重点)[案例 / 来源][标签]
                                +
                                + 结论:[一段话,提炼本章节最关键的判断 / 行动建议] +
                                +
                                + +
                                +

                                5. [章节标题:例 "自家已就绪能力"] ✓ 优势

                                +
                                [一句话描述:基于哪些 PR / 已交付的工作得出]
                                +
                                • [能力 1] — [简述 + 关联 PR / 文档链接]
                                • [能力 2] — [简述]
                                • [能力 3] — [简述]
                                • [能力 4] — [简述]
                                +
                                + +
                                +

                                6. [章节标题:例 "待补能力 / 机会清单"]

                                +
                                [一句话描述:清单口径 / 优先级判定依据]
                                + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
                                #优先级能力 / 缺口建议落地
                                1P0[能力 / 缺口 1][具体落地路径 / Owner / 估算]
                                2P0[能力 / 缺口 2][具体落地路径]
                                3P1[能力 / 缺口 3][具体落地路径]
                                4P1[能力 / 缺口 4][具体落地路径]
                                5P2[能力 / 缺口 5][具体落地路径]
                                +
                                + +
                                +

                                关联工作产出佐证

                                +
                                本调研报告中部分章节的依据来自下列在执行中的工作:
                                + +
                                + +
                                +

                                建议与下一步

                                +
                                1. [行动 1] — [具体路径 + 时间窗 + Owner]
                                2. [行动 2] — [具体路径 + 时间窗]
                                3. [行动 3] — [具体路径]
                                4. [行动 4] — [具体路径]
                                +
                                + +
                                +
                                调研者:[your@email] · [团队]|整合于 [YYYY-MM-DD]
                                +
                                关联材料:[文档 / 笔记路径 / 前期报告]
                                +
                                + +
                                diff --git a/skills/lark-mail/assets/templates/weekly--personal-report.html b/skills/lark-mail/assets/templates/weekly--personal-report.html new file mode 100644 index 000000000..2e1bd7567 --- /dev/null +++ b/skills/lark-mail/assets/templates/weekly--personal-report.html @@ -0,0 +1,43 @@ + +
                                [姓名] 个人工作周报 · [YYYY 第 NN 周]
                                +
                                [团队] · [角色]|周期 [YYYY-MM-DD] ~ [YYYY-MM-DD]
                                + +
                                本周工作内容
                                + +
                                1. [项目 / 主任务名称]已完成 · 📄 文档 · PR 链接
                                +
                                • [子项 1.1:动作描述,附数据 / 链接]
                                • [子项 1.2:动作描述]
                                • [子项 1.3:动作描述,含具体数字 / 占比 / 时长]
                                + +
                                2. [项目 / 主任务名称]进行中 · 📄 文档
                                +
                                • [子项 2.1:动作 + 当前进度 + 数据]
                                • [子项 2.2:动作 + 当前进度]
                                + +
                                3. [项目 / 主任务名称]已完成
                                +
                                • [子项 3.1]
                                • [子项 3.2]
                                + +
                                下周工作内容
                                + +
                                1. [项目 / 主任务名称] P0 · 预计 [YYYY-MM-DD]
                                +
                                • [子项 1.1:具体动作 + 推进方式,例「先 spike POC,再发 RFC 同协作方对齐方案」]
                                • [子项 1.2:里程碑 / 关键产出 + 完成方式]
                                • [子项 1.3:依赖 / 协作方 / 验收标准]
                                + +
                                2. [项目 / 主任务名称] P0 · 预计 [YYYY-MM-DD]
                                +
                                • [子项 2.1:动作 + 推进方式]
                                • [子项 2.2:里程碑 / 关键产出]
                                • [子项 2.3:依赖 / 验收]
                                + +
                                3. [项目 / 主任务名称] P1 · 预计 [YYYY-MM-DD]
                                +
                                • [子项 3.1:动作 + 推进方式]
                                • [子项 3.2:里程碑]
                                • [子项 3.3:协作方]
                                + +
                                4. [项目 / 主任务名称] P2 · 预计 [YYYY-MM-DD]
                                +
                                • [子项 4.1:动作 + 推进方式]
                                • [子项 4.2:依赖 / 关键产出]
                                + +
                                风险与疑问
                                +
                                • [风险 / 疑问 1] — [背景:描述风险来源 / 触发场景];[影响:会延期 / 阻塞哪些工作];[建议:希望得到的支持 / 决策方向 / 期望响应方(@姓名 / 团队)]
                                • [风险 / 疑问 2] — [背景];[影响];[建议]
                                • [风险 / 疑问 3] — [背景];[影响];[建议]
                                +
                                (若本周无风险 / 疑问,整段替换为:「本周暂无风险 / 疑问」。)
                                + +
                                — [姓名] / [团队] / [日期]|[your@email]
                                diff --git a/skills/lark-mail/assets/templates/weekly--team-report.html b/skills/lark-mail/assets/templates/weekly--team-report.html new file mode 100644 index 000000000..6d26a90c0 --- /dev/null +++ b/skills/lark-mail/assets/templates/weekly--team-report.html @@ -0,0 +1,9 @@ + +
                                本周工作
                                +
                                1. [项目 / 事件 1 名称]@[姓名 a]@[姓名 b]
                                  文档:[文档名]
                                2. [项目 / 事件 2 名称]@[姓名 g]
                                  技术方案:[文档名] · 设计稿:[设计稿名]
                                  • [子项 2.1:含孙子项的动作主题]
                                    • [孙子项 2.1.1:必要时再细分一层;不需要可整段删除]@[姓名 h]
                                    • [孙子项 2.1.2]
                                  • [子项 2.2]@[姓名 i],进行中
                                  • [子项 2.3]@[姓名 j],评审中
                                3. [项目 / 事件 3 名称]@[姓名 k]@[姓名 l],阻塞
                                  阻塞分析:[文档名]
                                +
                                下周工作
                                +
                                1. [重点 1:项目 / 事件名]@[姓名 o],预计 [YYYY-MM-DD]
                                2. [重点 2:含子重点的项目]
                                  1. [子重点 a:动作 / 推进方式]@[姓名 p]
                                  2. [子重点 b:动作]@[姓名 q]
                                3. [重点 3:项目 / 事件名]@[姓名 r]@[姓名 s],预计 [YYYY-MM-DD]
                                4. [重点 4:项目 / 事件名]@[姓名 t],预计 [YYYY-MM-DD]
                                +
                                — [姓名] / [团队] / [日期]|[your@email]
                                diff --git a/skills/lark-mail/references/lark-mail-draft-create.md b/skills/lark-mail/references/lark-mail-draft-create.md index eeb016af9..36b6682dd 100644 --- a/skills/lark-mail/references/lark-mail-draft-create.md +++ b/skills/lark-mail/references/lark-mail-draft-create.md @@ -8,6 +8,8 @@ 如需修改已有草稿,不要使用此命令,请使用 `lark-cli mail +draft-edit`。 +**CRITICAL - 编辑邮件内容前 MUST 先用 Read 工具读取 [references/lark-mail-html.md](references/lark-mail-html.md),其中包含邮件书写规范** + ## 安全约束 此命令创建草稿——**不会**发送邮件。用户可以在飞书邮件 UI 中打开草稿查看详情,确认后再进入后续操作。因此: @@ -44,7 +46,8 @@ lark-cli mail +draft-create --to alice@example.com --subject '测试' --body 'te |------|------|------| | `--to <emails>` | 否 | 完整收件人列表,多个用逗号分隔。支持 `Alice <alice@example.com>` 格式。省略时草稿不带收件人(之后可通过 `+draft-edit` 添加) | | `--subject <text>` | 是 | 草稿主题 | -| `--body <text>` | 是 | 邮件正文。推荐使用 HTML 获得富文本排版;也支持纯文本(自动检测)。使用 `--plain-text` 可强制纯文本模式。支持 `<img src="./logo.png" />` 相对路径自动解析为内嵌图片(仅支持相对路径,不支持绝对路径) | +| `--body <text>` | 二选一 | 邮件正文。推荐使用 HTML 获得富文本排版;也支持纯文本(自动检测)。使用 `--plain-text` 可强制纯文本模式。支持 `<img src="./logo.png" />` 相对路径自动解析为内嵌图片(仅支持相对路径,不支持绝对路径)。与 `--body-file` 互斥 | +| `--body-file <path>` | 二选一 | 从文件读取邮件正文 HTML(相对路径,仅限 cwd 子树)。与 `--body` 互斥。文件大小上限 32 MB | | `--from <email>` | 否 | 发件人邮箱地址(EML From 头)。使用别名(send_as)发信时,设为别名地址并配合 `--mailbox` 指定所属邮箱。省略时使用邮箱主地址 | | `--mailbox <email>` | 否 | 邮箱地址,指定草稿所属的邮箱(默认回退到 `--from`,再回退到 `me`)。当发件人(`--from`)与邮箱不同时使用,如通过别名或 send_as 地址发信。可通过 `accessible_mailboxes` 查询可用邮箱 | | `--cc <emails>` | 否 | 完整抄送列表,多个用逗号分隔 | diff --git a/skills/lark-mail/references/lark-mail-draft-edit.md b/skills/lark-mail/references/lark-mail-draft-edit.md index 366c5cf82..4dee1e5b8 100644 --- a/skills/lark-mail/references/lark-mail-draft-edit.md +++ b/skills/lark-mail/references/lark-mail-draft-edit.md @@ -10,7 +10,9 @@ - `--set-cc` - `--set-bcc` -**正文编辑和其他高级操作必须通过 `--patch-file`**。没有 `--set-body` flag。 +**正文整体替换的快捷方式:** `--body <text>` / `--body-file <path>`(二选一互斥)会自动展开为 `set_body` op。如果只想做整段正文替换且不需要保留引用区,用这两个 flag 即可,无需写 patch-file。要保留引用区或做更精细的 op 组合,仍走 `--patch-file`。两个入口与 `--patch-file` 内的 `set_body` / `set_reply_body` 互斥。 + 
+**CRITICAL - 编辑邮件内容前 MUST 先用 Read 工具读取 [references/lark-mail-html.md](references/lark-mail-html.md),其中包含邮件书写规范** ### 正文编辑:两个 op 的选择 @@ -72,6 +74,8 @@ lark-cli mail +draft-edit --draft-id --set-subject '测试' --dry-run | `--set-to ` | 否 | 用此处提供的地址替换整个 To 收件人列表 | | `--set-cc ` | 否 | 用此处提供的地址替换整个 Cc 抄送列表 | | `--set-bcc ` | 否 | 用此处提供的地址替换整个 Bcc 密送列表 | +| `--body ` | 否 | 整段替换正文(自动展开为 `set_body` op)。与 `--body-file` 互斥;与 `--patch-file` 内的 `set_body` / `set_reply_body` op 互斥 | +| `--body-file ` | 否 | 从文件读取正文 HTML(相对路径,仅限 cwd 子树)。与 `--body` 互斥。文件大小上限 32 MB | | `--set-priority ` | 否 | 设置邮件优先级:`high`、`normal`、`low`。设为 `normal` 会清除已有优先级 | | `--set-event-summary ` | 否 | 设置日程标题。需同时设置 `--set-event-start` 和 `--set-event-end` | | `--set-event-start