A Declarative Layer Visualization Grammar for Deep Learning Models


This post outlines a compact Layer Visualization Grammar (LVG) for building scalable, reproducible visualizations across model families such as CNNs, LSTMs, Transformers, and diffusion models. The grammar supports both generic tensor views and targeted model-specific views, and it uses a global state manager to coordinate interactions across panels and sessions.


Core Spec Shape

interface LVGSpec {
  $schema: "https://lvg.dev/v1";
  id?: string;
  title?: string;
  description?: string;

  target: {
    model_id: string;            // runtime handle
    layer: string;               // e.g., "blocks.5.attn"
    pass?: "train" | "eval" | "inference";
    scope?: "forward" | "backward";
    batch?: number | "active";
  };

  inputs?: InputBinding[];
  parameters?: Param[];
  signals?: Signal[];
  state?: StateBindings;         // global state read/write

  views: (GenericView | TargetedView | CompositeView)[];
  layout?: { type?: "grid" | "hstack" | "vstack" | "tabs"; columns?: number; gap?: number; responsive?: "none" | "wrap" };
}

type InputBinding = {
  as: string;
  from: TensorRef | MetaRef;
  slice?: SliceSpec;
  normalize?: "none" | "layer" | "channel" | "token" | "minmax" | "zscore";
};

type TensorRef = { type: "tensor"; source: "layer_output" | "layer_input" | "param" | "upstream" | "downstream" | "cache"; name?: string };
type MetaRef = { type: "meta"; key: "shape" | "param_count" | "receptive_field" | "token_strings" | "class_labels" | "timesteps" };
type SliceSpec = { axes?: Array<number | "last">; index?: Array<number | { start?: number; end?: number; step?: number } | "all"> };

type Param = {
  name: string; value: number | string | boolean | number[] | string[];
  ui?: { type: "slider" | "select" | "checkbox" | "text" | "range"; min?: number; max?: number; step?: number; options?: Array<{label:string; value:any}> };
};

type Expr = string;  // expression mini-language, e.g., "add_field(index(),'tok_idx')"
type Signal = { name: string; value?: any; on?: Array<{ event: UIEvent; update: Expr }> };
type UIEvent = { type: "pointerhover" | "click" | "brush" | "keydown"; view?: string; key?: string };

type StateBindings = {
  read?: Array<{ key: string; as: string; default?: any }>;
  write?: Array<{ key: string; from: Expr | string; on: UIEvent | "immediate" }>;
};
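
The JSON examples later in this post splice parameter and state values into transform fields with { "$param": ... } and { "$state": ... } wrappers. The spec above leaves that binding convention implicit; here is a minimal sketch of how a runtime might resolve it (the ParamRef, StateRef, and resolveRefs names are my assumptions, not part of the grammar):

type ParamRef = { $param: string };
type StateRef = { $state: string };

// Recursively replace {$param}/{$state} wrappers with concrete values
// before a Dataflow is evaluated. Arrays and nested objects are walked;
// everything else passes through unchanged.
function resolveRefs(node: unknown, params: Record<string, unknown>, state: Record<string, unknown>): unknown {
  if (node === null || typeof node !== "object") return node;
  const obj = node as Record<string, unknown>;
  if (typeof obj.$param === "string") return params[obj.$param];
  if (typeof obj.$state === "string") return state[obj.$state];
  if (Array.isArray(node)) return node.map(n => resolveRefs(n, params, state));
  return Object.fromEntries(Object.entries(obj).map(([k, v]) => [k, resolveRefs(v, params, state)]));
}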

Views, Transforms, and Encodings

interface GenericView {
  type: "generic";
  id: string;
  title?: string;
  data: Dataflow;
  encoding: Encoding;
  interactions?: Interaction[];
  actions?: Action[];
}

interface TargetedView {
  type: "targeted";
  id: string;
  title?: string;
  preset:
    | "transformer_attention_matrix"
    | "transformer_head_grid"
    | "cnn_channel_heatmap"
    | "cnn_featuremap_gallery"
    | "diffusion_step_preview"
    | "diffusion_noise_to_image"
    | "rnn_gate_timelines"
    | "token_saliency_grid";
  inputs?: Record<string, string>;
  options?: Record<string, any>;
  interactions?: Interaction[];
  actions?: Action[];
}

interface CompositeView {
  type: "composite";
  id: string;
  title?: string;
  layout: LVGSpec["layout"];
  children: (GenericView | TargetedView)[];
}

type Dataflow = {
  sources: Array<{ name: string; from: string; kind: "tensor" | "meta" | "state" | "literal"; value?: any }>;
  transforms: Transform[];
  output: string;
};

type Transform =
  | { op: "reshape"; from: string; to: "table" | "matrix"; axis_names?: string[] }
  | { op: "slice"; from: string; spec: SliceSpec; as: string }
  | { op: "reduce"; from: string; dims?: number[]; fn: "mean" | "max" | "min" | "sum" | "std" | "var" | "norm"; keepdims?: boolean; as: string }
  | { op: "normalize"; from: string; mode: "minmax" | "zscore" | "l2"; along?: "row" | "col" | "global"; as: string }
  | { op: "project"; from: string; method: "pca" | "umap" | "tsne"; k: number; seed?: number; metric?: "cosine" | "euclidean"; as: string }
  | { op: "similarity"; from: string; against?: string; metric: "cosine" | "dot" | "jaccard"; topk?: number; threshold?: number; as: string }
  | { op: "compare"; a: string; b: string; mode: "diff" | "ratio" | "corr"; as: string }
  | { op: "rank"; from: string; by: string; order?: "asc" | "desc"; groupby?: string[]; as: string }
  | { op: "quantiles"; from: string; q: number[]; as: string }
  | { op: "aggregate"; from: string; by: string[]; ops: Array<{ fn: "mean"|"max"|"count"|"sum"|"std"; as: string }> }
  | { op: "threshold"; from: string; by: string; op: ">" | "<" | ">=" | "<="; value: number; as: string }
  | { op: "topk"; from: string; by: string; k: number; as: string }
  | { op: "join"; left: string; right: string; on: string[]; how: "inner" | "left" | "right" | "outer"; as: string }
  | { op: "expr"; from: string; expr: Expr; as: string };

type Encoding = {
  mark: "scatter" | "heatmap" | "image" | "line" | "bar" | "table";
  data: string;
  x?: Channel; y?: Channel; color?: Channel; size?: Channel; shape?: Channel; text?: Channel;
  axes?: { x?: Axis; y?: Axis };
  legend?: boolean | { position?: "right" | "bottom" };
  tooltip?: string[];
};

type Channel = { field: string; type: "quant" | "ordinal" | "nominal" | "index" };
type Axis = { label?: string; grid?: boolean; tickCount?: number };

type Interaction =
  | { type: "brush"; target: "x" | "y" | "xy"; signal: string }
  | { type: "select"; on: "click" | "hover"; fields: string[]; signal: string }
  | { type: "link"; with: string; using: string[] };

type Action =
  | { id: string; label: string; kind: "export_csv" | "export_png" | "write_state" | "mutate_model" | "compare_with_state"; payload?: any; from?: string; toStateKey?: string };
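
A Dataflow is evaluated as a forward pass over a named environment: sources seed the environment, each transform reads its inputs and writes its result under "as" (or back under "from" for in-place ops like reshape), and "output" names the value handed to the encoding. A minimal evaluator sketch, assuming per-op kernels behind a hypothetical applyOp:

type Env = Record<string, unknown>;

// Assumed per-op kernel dispatcher (slice, reduce, project, ...).
declare function applyOp(t: Transform, env: Env): unknown;

// Seed the environment from sources, fold the transforms over it,
// and return the named output for the view's encoding.
function evalDataflow(flow: Dataflow, resolve: (name: string, kind: string) => unknown): unknown {
  const env: Env = {};
  for (const s of flow.sources) {
    env[s.name] = s.kind === "literal" ? s.value : resolve(s.from, s.kind);
  }
  for (const t of flow.transforms) {
    const out = applyOp(t, env);
    const key = "as" in t && typeof t.as === "string" ? t.as : (t as { from: string }).from;
    env[key] = out;
  }
  return env[flow.output];
}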

Targeted Presets: Contracts

  • transformer_attention_matrix
    Inputs: attn [batch, heads, tokens, tokens], optional token_strings.
    Options: head, softmax_temp, normalize_along (row|col).

  • transformer_head_grid
    Inputs: Q, K, V.
    Options: head selector; shows norms and head-wise similarities.

  • cnn_channel_heatmap
    Inputs: acts [batch, channels, H, W].
    Options: summary (l2|mean|max), rank_by (class_separation|variance), topk.

  • cnn_featuremap_gallery
    Inputs: acts.
    Options: topk, normalize, tiling mode.

  • diffusion_step_preview
    Inputs: x_t, eps_pred, timesteps, scheduler params.
    Options: scheduler, eta, t, decode.

  • diffusion_noise_to_image
    Inputs: same as above; runs a short denoise loop from t to t-k.

  • rnn_gate_timelines
    Inputs: gate activations i, f, o, g over time.
    Options: sequence range, aggregation.

  • token_saliency_grid
    Inputs: saliency [tokens] or [layers,tokens], optional token_strings.
    Options: normalization, threshold.
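
These presets need not be bespoke renderers: each can lower to a GenericView over the same transform vocabulary, which keeps caching and linking uniform. A sketch of how transformer_attention_matrix might expand (the lowering function and its defaults are my assumptions, not the shipped implementation):

// Hypothetical lowering: expand the attention-matrix preset into a
// generic heatmap over a sliced, normalized attention tensor.
function lowerAttentionMatrix(v: TargetedView): GenericView {
  const head = v.options?.head ?? 0;  // may still be a {$param} ref; resolved later
  return {
    type: "generic",
    id: v.id,
    title: v.title,
    data: {
      sources: [{ name: "A", from: v.inputs!["attn"], kind: "tensor" }],
      transforms: [
        // [batch, heads, tokens, tokens] -> [tokens, tokens] for one head
        { op: "slice", from: "A", spec: { axes: [0, 1], index: [0, head] }, as: "A_head" },
        { op: "normalize", from: "A_head", mode: "minmax", along: v.options?.normalize_along ?? "row", as: "A_norm" }
      ],
      output: "A_norm"
    },
    encoding: {
      mark: "heatmap",
      data: "A_norm",
      x: { field: "col", type: "index" },
      y: { field: "row", type: "index" },
      color: { field: "value", type: "quant" }
    },
    interactions: v.interactions,
    actions: v.actions
  };
}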


Example: Transformer Attention Debugger

The following spec composes generic and targeted views for a Transformer self-attention layer. It:

  1. Projects per-token value vectors (from V) to 2-D via dimensionality reduction (DR) and allows brushing.
  2. Shows a head-selectable attention-matrix targeted view.
  3. Compares the current head’s attention submatrix to a pinned baseline stored in global state.
{
  "$schema": "https://lvg.dev/v1",
  "id": "transformer-attn-debugger",
  "title": "Transformer Attention Debugger",
  "target": {
    "model_id": "active_model",
    "layer": "encoder.blocks.5.self_attn",
    "pass": "inference",
    "scope": "forward",
    "batch": "active"
  },
  "inputs": [
    { "as": "Q", "from": { "type": "tensor", "source": "layer_output", "name": "Q" } },
    { "as": "K", "from": { "type": "tensor", "source": "layer_output", "name": "K" } },
    { "as": "V", "from": { "type": "tensor", "source": "layer_output", "name": "V" } },
    { "as": "ATTN", "from": { "type": "tensor", "source": "layer_output", "name": "attn" } },
    { "as": "TOKENS", "from": { "type": "meta", "key": "token_strings" } }
  ],
  "parameters": [
    { "name": "head", "value": 0, "ui": { "type": "slider", "min": 0, "max": 11, "step": 1 } },
    {
      "name": "dr_method",
      "value": "pca",
      "ui": { "type": "select", "options": [ { "label": "PCA", "value": "pca" }, { "label": "UMAP", "value": "umap" } ] }
    }
  ],
  "signals": [
    { "name": "hover_token", "value": null },
    { "name": "selected_tokens", "value": [] }
  ],
  "state": {
    "read": [
      { "key": "debug.selected_tokens", "as": "selected_tokens", "default": [] },
      { "key": "debug.selected_head", "as": "head", "default": 0 }
    ],
    "write": [
      { "key": "debug.selected_tokens", "from": "selected_tokens", "on": { "type": "brush", "view": "token-embed" } },
      { "key": "debug.selected_head", "from": "head", "on": "immediate" }
    ]
  },
  "layout": { "type": "grid", "columns": 2, "gap": 12, "responsive": "wrap" },
  "views": [
    {
      "type": "generic",
      "id": "token-embed",
      "title": "Token Embedding (V) — DR Scatter",
      "data": {
        "sources": [
          { "name": "V_src", "from": "V", "kind": "tensor" },
          { "name": "TOK", "from": "TOKENS", "kind": "meta" }
        ],
        "transforms": [
          { "op": "reshape", "from": "V_src", "to": "table", "axis_names": ["batch","tokens","heads","dim"] },
          { "op": "reduce", "from": "V_src", "dims": [2], "fn": "mean", "keepdims": false, "as": "V_tokens" },
          { "op": "project", "from": "V_tokens", "method": { "$param": "dr_method" }, "k": 2, "as": "V_2d" },
          { "op": "expr", "from": "V_2d", "expr": "add_field(index(),'tok_idx')", "as": "V_xy" },
          { "op": "join", "left": "V_xy", "right": "TOK", "on": ["tok_idx"], "how": "left", "as": "V_labeled" }
        ],
        "output": "V_labeled"
      },
      "encoding": {
        "mark": "scatter",
        "data": "V_labeled",
        "x": { "field": "x0", "type": "quant" },
        "y": { "field": "x1", "type": "quant" },
        "color": { "field": "tok_idx", "type": "index" },
        "size": { "field": "variance", "type": "quant" },
        "tooltip": ["tok_idx", "token_string"]
      },
      "interactions": [
        { "type": "brush", "target": "xy", "signal": "selected_tokens" },
        { "type": "select", "on": "hover", "fields": ["tok_idx"], "signal": "hover_token" }
      ],
      "actions": [
        { "id": "export-embed", "label": "Export CSV", "kind": "export_csv", "from": "V_labeled" }
      ]
    },
    {
      "type": "targeted",
      "id": "attn-matrix",
      "title": "Attention Matrix — Head & Token Subset",
      "preset": "transformer_attention_matrix",
      "inputs": { "attn": "ATTN", "token_strings": "TOKENS" },
      "options": { "head": { "$param": "head" }, "normalize_along": "row" },
      "interactions": [
        { "type": "link", "with": "token-embed", "using": ["tok_idx"] }
      ],
      "actions": [
        { "id": "pin-head", "label": "Set as Compare A", "kind": "write_state", "toStateKey": "debug.compare.attnA", "from": "head" },
        { "id": "pin-selection", "label": "Set Tokens as Compare A", "kind": "write_state", "toStateKey": "debug.compare.tokensA", "from": "selected_tokens" }
      ]
    },
    {
      "type": "generic",
      "id": "attn-compare",
      "title": "Compare Current Head vs Pinned",
      "data": {
        "sources": [
          { "name": "ATTN_src", "from": "ATTN", "kind": "tensor" },
          { "name": "head_curr", "from": "head", "kind": "state" },
          { "name": "head_A", "from": "debug.compare.attnA", "kind": "state" },
          { "name": "sel_tokens", "from": "selected_tokens", "kind": "state" },
          { "name": "tokensA", "from": "debug.compare.tokensA", "kind": "state" }
        ],
        "transforms": [
          { "op": "slice", "from": "ATTN_src", "spec": { "axes": [0,1,2,3], "index": ["all", { "$state": "head_curr" }, "all", "all"] }, "as": "A_curr" },
          { "op": "slice", "from": "ATTN_src", "spec": { "axes": [0,1,2,3], "index": ["all", { "$state": "head_A" }, "all", "all"] }, "as": "A_pin" },
          { "op": "reduce", "from": "A_curr", "dims": [0], "fn": "mean", "as": "A_curr2D" },
          { "op": "reduce", "from": "A_pin",  "dims": [0], "fn": "mean", "as": "A_pin2D" },
          { "op": "slice", "from": "A_curr2D", "spec": { "axes": [0,1], "index": [ { "$state": "sel_tokens" }, { "$state": "sel_tokens" } ] }, "as": "A_curr_sub" },
          { "op": "slice", "from": "A_pin2D",  "spec": { "axes": [0,1], "index": [ { "$state": "tokensA" }, { "$state": "tokensA" } ] }, "as": "A_pin_sub" },
          { "op": "compare", "a": "A_curr_sub", "b": "A_pin_sub", "mode": "diff", "as": "A_diff" }
        ],
        "output": "A_diff"
      },
      "encoding": {
        "mark": "heatmap",
        "data": "A_diff",
        "x": { "field": "col", "type": "index" },
        "y": { "field": "row", "type": "index" },
        "color": { "field": "value", "type": "quant" },
        "tooltip": ["row","col","value"]
      },
      "actions": [
        { "id": "export-diff", "label": "Export Diff CSV", "kind": "export_csv", "from": "A_diff" }
      ]
    }
  ]
}
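
Rendering a spec like this is then a matter of handing it to a runtime along with a model adapter and the shared state store. A hypothetical client API, just to fix ideas (none of these names are a published interface):

// Assumed runtime surface: adapters resolve TensorRef/MetaRef per
// model_id; state is the JSON-serializable, subscribable store.
declare function createLVGRuntime(opts: {
  adapters: Record<string, unknown>;
  state: unknown;
}): { mount(spec: LVGSpec, el: HTMLElement): void };
declare function torchAdapter(model: unknown): unknown;
declare const model: unknown;
declare const sharedStateStore: unknown;
declare const spec: LVGSpec; // the JSON document above

const runtime = createLVGRuntime({
  adapters: { active_model: torchAdapter(model) },
  state: sharedStateStore,
});
runtime.mount(spec, document.getElementById("panel")!);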

CNN Example: Channel Heatmap + DR Scatter

{
  "$schema": "https://lvg.dev/v1",
  "id": "cnn-channels",
  "title": "CNN Channel Heatmap + DR",
  "target": { "model_id": "active_model", "layer": "features.17", "pass": "inference", "scope": "forward", "batch": "active" },
  "inputs": [
    { "as": "ACTS", "from": { "type": "tensor", "source": "layer_output", "name": "activations" } },
    { "as": "LABELS", "from": { "type": "meta", "key": "class_labels" } }
  ],
  "parameters": [
    { "name": "summary", "value": "l2", "ui": { "type": "select", "options": [ { "label": "L2", "value": "l2" }, { "label": "Mean", "value": "mean" }, { "label": "Max", "value": "max" } ] } },
    { "name": "topk", "value": 64, "ui": { "type": "slider", "min": 8, "max": 256, "step": 8 } }
  ],
  "layout": { "type": "grid", "columns": 2, "gap": 12 },
  "views": [
    {
      "type": "targeted",
      "id": "chan-heat",
      "title": "Channel Heatmap (summary × images)",
      "preset": "cnn_channel_heatmap",
      "inputs": { "acts": "ACTS" },
      "options": { "summary": { "$param": "summary" }, "rank_by": "variance", "topk": { "$param": "topk" } }
    },
    {
      "type": "generic",
      "id": "chan-dr",
      "title": "Channel DR (H×W reduced)",
      "data": {
        "sources": [ { "name": "A", "from": "ACTS", "kind": "tensor" } ],
        "transforms": [
          { "op": "reduce", "from": "A", "dims": [2,3], "fn": "norm", "as": "A_ch" },  // [batch, channels]
          { "op": "reduce", "from": "A_ch", "dims": [0], "fn": "mean", "as": "A_ch_mean" }, // [channels]
          { "op": "project", "from": "A_ch_mean", "method": "pca", "k": 2, "as": "A_2d" },
          { "op": "expr", "from": "A_2d", "expr": "add_field(index(),'ch')", "as": "A_xy" }
        ],
        "output": "A_xy"
      },
      "encoding": {
        "mark": "scatter",
        "data": "A_xy",
        "x": { "field": "x0", "type": "quant" },
        "y": { "field": "x1", "type": "quant" },
        "color": { "field": "ch", "type": "index" },
        "tooltip": ["ch"]
      }
    }
  ]
}

Diffusion Example: Single-Step Preview

{
  "$schema": "https://lvg.dev/v1",
  "id": "diffusion-preview",
  "title": "Diffusion Step Preview (DDIM)",
  "target": { "model_id": "sd15", "layer": "unet.down_blocks.2", "pass": "inference", "scope": "forward", "batch": "active" },
  "inputs": [
    { "as": "X_T", "from": { "type": "tensor", "source": "upstream", "name": "x_t" } },
    { "as": "EPS", "from": { "type": "tensor", "source": "layer_output", "name": "eps_pred" } },
    { "as": "T", "from": { "type": "meta", "key": "timesteps" } }
  ],
  "parameters": [
    { "name": "t", "value": 350, "ui": { "type": "slider", "min": 0, "max": 999, "step": 1 } },
    { "name": "eta", "value": 0.0, "ui": { "type": "slider", "min": 0, "max": 1, "step": 0.05 } }
  ],
  "state": {
    "write": [
      { "key": "diff.t", "from": "t", "on": "immediate" }
    ]
  },
  "views": [
    {
      "type": "targeted",
      "id": "ddim-preview",
      "title": "x_{t-1} and Decoded Image",
      "preset": "diffusion_step_preview",
      "inputs": { "x_t": "X_T", "eps_pred": "EPS", "timesteps": "T" },
      "options": { "scheduler": "ddim", "eta": { "$param": "eta" }, "t": { "$param": "t" }, "decode": true },
      "actions": [
        { "id": "pin-t", "label": "Pin t", "kind": "write_state", "toStateKey": "diff.t_pinned", "from": "t" }
      ]
    }
  ]
}
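
Under the hood, diffusion_step_preview with scheduler "ddim" is assumed to perform the standard DDIM update: predict x_0 from (x_t, eps_pred), then step toward t-1 with stochasticity controlled by eta (eta = 0 is fully deterministic). A minimal element-wise sketch, assuming a precomputed cumulative alpha-bar schedule (ddimStep and alphaBar are illustrative names):

// One DDIM step x_t -> x_{t-1}. alphaBar[t] is the cumulative
// product of (1 - beta_s) for s <= t.
function ddimStep(
  xT: Float32Array, epsPred: Float32Array,
  alphaBar: Float32Array, t: number, tPrev: number, eta: number,
): Float32Array {
  const aT = alphaBar[t];
  const aP = tPrev >= 0 ? alphaBar[tPrev] : 1.0;
  // sigma = 0 when eta = 0, recovering deterministic DDIM.
  const sigma = eta * Math.sqrt((1 - aP) / (1 - aT)) * Math.sqrt(1 - aT / aP);
  const out = new Float32Array(xT.length);
  for (let i = 0; i < xT.length; i++) {
    const x0 = (xT[i] - Math.sqrt(1 - aT) * epsPred[i]) / Math.sqrt(aT); // predicted x_0
    const dir = Math.sqrt(1 - aP - sigma * sigma) * epsPred[i];          // direction toward x_{t-1}
    const noise = sigma === 0 ? 0 : sigma * gaussian();                  // fresh noise when eta > 0
    out[i] = Math.sqrt(aP) * x0 + dir + noise;
  }
  return out;
}

// Box-Muller sample for the stochastic term.
function gaussian(): number {
  const u = 1 - Math.random();
  return Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * Math.random());
}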

Implementation Notes

  • Backend adapters: Resolve TensorRef to framework tensors and convert them to ndarrays for transforms. Cache transform outputs keyed by (model_id, layer, input_name, params_hash); see the key sketch after this list.
  • Presets: Thin wrappers over generic transforms with domain helpers (e.g., softmax temperature, head tiling, DDIM step).
  • Global state: JSON-serializable values with subscriptions; updates trigger recomputation of dependent specs.
  • Interactivity: Brushing and linking rely on shared identifiers (tok_idx, ch, t) across views.
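
For the adapter cache, the key only needs to be stable across equivalent requests. One way to derive it (an assumed helper, not a prescribed scheme):

// Hypothetical cache key: request coordinates plus a hash of the
// resolved parameter values (sorted keys for stability).
function cacheKey(modelId: string, layer: string, inputName: string, params: Record<string, unknown>): string {
  const paramsHash = fnv1a(JSON.stringify(params, Object.keys(params).sort()));
  return `${modelId}:${layer}:${inputName}:${paramsHash}`;
}

// Tiny FNV-1a string hash; any stable hash works here.
function fnv1a(s: string): string {
  let h = 0x811c9dc5;
  for (let i = 0; i < s.length; i++) {
    h ^= s.charCodeAt(i);
    h = Math.imul(h, 0x01000193) >>> 0;
  }
  return h.toString(16);
}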

Rahat Zaman
Graduate Research Assistant, School of Computing