arxlang · xmnlab · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026 · Apr 27, 2026
diff --git a/docs/index.md b/docs/index.md
@@ -1,9 +1,9 @@
 # ArxLang
 
-Arx is a multi-purpose compiler that aims to provide native list and tensor
-abstractions backed internally by IRx runtime support. It uses the power of
-[LLVM](https://llvm.org/) to provide multi-architecture machine target code
-generation.
+Arx is a multi-purpose compiler that aims to provide native list, tensor, and
+dataframe abstractions backed internally by IRx runtime support. It uses the
+power of [LLVM](https://llvm.org/) to provide multi-architecture machine target
+code generation.
 
 The language syntax is influenced by Python, C++, and YAML, featuring
 significant whitespace, static typing (planned), and a focus on data-oriented
@@ -43,8 +43,9 @@ from Arx.
 
 - **LLVM-powered** -- compiles to native machine code via LLVM
 - **Python-like syntax** -- indentation-based blocks, familiar keywords
-- **Lists and tensors** -- generic collections plus Arrow-backed numeric tensors
-  with first-class indexing and compiler-known shapes
+- **Lists, tensors, and dataframes** -- generic collections, Arrow-backed
+  numeric tensors with compiler-known shapes, and Arrow-backed named-column
+  DataFrames
 - **Multiple output modes** -- inspect tokens, AST, LLVM IR, or compile to
   object files
 

diff --git a/docs/irx/runtime-features.md b/docs/irx/runtime-features.md
@@ -46,6 +46,8 @@ when needed.
 - `array` Declares the builtin one-dimensional Arrow array runtime surface.
 - `tensor` Declares the builtin homogeneous N-dimensional Arrow tensor runtime
   surface.
+- `dataframe` Declares the builtin heterogeneous named-column Arrow table
+  runtime surface.
 - `list` Declares the minimal dynamic-list runtime used by `ListCreate`,
   `ListAppend`, and lowered list indexing.
 
@@ -175,6 +177,17 @@ Current initial Tensor layer alongside that substrate:
   storage
 - current tensor lowering supports fixed-width numeric element types only
 
+Current initial DataFrame layer alongside that substrate:
+
+- dataframe values are created through `irx_arrow_table_*` runtime symbols
+- dataframe construction stores named columns in Arrow C++ `arrow::Table`
+  handles
+- series values are column views backed by Arrow C++ `arrow::ChunkedArray`
+  handles
+- static column access resolves to a known column index during semantic analysis
+- current dataframe lowering supports fixed-width numeric and `bool` columns
+  only
+
 What IRx does not do here:
 
 - no direct LLVM struct encoding of Arrow containers

diff --git a/docs/library/built-in-types.md b/docs/library/built-in-types.md
@@ -6,27 +6,30 @@ their canonical spellings, accepted aliases, and current surface syntax.
 
 ## Overview
 
-| Canonical type               | Accepted aliases | Category   | Example                                          | Notes                                    |
-| ---------------------------- | ---------------- | ---------- | ------------------------------------------------ | ---------------------------------------- |
-| `i8`                         | `int8`           | integer    | `var a: i8 = 8`                                  | 8-bit integer                            |
-| `i16`                        | `int16`          | integer    | `var b: i16 = 16`                                | 16-bit integer                           |
-| `i32`                        | `int32`          | integer    | `var c: i32 = 32`                                | 32-bit integer                           |
-| `i64`                        | `int64`          | integer    | `var d: i64 = 64`                                | 64-bit integer                           |
-| `f16`                        | `float16`        | float      | `var x: f16 = 1.5`                               | 16-bit float                             |
-| `f32`                        | `float32`        | float      | `var y: f32 = 3.25`                              | 32-bit float                             |
-| `f64`                        | `float64`        | float      | `var z: f64 = 9.5`                               | 64-bit float                             |
-| `bool`                       | `boolean`        | boolean    | `var ok: bool = true`                            | Uses `true` and `false` literals         |
-| `none`                       | —                | unit       | `fn log() -> none:`                              | Also the single value of the `none` type |
-| `str`                        | `string`         | text       | `var s: str = "hi"`                              | UTF-8 string                             |
-| `char`                       | —                | text       | `var ch: char = 'A'`                             | Currently mapped to `i8`                 |
-| `datetime`                   | —                | temporal   | `datetime("2026-03-05T12:30:59")`                | Constructor-style literal form           |
-| `timestamp`                  | —                | temporal   | `timestamp("2026-03-05T12:30:59Z")`              | Constructor-style literal form           |
-| `date`                       | —                | temporal   | `var d: date`                                    | Recognized as a built-in type name       |
-| `time`                       | —                | temporal   | `var t: time`                                    | Recognized as a built-in type name       |
-| `list[T]`                    | —                | collection | `var ids: list[i32] = [1, 2, 3]`                 | Generic collection type                  |
-| `tensor[T, N]`               | —                | collection | `var ids: tensor[i32, 4] = [1, 2, 3, 4]`         | Fixed-shape 1D numeric tensor            |
-| `tensor[T, d0, d1, ..., dN]` | —                | collection | `var grid: tensor[i32, 2, 2] = [[1, 2], [3, 4]]` | Fixed-shape multidimensional tensor      |
-| `tensor[T, ...]`             | —                | collection | `fn sink(values: tensor[i32, ...]) -> none:`     | Runtime-shaped tensor parameter          |
+| Canonical type               | Accepted aliases | Category   | Example                                               | Notes                                    |
+| ---------------------------- | ---------------- | ---------- | ----------------------------------------------------- | ---------------------------------------- |
+| `i8`                         | `int8`           | integer    | `var a: i8 = 8`                                       | 8-bit integer                            |
+| `i16`                        | `int16`          | integer    | `var b: i16 = 16`                                     | 16-bit integer                           |
+| `i32`                        | `int32`          | integer    | `var c: i32 = 32`                                     | 32-bit integer                           |
+| `i64`                        | `int64`          | integer    | `var d: i64 = 64`                                     | 64-bit integer                           |
+| `f16`                        | `float16`        | float      | `var x: f16 = 1.5`                                    | 16-bit float                             |
+| `f32`                        | `float32`        | float      | `var y: f32 = 3.25`                                   | 32-bit float                             |
+| `f64`                        | `float64`        | float      | `var z: f64 = 9.5`                                    | 64-bit float                             |
+| `bool`                       | `boolean`        | boolean    | `var ok: bool = true`                                 | Uses `true` and `false` literals         |
+| `none`                       | —                | unit       | `fn log() -> none:`                                   | Also the single value of the `none` type |
+| `str`                        | `string`         | text       | `var s: str = "hi"`                                   | UTF-8 string                             |
+| `char`                       | —                | text       | `var ch: char = 'A'`                                  | Currently mapped to `i8`                 |
+| `datetime`                   | —                | temporal   | `datetime("2026-03-05T12:30:59")`                     | Constructor-style literal form           |
+| `timestamp`                  | —                | temporal   | `timestamp("2026-03-05T12:30:59Z")`                   | Constructor-style literal form           |
+| `date`                       | —                | temporal   | `var d: date`                                         | Recognized as a built-in type name       |
+| `time`                       | —                | temporal   | `var t: time`                                         | Recognized as a built-in type name       |
+| `list[T]`                    | —                | collection | `var ids: list[i32] = [1, 2, 3]`                      | Generic collection type                  |
+| `tensor[T, N]`               | —                | collection | `var ids: tensor[i32, 4] = [1, 2, 3, 4]`              | Fixed-shape 1D numeric tensor            |
+| `tensor[T, d0, d1, ..., dN]` | —                | collection | `var grid: tensor[i32, 2, 2] = [[1, 2], [3, 4]]`      | Fixed-shape multidimensional tensor      |
+| `tensor[T, ...]`             | —                | collection | `fn sink(values: tensor[i32, ...]) -> none:`          | Runtime-shaped tensor parameter          |
+| `dataframe[name: T, ...]`    | —                | collection | `var rows: dataframe[id: i32] = dataframe({id: [1]})` | Static-schema DataFrame                  |
+| `dataframe[...]`             | —                | collection | `fn sink(rows: dataframe[...]) -> none:`              | Runtime-schema DataFrame parameter       |
+| `series[T]`                  | —                | collection | `var ids: series[i32] = rows["id"]`                   | Typed DataFrame column                   |
 
 ## Numeric Types
 
@@ -94,24 +97,27 @@ fn time_demo() -> none:
 The parser also recognizes `date` and `time` as built-in type names in
 annotations.
 
-## Collections and tensors
+## Collections, tensors, and dataframes
 
-Arx exposes two public collection constructors:
+Arx exposes the following public collection type forms:
 
 - `list[T]` for generic collection values
 - `tensor[T, N]` for fixed-shape 1D numeric tensors
 - `tensor[T, d0, d1, ..., dN]` for fixed-shape multidimensional tensors
 - `tensor[T, ...]` for runtime-shaped tensor parameters
+- `dataframe[name: T, ...]` for static-schema named-column DataFrames
+- `dataframe[...]` for runtime-schema DataFrame parameters
+- `series[T]` for typed DataFrame columns
 
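-In the fixed-shape form, `...` is documentation prose for additional integer
-dimensions. The literal `...` marker is reserved for runtime-shaped tensor
-parameters.
+In the fixed-shape and static-schema forms, `...` is documentation prose for
+further columns or integer dimensions. The literal `...` marker is reserved for
+runtime-shaped tensor parameters and runtime-schema DataFrame parameters.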
 
 The naming is intentional: Arx uses `Tensor` for homogeneous N-dimensional data,
 aligning with common data-science terminology and IRx's Arrow C++ backed
-runtime. `Array` remains the term for one-dimensional Arrow-style data where it
-is exposed, and future dataframe/table support will be separate and
-heterogeneous.
+runtime. `DataFrame` is the heterogeneous named-column abstraction backed by
+Arrow C++ `Table`, and `Series` is the one-dimensional typed column view backed
+by Arrow C++ `ChunkedArray`.
 
 ```arx
 fn tensor_demo() -> none:
@@ -139,6 +145,35 @@ Current tensor rules in this phase:
 - current lowering is read-only and is focused on literal/default-initialized
   shaped tensors
 
+Current DataFrame rules in this phase:
+
+- column types are fixed-width numeric types (`i8`, `i16`, `i32`, `i64`, `f32`,
+  `f64`) or `bool`
+- string, nullable, nested, temporal, and user-defined columns are not part of
+  the MVP yet
+- static-schema values use `dataframe[name: T, ...]` annotations and the
+  column-oriented `dataframe({...})` constructor
+- constructor columns must be list literals, use declared column names, and have
+  equal row counts
+- columns can be accessed as `rows.score` or `rows["score"]`
+- `rows.nrows()` and `rows.ncols()` return row and column counts as `i64`
+- column access and metadata methods currently work on DataFrame identifiers and
+  literals whose schema is known while parsing, not on arbitrary
+  DataFrame-returning expressions
+- `dataframe[...]` is accepted only in function and extern parameter annotations
+  for now; column access on runtime-schema parameters is not available yet
+
+```arx
+fn dataframe_demo() -> i32:
+  var rows: dataframe[id: i32, score: f64] = dataframe({
+    id: [1, 2, 3],
+    score: [0.5, 0.8, 1.0],
+  })
+  var scores: series[f64] = rows.score
+  var ids: series[i32] = rows["id"]
+  return cast(rows.nrows(), i32)
+```
+
 ## Casting
 
 Use the built-in `cast(value, type)` helper to convert values between supported

diff --git a/docs/library/datatypes.md b/docs/library/datatypes.md
@@ -28,8 +28,13 @@ fn add(a: i32, b: i32) -> i32:
 ```arx
 fn summarize(name: str, values: list[i32]) -> none:
   var grid: tensor[i32, 2, 2] = [[1, 2], [3, 4]]
+  var rows: dataframe[id: i32, score: f64] = dataframe({
+    id: [1, 2],
+    score: [0.5, 1.0],
+  })
   var count: i32 = 0
   print(grid[0, 1])
+  print(rows.nrows())
   return
 ```
 
@@ -42,12 +47,20 @@ Common places where types appear:
 - shaped 1D tensor annotations: `tensor[i32, 4]`
 - multidimensional tensor annotations: `tensor[i32, 2, 2]`
 - runtime-shaped tensor parameters: `fn sink(x: tensor[i32, ...]) -> none:`
+- static-schema DataFrame annotations: `dataframe[id: i32, score: f64]`
+- runtime-schema DataFrame parameters: `fn sink(rows: dataframe[...]) -> none:`
+- typed DataFrame column annotations: `series[f64]`
 
 `tensor[T, ...]` is currently parameter-only. Use fixed-shape tensor annotations
 for variables, fields, and return types until runtime-shaped storage and return
 semantics are defined. Runtime-shaped tensor parameters can be passed through,
 but indexed access currently requires a static-shape tensor annotation.
 
+`dataframe[...]` follows the same current restriction: it is accepted only in
+function and extern parameter annotations. Static-schema DataFrames can be
+constructed with `dataframe({...})`, and their columns can be accessed with
+either `rows.score` or `rows["score"]`.
+
 ## Built-in Type Reference
 
 For the catalog of built-in types, aliases, and examples, see
@@ -58,5 +71,5 @@ That page covers:
 - numeric types and aliases
 - `none` as the unit type and value
 - string, character, and temporal types
-- lists, tensors, and current limitations
+- lists, tensors, dataframes, series, and current limitations
 - the `cast(value, type)` helper
diff --git a/docs/roadmap.md b/docs/roadmap.md
@@ -43,4 +43,32 @@ type for each variable and function returning.
 
 ## Implement native tensors
 
-TBA
+Native tensors now have an initial Arrow C++ backed implementation. Remaining
+work is to make runtime-shaped tensor values usable in more contexts while
+keeping the runtime-layout rules identical for every collection type that
+accepts a `...` runtime annotation (currently tensors and DataFrames).
+
+- [ ] Expand runtime-layout annotations beyond function and extern parameters
+      once default values, ownership, and type checking are ready for local
+      declarations and expression contexts.
+- [ ] Keep tensor semantics aligned with the Arrow-backed runtime rather than
+      adding Arx-local lowering behavior.
+
+## DataFrames and Series
+
+DataFrames are a distinct public collection abstraction for heterogeneous named
+columns. Static-schema values use `dataframe[name: T, ...]`, column views use
+`series[T]`, and literals are constructed with `dataframe({...})`.
+
+- [x] Add the builtin `dataframe[name: T, ...]` and `dataframe[...]` types.
+- [x] Add the builtin `series[T]` type for typed DataFrame columns.
+- [x] Add the builtin `dataframe({...})` constructor for column-oriented
+      literals.
+- [x] Back DataFrame values with Arrow C++ `arrow::Table`.
+- [x] Back Series values with Arrow C++ `arrow::ChunkedArray`.
+- [x] Keep the MVP limited to fixed-width numeric and `bool` columns.
+- [ ] Add string, nullable, nested, temporal, and user-defined column support
+      after the fixed-width MVP is stable.
+- [ ] Expand runtime-layout/schema annotations beyond function and extern
+      parameters, applying the same behavior to both `dataframe[...]` and
+      `tensor[T, ...]`.
diff --git a/examples/dataframe.x b/examples/dataframe.x
@@ -0,0 +1,24 @@
+```
+title: DataFrame example
+summary: Demonstrates Arrow-backed DataFrame syntax with fixed-width columns.
+```
+
+fn row_count(rows: dataframe[id: i32, score: f64]) -> i32:
+  ```
+  title: row_count
+  summary: Returns the static-schema DataFrame row count, cast to i32.
+  ```
+  return cast(rows.nrows(), i32)
+
+fn main() -> i32:
+  ```
+  title: main
+  summary: Builds a DataFrame, reads two columns, and returns the row count.
+  ```
+  var rows: dataframe[id: i32, score: f64] = dataframe({
+    id: [1, 2, 3],
+    score: [0.5, 0.8, 1.0],
+  })
+  var scores: series[f64] = rows.score
+  var ids: series[i32] = rows["id"]
+  return row_count(rows)
diff --git a/packages/arx/src/arx/builtins.py b/packages/arx/src/arx/builtins.py
@@ -19,6 +19,7 @@
 _BUILTIN_RESOURCE_DIR = "builtins"
 
 BUILTIN_CAST = "cast"
+BUILTIN_DATAFRAME = "dataframe"
 BUILTIN_PRINT = "print"
 BUILTIN_RANGE = "range"
 _GENERATORS_MODULE = f"{BUILTIN_NAMESPACE}.generators"
@@ -69,6 +70,7 @@ class AmbientBuiltinBinding:
 
 __all__ = [
     "BUILTIN_CAST",
+    "BUILTIN_DATAFRAME",
     "BUILTIN_NAMESPACE",
     "BUILTIN_PRINT",
     "BUILTIN_RANGE",
@@ -96,7 +98,7 @@ def is_builtin(name: str) -> bool:
     returns:
       type: bool
     """
-    return name in {BUILTIN_CAST, BUILTIN_PRINT}
+    return name in {BUILTIN_CAST, BUILTIN_DATAFRAME, BUILTIN_PRINT}
 
 
 def build_cast(