Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions case_studies/gpu/matmul/matmul.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include <optitrust_models.h>

__DECL(reduce_sum, "int * (int -> float) -> float");
__AXIOM(reduce_sum_empty, "forall (f: int -> float) -> 0.f =. reduce_sum(0, f)");
__AXIOM(reduce_sum_add_right, "forall (n: int) (f: int -> float) (_: n >= 0) -> reduce_sum(n, f) +. f(n) =. reduce_sum(n + 1, f)");
__DEF(matmul, "fun (A B: int * int -> float) (p: int) -> fun (i j: int) -> reduce_sum(p, fun k -> A(i, k) *. B(k, j))");

/* Multiplies the matrix A (dim m x p) by the matrix B (dim p x n),
* and writes the result in the matrix C (dim m x n):
* C = A * B
*/
void mm(float* c, float* a, float* b, int m, int n, int p) {
__requires("A: int * int -> float, B: int * int -> float");
__reads("a ~> Matrix2(m, p, A), b ~> Matrix2(p, n, B)");
__writes("c ~> Matrix2(m, n, matmul(A, B, p))");

for (int i = 0; i < m; i++) {
__xwrites("for j in 0..n -> &c[MINDEX2(m, n, i, j)] ~~> matmul(A, B, p)(i, j)");

for (int j = 0; j < n; j++) {
__xwrites("&c[MINDEX2(m, n, i, j)] ~~> matmul(A, B, p)(i, j)");

float sum = 0.f;
__ghost(rewrite_float_linear, "inside := fun v -> &sum ~~> v, by := reduce_sum_empty(fun k -> A(i, k) *. B(k, j))");
for (int k = 0; k < p; k++) {
__spreserves("&sum ~~> reduce_sum(k, fun k0 -> A(i, k0) *. B(k0, j))");

__GHOST_BEGIN(focusA, ro_matrix2_focus, "a, i, k");
__GHOST_BEGIN(focusB, ro_matrix2_focus, "b, k, j");
sum += a[MINDEX2(m, p, i, k)] * b[MINDEX2(p, n, k, j)];
__GHOST_END(focusA);
__GHOST_END(focusB);

__ghost(in_range_bounds, "k", "k_gt_0 <- lower_bound"); // TODO: proper name k_ge_0
__ghost(rewrite_float_linear, "inside := fun v -> &sum ~~> v, by := reduce_sum_add_right(k, fun k -> A(i, k) *. B(k, j), k_gt_0)");
}

c[MINDEX2(m, n, i, j)] = sum;
}
}
}
67 changes: 67 additions & 0 deletions case_studies/gpu/matmul/matmul.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
open Optitrust
open Prelude

let _ = Flags.check_validity := true
(* let _ = Flags.pretty_matrix_notation := true *)
let _ = Flags.recompute_resources_between_steps := true
let _ = Flags.disable_stringreprs := true
let _ = Flags.save_ast_for_steps := Some Flags.Steps_script

(* let _ = Flags.report_exectime := true *)

(* Reproducing a TVM schedule for matrix multiplication:
1. improve data locality by blocking the computation of C and preloading B with a packed memory layout
2. unroll loops and introduce parallelism with vectorization and multi-threading

c.f. README.md
*)

let int = trm_int

let bm = 32

let stage_ok = fun i -> i >= 3

let _ = Run.script_cpp_stage (stage_ok) (fun () ->
(* !! Matrix.local_name ~type_and_dims:(~var:"c" ~local_var:"c_gmem" [cFor "i"]; *)
!! Matrix.local_name_tile ~uninit_pre:true ~var:"c" ~local_var:"c_gmem" [cFor "i"];
!! Matrix.local_name_tile ~uninit_post:true ~var:"a" ~local_var:"a_gmem" [cFor "i"];
!! Matrix.local_name_tile ~uninit_post:true ~var:"b" ~local_var:"b_gmem" [cFor "i"];
)

let _ = Run.script_cpp_stage (stage_ok) (fun () ->
let rec tiles (loop_id, tile_name_sizes) =
match tile_name_sizes with
| (tile_name, tile_size) :: rest ->
Loop.tile (int tile_size) ~index:tile_name ~bound:TileDivides [cFor loop_id];
tiles (loop_id, rest)
| [] -> ()
in
!! List.iter tiles [
"i", ["bi", bm; "ti", 8];
"j", ["bj", 32; "tj", 4];
"k", ["bkIdx", 4]
];
!! Loop.reorder_at ~order:["bi"; "bj"; "bkIdx"; "ti"; "tj"; "k"; "i"; "j"] [cPlusEq ~lhs:[cVar "sum"] ()];
)

let _ = Run.script_cpp_stage (stage_ok) (fun () ->
(* NOTE: this takes a full tile of A/B,
we need to carve out a subtile
!! Matrix.local_name_tile ~uninit_post:true ~var:"a_gmem" ~local_var:"a_smem" [cFor "bkIdx"; cFor "ti"];
!! Matrix.local_name_tile ~uninit_post:true ~var:"b_gmem" ~local_var:"b_smem" [cFor "bkIdx"; cFor "ti"]; *)
!! Loop.hoist_expr ~dest:[tBefore; cFor "bkIdx"; cFor ~body:[cPlusEq ()] "ti"] "a_smem" ~indep:["j"; "tj"] [cArrayRead "a_gmem"];
!! Loop.hoist_expr ~dest:[tBefore; cFor "bkIdx"; cFor ~body:[cPlusEq ()] "ti"] "b_smem" ~indep:["i"; "ti"] [cArrayRead "b_gmem"];
)

let _ = Run.script_cpp_stage (stage_ok) (fun () ->
!! Loop.hoist_expr ~dest:[tBefore; cFor "bkIdx"; cFor "i" ~body:[cPlusEq ~lhs:[cVar "sum"] ()]]
"a_regs" ~indep:["j"] [cArrayRead "a_smem"];
!! Loop.hoist_expr ~dest:[tBefore; cFor "bkIdx"; cFor "i" ~body:[cPlusEq ~lhs:[cVar "sum"] ()]]
"b_regs" ~indep:["i"] [cArrayRead "b_smem"];
)

let _ = Run.script_cpp_stage (stage_ok) (fun () ->
(* move _smem/_regs higher ? !! Loop.hoist_alloc *)
!! Cleanup.std ();
)
2 changes: 1 addition & 1 deletion lib/ast/mark.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ let reuse_or_next (next : unit -> mark) (m : mark) : mark =
if m = "" then next () else m

let span_marks (m: mark) : mark * mark =
(m ^ "__begin", m ^ "__end")
if m = "" then ("", "") else (m ^ "__begin", m ^ "__end")

(**** Marks ****)

Expand Down
14 changes: 9 additions & 5 deletions lib/transfo/instr.ml
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,9 @@ let%transfo accumulate_targets (tg : target) : unit =

(** [move_in_seq ~dest tg] perform the same actions as {!Instr_basic.move},
but move the instructions with the ghost pairs they need around them. *)
let%transfo move_in_seq ~(dest: target) (tg: target) : unit =
let%transfo move_in_seq
?(mark_moved : mark = no_mark)
~(dest: target) (tg: target) : unit =
if !Flags.resource_typing_enabled then
Target.iter (fun p ->
let seq_path, span = Path.extract_last_dir_span p in
Expand All @@ -104,16 +106,18 @@ let%transfo move_in_seq ~(dest: target) (tg: target) : unit =
let dest_mark = Mark.next () in
Marks.add dest_mark (target_of_path (seq_path @ [Dir_before dest_index]));

Ghost_pair.fission ~mark_between:mark_end (target_of_path (seq_path @ [Dir_before span.stop]));
Ghost_pair.fission ~mark_between:mark_begin (target_of_path (seq_path @ [Dir_before span.start]));

(* TODO: Make a better API for this kind of mark in sequence *)
let seq_dest_mark = Mark.next () in
Marks.add_fake_instr seq_dest_mark [cMark dest_mark];

Ghost_pair.fission ~mark_between:mark_end (target_of_path (seq_path @ [Dir_before (span.stop + 1)]));
Ghost_pair.fission ~mark_between:mark_begin (target_of_path (seq_path @ [Dir_before (span.start + 1)]));
(* + 1 for fake instr *)

Ghost_pure.move_all_upwards (target_of_path seq_path);
Ghost_pair.minimize_all_in_seq (target_of_path seq_path);

Instr_basic.move ~dest:[cMark seq_dest_mark; tBefore] [cMarkSpan mark];
Instr_basic.move ~mark_moved ~dest:[cMark seq_dest_mark; tBefore] [cMarkSpan mark];

Marks.remove_fake_instr [cMark seq_dest_mark];
Marks.remove_st (fun m -> m = mark_begin || m = mark_end || m = dest_mark) (target_of_path seq_path);
Expand Down
22 changes: 18 additions & 4 deletions lib/transfo/instr_basic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ let%transfo copy ?(mark_copy : mark = no_mark) ?(rev : bool = false) ?(dest:targ

This is sufficient but not necessary, a manual commutation proof can be used
as well. *)
let%transfo move ?(rev : bool = false) ~(dest : target) (tg : target) : unit =
let%transfo move ?(mark_moved : mark = no_mark)
?(rev : bool = false) ~(dest : target) (tg : target) : unit =
Resources.required_for_check ();
Target.iter ~rev (fun p ->
let p_seq, span = Path.extract_last_dir_span p in
Expand All @@ -61,7 +62,7 @@ let%transfo move ?(rev : bool = false) ~(dest : target) (tg : target) : unit =
let seq, swapped_after = Mlist.split mid_index seq in
let untouched_before, swapped_before = Mlist.split ~left_bias:true before_index seq in

if !Flags.check_validity then begin
if !Flags.check_validity && not !Flags.use_resources_with_models then begin
let usage_before = Resources.compute_usage_of_instrs swapped_before in
let usage_after = Resources.compute_usage_of_instrs swapped_after in
let ctx = [
Expand All @@ -72,8 +73,21 @@ let%transfo move ?(rev : bool = false) ~(dest : target) (tg : target) : unit =
Trace.justif "resources commute"
end;

let seq = Mlist.merge_list [untouched_before; swapped_after; swapped_before; untouched_after] in
trm_alter ~desc:(Trm_seq (seq, result)) t_seq
let (moved_beg, moved_end) = span_marks mark_moved in
trm_seq_helper ~annot:t_seq.annot ?result (
[TrmMlist untouched_before] @
(if span.start < dest_index then
[TrmMlist swapped_after;
Mark moved_beg;
TrmMlist swapped_before;
Mark moved_end]
else
[Mark moved_beg;
TrmMlist swapped_after;
Mark moved_end;
TrmMlist swapped_before])
@ [TrmMlist untouched_after]
)
) p_seq
) tg;
Scope.infer_var_ids ()
Expand Down
34 changes: 20 additions & 14 deletions lib/transfo/loop.ml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,25 @@ let%transfo fission ?(nest_of : int = 1) (tg : target) : unit =
fission_rec next_mark nest_of m_interstice
)) tg

(* TODO: factorize with non-bis
this moves the instructions with the ghost pairs they need around them. *)
let%transfo move_out_bis
?(empty_range: empty_range_mode = Produced_resources_uninit_after)
(tg : target) : unit =
Marks.with_marks (fun next_mark -> Target.iter (fun p ->
let seq_path, span = Path.extract_last_dir_span p in

let mark_moved = next_mark () in
Instr.move_in_seq ~mark_moved ~dest:[tFirst] (target_of_path p);

Ghost_pure.minimize_all_in_seq (target_of_path seq_path);
(* TODO: this is required if other transformations like Variable_basic.inline don't eagerly do it. *)
Resources.make_strict_loop_contracts [];
let loop_mark = next_mark () in
Loop_basic.move_out ~loop_mark [cPath seq_path; Constr_depth (DepthAt 0); tSpan [tFirst] [cMarkSpanStop mark_moved]];
if !Flags.check_validity then Resources.loop_minimize [cMark loop_mark];
) tg)

(* TODO: redundant with 'hoist' *)
(** [hoist_alloc_loop_list]: this transformation is similar to [Loop_basic.hoist], but also supports undetached
variable declarations, hoisting through multiple loops, and inlining array indexing code.
Expand Down Expand Up @@ -262,23 +281,10 @@ let%transfo hoist_instr_loop_list (loops : int list) (tg : target) : unit =
| [] -> ()
| 0 :: rl ->
(* do not create dimension. *)
let loop_mark = next_m () in
let instr_mark = next_m () in
Trace.step ~kind:Step_group ~name:(sprintf "%d. move out" i) (fun () ->
Marks.add instr_mark (target_of_path p);
(* TODO: factorize this in a Loop.move_out combi ? *)
Instr.move_in_seq ~dest:[tFirst] (target_of_path p);
let seq_p, _ = Path.extract_last_dir p in
Ghost_pure.minimize_all_in_seq (target_of_path seq_p);
(* TODO: this is required if other transformations like Variable_basic.inline don't eagerly do it. *)
Resources.make_strict_loop_contracts [];
let path = Target.resolve_target_exactly_one [cMark instr_mark; tBefore] in
let p_seq, instr_index = Path.extract_last_dir_before path in
for _ = 0 to instr_index - 1 do
Loop_basic.move_out [cPath p_seq; dSeqNth 0];
done;
Loop_basic.move_out ~loop_mark [cMark instr_mark];
if !Flags.check_validity then Resources.loop_minimize [cMark loop_mark];
move_out_bis (target_of_path p);
);
Target.iter (fun p -> aux (i + 1) rl p) [cMark instr_mark];
| 1 :: rl ->
Expand Down
36 changes: 20 additions & 16 deletions lib/transfo/loop_basic.ml
Original file line number Diff line number Diff line change
Expand Up @@ -654,28 +654,30 @@ type empty_range_mode =
[trm_index] - index of that instruction on its surrouding sequence (just checks that it is 0),
[t] - ast of the for loop.
*)
let move_out_on (instr_mark : mark) (loop_mark : mark) (empty_range: empty_range_mode) (trm_index : int) (t : trm) : trm =
if (trm_index <> 0) then failwith "Loop_basic.move_out: not targeting the first instruction in a loop (got %d instead)" trm_index;
let move_out_on (instr_mark : mark) (loop_mark : mark) (empty_range: empty_range_mode) (span : Dir.span) (t : trm) : trm =
if (span.start <> 0) then failwith "Loop_basic.move_out: not targeting the first instructions in a loop (got %d instead)" span.start;
let error = "Loop_basic.move_out: expected for loop" in
let (range, mode, body, contract) = trm_inv ~error trm_for_inv t in
let instrs, _ = trm_inv ~error trm_seq_inv body in
let instr = Mlist.nth instrs 0 in
let rest = Mlist.pop_front instrs in
let (rest, moved_instrs) = Mlist.extract span.start span.stop instrs in

if !Flags.check_validity then begin
if is_free_var_in_trm range.index instr then
(* NOTE: would be checked by var ids anyway *)
trm_fail instr "Loop_basic.move_out: instruction uses loop index";
Resources.assert_dup_instr_redundant 0 (Mlist.length instrs - 1) body;
if !Flags.check_validity && not !Flags.use_resources_with_models then begin
Mlist.iteri (fun i instr ->
if is_free_var_in_trm range.index instr then
(* NOTE: would be checked by var ids anyway *)
trm_fail instr "Loop_basic.move_out: instruction uses loop index";
(* TODO: assert_dup_instr_redundant on group of instrs at once *)
Resources.assert_dup_instr_redundant i (Mlist.length instrs - 1) body;
) moved_instrs;

begin match empty_range with
| Generate_if -> ()
| Arithmetically_impossible -> failwith "Arithmetically_impossible is not implemented yet"
| Produced_resources_uninit_after ->
if not contract.strict then failwith "Need the for loop contract to be strict";
let instr_usage = Resources.usage_of_trm instr in
let instr_usage = Resources.compute_usage_of_instrs moved_instrs in
let invariant_written_by_instr = List.filter (Resource_set.(linear_usage_filter instr_usage keep_written)) contract.invariant.linear in
List.iter (fun (_, f) -> if not (Resource_formula.is_formula_uninit f) then trm_fail instr "The instruction cannot be moved out because it consumes resources that are not uninitialized after the loop (and the loop range could be empty)"
List.iter (fun (_, f) -> if not (Resource_formula.is_formula_uninit f) then trm_fail body "The instruction cannot be moved out because it consumes resources that are not uninitialized after the loop (and the loop range could be empty)"
) invariant_written_by_instr
end;

Expand All @@ -688,16 +690,17 @@ let move_out_on (instr_mark : mark) (loop_mark : mark) (empty_range: empty_range
contract
else
(* FIXME: this still requires resources to update contract even when not checking validity! *)
let resources_after = Option.unsome ~error:"Loop_basic.move_out: requires computed resources" instr.ctx.ctx_resources_after in
let resources_after = Option.unsome ~error:"Loop_basic.move_out: requires computed resources" (Option.unsome (Mlist.last moved_instrs)).ctx.ctx_resources_after in
let _, new_invariant, _ = Resource_computation.subtract_linear_resource_set resources_after.linear (Resource_contract.parallel_reads_inside_loop range contract.parallel_reads @ contract.iter_contract.pre.linear) in
{ contract with invariant = { contract.invariant with linear = new_invariant } }
in

let loop = trm_for ~mode ~contract range (trm_seq rest) in
let non_empty_cond = trm_ineq range.direction range.start range.stop in
let instr_outside = if generate_if then trm_if non_empty_cond instr (trm_unit ()) else instr in
let moved_instrs = Mlist.map (trm_add_mark instr_mark) moved_instrs in
let instr_outside = if generate_if then trm_if non_empty_cond (trm_seq moved_instrs) (trm_unit ()) else (trm_seq_nobrace moved_instrs) in
trm_seq_nobrace_nomarks [
trm_add_mark instr_mark instr_outside;
(* trm_add_mark instr_mark *) instr_outside;
trm_add_mark loop_mark loop]

(** [move_out tg]: expects the target [tg] to point at the first instruction inside the loop
Expand Down Expand Up @@ -733,8 +736,9 @@ let%transfo move_out ?(instr_mark : mark = no_mark) ?(loop_mark : mark = no_mark
Nobrace_transfo.remove_after (fun _ ->
Target.iter (fun p ->
Resources.required_for_check ();
let i, p = Path.index_in_surrounding_loop p in
apply_at_path (move_out_on instr_mark loop_mark empty_range i) p
let seq_path, span = Path.extract_last_dir_span p in
let loop_path = Path.parent seq_path in
apply_at_path (move_out_on instr_mark loop_mark empty_range span) loop_path
) tg)

let move_out_alloc_on (trm_index : int) (t : trm) : trm =
Expand Down
59 changes: 59 additions & 0 deletions tests/loop/hoist_expr/loop_hoist_expr_models.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#include <optitrust_models.h>

void f(int* t, int* u) {
__requires("T: int -> int");
__reads("t ~> Matrix1(10, T)");
__writes("u ~> Matrix1(10, T)");

for (int i = 0; i < 10; i++) {
__strict();
__xreads("&t[MINDEX1(10, i)] ~~> T(i)");
__xwrites("&u[MINDEX1(10, i)] ~~> T(i)");

int x = t[MINDEX1(10, i)];
u[MINDEX1(10, i)] = x;
int z = x;
}

__ghost(ro_matrix1_focus, "t, 0");
for (int l = 0; l < 5; l++) {
__strict();
__sreads("&t[MINDEX1(10, 0)] ~~> T(0)");

for (int m = 0; m < 2; m++) {
__strict();
__sreads("&t[MINDEX1(10, 0)] ~~> T(0)");

int x = l + m + t[MINDEX1(10, 0)];
}
}
__ghost(ro_matrix1_unfocus, "t");

for (int a = 0; a < 8; a++) {
__strict();
__sreads("u ~> Matrix1(10, T)");

int y1 = 0;
int y2 = 1;
for (int b = 0; b < 5; b++) {
__strict();
__sreads("u ~> Matrix1(10, T)");
int p1 = 0;
int p2 = 1;

for (int c = 0; c < 2; c++) {
__strict();
__sreads("u ~> Matrix1(10, T)");
int q1 = 0;
int q2 = 1;

// TODO: ideally, figure out some ghost to put in the rewrite path here
__GHOST_BEGIN(focus, ro_matrix1_focus, "u, 0");
int x = a + b + c + u[MINDEX1(10, 0)];
__GHOST_END(focus);
}
int z1 = 0;
int z2 = 1;
}
}
}
13 changes: 13 additions & 0 deletions tests/loop/hoist_expr/loop_hoist_expr_models.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
open Optitrust
open Prelude

let _ = Flags.check_validity := true
let _ = Flags.recompute_resources_between_steps := true

let _ = Run.script_cpp (fun () ->
!! Loop.hoist_expr ~dest:[tBefore; cFor "i"] "t2" [cFor "i"; cArrayRead "t"];
!! Loop.hoist_expr ~dest:[tBefore; cFor "l"] "t02" ~indep:["l"; "m"] [cFor "l"; cArrayRead "t"];
!! Loop.hoist_expr ~dest:[tBefore; cFor "a"] "a2" ~indep:["b"; "c"] [cVarDef "x"; cVar "a"];
!! Loop.hoist_expr ~dest:[tBefore; cFor ~body:[cVarDef "x"] "a"] "u2" ~indep:["b"; "c"] [cVarDef "x"; cArrayRead "u"];
!!! ();
)
Loading
Loading