From 57ee160be959a3e59bf7a6f2846ef50d17124213 Mon Sep 17 00:00:00 2001 From: Alban Gruin <alban.gruin@irit.fr> Date: Wed, 5 Oct 2022 13:35:14 +0200 Subject: [PATCH] modular_store_buffer: support latencies in all stages This adds support for latencies in stages other than Lu in the store buffer case. Signed-off-by: Alban Gruin <alban.gruin@irit.fr> --- src/modular_store_buffer.v | 509 ++++++++++++++++++++++++++----------- 1 file changed, 363 insertions(+), 146 deletions(-) diff --git a/src/modular_store_buffer.v b/src/modular_store_buffer.v index 6fe24ef..483cddb 100644 --- a/src/modular_store_buffer.v +++ b/src/modular_store_buffer.v @@ -3,10 +3,6 @@ From StoreBuffer Require Import modular_definitions utils. Import ListNotations. Module StoreBufferSig <: Sig. - Variant opcode_sb := - | Load (dlat : nat) (id : nat) - | Store (id : nat). - Variant stage_sb := | Sp | Lsu @@ -16,11 +12,6 @@ Module StoreBufferSig <: Sig. Definition first_stage := Sp. - Scheme Equality for opcode_sb. - Definition opcode := opcode_sb. - Definition opcode_beq := opcode_sb_beq. - Definition opcode_eq_dec := opcode_sb_eq_dec. - Scheme Equality for stage_sb. Definition stage := stage_sb. Definition stage_beq := stage_sb_beq. @@ -31,6 +22,19 @@ Module StoreBufferSig <: Sig. now destruct s. Qed. + Lemma stage_beq_iff_eq : forall s s', stage_beq s s' = true <-> s = s'. + Proof. + intros s s'. + now destruct s, s'. + Qed. + + Lemma stage_nbeq_iff_neq : forall s s', stage_beq s s' = false <-> s <> s'. + Proof. + intros s s'. + rewrite <- not_true_iff_false. + apply not_iff_compat, stage_beq_iff_eq. + Qed. + (* Boolean stage comparison. Sp < Lsu < (Lu, Su) < Sn. *) Definition leb x y := match x, y with @@ -45,9 +49,51 @@ Module StoreBufferSig <: Sig. | Su, _ => false end. + Definition can_have_lat st := + match st with + | Sp | Su | Sn => false + | _ => true + end. + + Lemma stage_beq_implies_leb : forall st st', stage_beq st st' = true -> leb st st' = true. + Proof. + intros st st'. + now destruct st, st'. + Qed. + + Definition lats := (stage_sb * nat)%type. + + Definition lats_beq l1 l2 := + match l1, l2 with + | (s1, n1), (s2, n2) => stage_beq s1 s2 && (n1 =? n2) + end. + + Variant opcode_sb := + | Load (lat : list lats) (id : nat) + | Store (lat : list lats) (id : nat). + + Definition opcode := opcode_sb. + + Definition opcode_beq o1 o2 := + match o1, o2 with + | Load l1 id1, Load l2 id2 => list_beq lats lats_beq l1 l2 && (id1 =? id2) + | Store l1 id1, Store l2 id2 => list_beq lats lats_beq l1 l2 && (id1 =? id2) + | _, _ => false + end. + + Lemma opcode_eq_dec : forall (o1 o2 : opcode_sb), {o1 = o2} + {o1 <> o2}. + Proof. + repeat decide equality. + Qed. + + Definition lat opc := + match opc with + | Load lat _ | Store lat _ => lat + end. + Definition idx opc := match opc with - | Load _ id | Store id => id + | Load _ id | Store _ id => id end. End StoreBufferSig. @@ -58,7 +104,7 @@ Definition nstg s opc := match s, opc with | Sp, _ => Lsu | Lsu, Load _ _ => Lu - | Lsu, Store _ => Su + | Lsu, Store _ _ => Su | _, _ => Sn end. @@ -110,15 +156,22 @@ Definition free next_id sbState trace (elt : instr_kind) := end. Definition get_latency opc st := - match opc, st with - | Load n _, Lu => n - | _, _ => 0 - end. + if can_have_lat st then + let lat := List.find (fun stl => + match stl with + | (stl, _) => stage_beq st stl + end) (lat opc) in + match lat with + | None => 0 + | Some (_, n) => n + end + else + 0. Definition cycle_elt next_id (busTaken : bool) sbState trace elt := match elt with | (opc, (st, S n)) => - if busTaken then + if (negb (stage_beq Lu st)) || busTaken then (opc, (st, n)) else elt @@ -140,18 +193,6 @@ Definition legal_sb_transitions st sbState sbState' := | _, _ => sbState' = Full \/ sbState' = NotEmpty \/ sbState' = Empty end. -Lemma next_stage_is_higher : - forall opc st st', st' = nstg st opc -> - compare_two (Some (opc, (st, 0))) - (Some (opc, (st', get_latency opc st'))) = true. -Proof. - intros opc st st' Hst'. - rewrite Hst'. - destruct st; - try reflexivity; - now destruct opc. -Qed. - Lemma opc_match_case_identical : forall (A : Type) (a : A) opc, match opc with | Load _ _ | _ => a end = a. Proof. @@ -159,6 +200,107 @@ Proof. now destruct opc. Qed. +Section instr_progress_generalization. + Variable opc : opcode. + Variable st st' : stage. + Variable n : nat. + + Lemma next_stage_is_higher : + st' = nstg st opc -> + compare_two (Some (opc, (st, 0))) + (Some (opc, (st', get_latency opc st'))) = true. + Proof. + intro Hst'. + rewrite Hst'. + destruct st; + try reflexivity; + now destruct opc. + Qed. + + Lemma state_leb_n0 : state_leb (st, 0) (st', S n) = true -> + stage_beq st st' = false. + Proof. + simpl. + now destruct stage_beq. + Qed. + + Lemma state_leb_diff : (opc, (st, 0)) <> (opc, (st', 0)) -> + stage_beq st st' = false. + Proof. + intro Hed. + rewrite stage_nbeq_iff_neq. + intro Hst. + destruct Hed. + now rewrite Hst. + Qed. + + Lemma leb_remains : (st' = Lu -> exists lat id, opc = Load lat id) -> + (st' = Su -> exists lat id, opc = Store lat id) -> + stage_beq st st' = false -> leb st st' = true -> + leb (nstg st opc) st' = true. + Proof. + intros Hlu Hsu Hst Hleb. + + destruct st; + [| destruct opc |..]; + destruct st'; + auto. + - now destruct Hsu as [lat [id' Hopc]]. + - now destruct Hlu as [lat [id' Hopc]]. + Qed. + + Lemma leb_far_remains : stage_beq st st' = false -> leb st st' = true -> + leb (nstg st opc) (nstg st' opc) = true. + Proof. + intros Hst Hleb. + destruct st, st'; + auto; + now destruct opc. + Qed. + + Lemma stage_beq_nstg_nstg_eq : stage_beq st st' = false -> leb st st' = true -> + stage_beq (nstg st opc) (nstg st' opc) = true -> + st' = Sn. + Proof. + destruct st, st'; + try discriminate; + now destruct opc. + Qed. + + Lemma stage_beq_nstg_sn : leb st st' = true -> + stage_beq st (nstg st' opc) = true -> st' = Sn. + Proof. + destruct st, st'; + try discriminate; + now destruct opc. + Qed. + + Lemma leb_nstg_right : leb st st' = true -> leb st (nstg st' opc) = true. + Proof. + destruct st, st'; + try discriminate; + now destruct opc. + Qed. + + Lemma right_progresses_state_leb : + (if stage_beq st st' then true else leb st st') = true -> + (if stage_beq st (nstg st' opc) then + get_latency opc (nstg st' opc) <=? n + else + leb st (nstg st' opc)) = true. + Proof. + intro Hconstr. + + destruct (stage_beq _ st') eqn:Hsb; + [apply stage_beq_implies_leb in Hsb |]; + destruct (stage_beq _ (nstg st' _)) eqn:Hsb'. + - now rewrite stage_beq_nstg_sn. + - now apply leb_nstg_right. + - now rewrite stage_beq_nstg_sn. + - now apply leb_nstg_right. + Qed. +End instr_progress_generalization. + Section monotonicity. Variable opc : opcode. Variable d : instr_kind. @@ -185,8 +327,8 @@ Section monotonicity. | (opc, (st, _)) => idx opc >= next_id t <-> st = Sp end. - Hypothesis Hlu : forall st opc n, st = Lu -> exists id, opc = Load n id. - Hypothesis Hsu : forall st opc, st = Su -> exists id, opc = Store id. + Hypothesis Hlu : forall st opc, st = Lu -> exists lat id, opc = Load lat id. + Hypothesis Hsu : forall st opc, st = Su -> exists lat id, opc = Store lat id. (* Long hypotheses related to stage occupation. *) Hypothesis HfreeLsuPersists : @@ -202,12 +344,10 @@ Section monotonicity. Hypothesis HsbRemainsEmpty : forall st, st = Lsu -> sbEmpty sbState = true -> sbEmpty sbState' = true. - Hypothesis HvalidLat : forall (e : instr_kind) (busTaken : bool), - match e with - | (_, (st, 0)) => True - | (Load dlat _, (st, n)) => st = Lu /\ n <= dlat /\ (busTaken = false -> n = dlat) - | (_, (st, S _)) => False - end. + Hypothesis HvalidLat : forall (e : instr_kind), + match e with + | (opc, (st, n)) => n <= get_latency opc st + end. Section simple_monotonicity. Variable e e' : instr_kind. @@ -241,10 +381,22 @@ Section monotonicity. end. Ltac stage_opc Hyp := - destruct (Hyp eq_refl) as [sopc_internal_id sopc_internal_Hopc]; - (discriminate sopc_internal_Hopc || - rewrite sopc_internal_Hopc; - auto). + destruct Hyp as [lat [id Hopc]]; [ + reflexivity | + (discriminate Hopc || + rewrite Hopc; + auto) + ]. + + Ltac busTaken_delve := + destruct (negb _); + simpl (if (_ || _) then _ else _); + [| + destruct busTaken; [ + rewrite (HbusTakenPersists eq_refl) | + destruct busTaken' + ] + ]. Lemma unmoved_no_lat_can_advance : e = (opc, (st, 0)) -> e = e' -> forall n, @@ -275,9 +427,7 @@ Section monotonicity. * rewrite Nat.eqb_eq in Hidx. rewrite <- Hidx in HnId. destruct HnIdHigh as [_ HnIdHigh']. - pose proof (n_le_he_eq _ _ HnId (HnIdHigh' eq_refl)) as HnIdEq. - rewrite <- Nat.eqb_eq in HnIdEq. - now rewrite HnIdEq. + now rewrite <- (n_le_he_eq _ _ HnId (HnIdHigh' eq_refl)), Nat.eqb_refl. * discriminate. + (* LSU is not free in t: this is irrelevant. *) @@ -304,7 +454,7 @@ Section monotonicity. + discriminate. (* Lu: instructions without latency (this is the case here) move to Sn. *) - - stage_opc (Hlu n). + - stage_opc Hlu. (* Su: if there is space in the store buffer, move to Sn, otherwise, remain in Su. Eliminate cases where sbState/sbState' are inconsistent. *) @@ -345,124 +495,191 @@ Section monotonicity. now apply compare_two_eq_true. - (* Case where an instruction has a latency higher than 0. *) - (* cycle_elt ... e = cycle_elt ... e'. Then, compare_two ... = true. *) - destruct busTaken. - + rewrite (HbusTakenPersists eq_refl). + (* We have e = e' = (opc, (st, S n)) *) + busTaken_delve. + + (* Case where st <> Lu: both can progress. *) + (* cycle_elt ... e = cycle_elt ... e': compare_two ... = true. *) now apply compare_two_eq_true. - + destruct busTaken'. - * simpl. - rewrite stage_beq_eq_true, Nat.leb_le. - auto. - * now apply compare_two_eq_true. + (* Cases where st = Lu. *) + + (* busTaken = true -> busTaken' = true, both can progress. *) + (* cycle_elt ... e = cycle_elt ... e': compare_two ... = true. *) + now apply compare_two_eq_true. + + (* busTaken = false, busTaken' = true. *) + (* cycle_elt ... e = e, cycle_elt ... e' = (opc, (st, n)). *) + (* compare_two ... = n <=? S n. This is trivial. *) + simpl. + rewrite stage_beq_eq_true, Nat.leb_le. + auto. + + (* busTaken = false, busTaken' = false, both can't progress. *) + (* cycle_elt ... e = cycle_elt ... e': compare_two ... = true. *) + now apply compare_two_eq_true. Qed. - Lemma moved_no_lat_is_monotonic : - e = (opc, (st, 0)) -> e' = (opc, (st', 0)) -> e <> e' -> - state_leb (st, 0) (st', 0) = true -> - compare_two (List.nth_error tc i) (List.nth_error tc' i) = true. + Lemma moved_lat_left_is_monotonic : + forall n, e = (opc, (st, n)) -> e' = (opc, (st', 0)) -> e <> e' -> + state_leb (st, n) (st', 0) = true -> + compare_two (List.nth_error tc i) (List.nth_error tc' i) = true. Proof. - intros He He' Hed Hsl. - clear HfreeFromLsuPersists HfreeLsuPersists Hlsbt. - specialize (Hlu st opc 0). - pose proof (Hsu st' opc) as Hsu'. - specialize (Hsu st opc). + intros n He He' Hed Hsl. + + (* The idea of this proof is as follows: + * If we have two instructions, e and e', with e' more advanced than e, + * and e''s latency is equal to 0, e will be, in the worst case, able to + * obtain the same progress than e', but not overtake it. + *) - (* Goal transformation: show exactly how e and e' are modified by the - * cycle function. *) + (* Extract (cycle_elt ... e ). *) work_on_e Hed. - (* Case-by-case analysis *) - destruct st, st'; - try contradiction; - try discriminate. - - (* st = Sp, st' = Lsu, Lu, Su, Sn *) - - now destruct (_ && _), (_ && _), opc. - - now destruct (_ && _), opc. - - now destruct (_ && _), (_ && _). - - now destruct (_ && _), opc. - - (* st = Lsu, st' = Lu, Su, Sn *) - - now destruct (_ && _), opc. - - destruct (_ && _), (_ && _); - try reflexivity; - stage_opc Hsu'. - - now destruct (_ && _), opc. - - (* st = Lu, st' = Sn *) - - (* Trivial when opc = Load, eliminate the case where opc = Store. *) - stage_opc Hlu. - - (* st = Su, st' = Sn *) - - (* Eliminate the case where opc = Load. *) - stage_opc Hsu. - (* Either the instruction remains in Su, or it progresses in Sn. *) - (* Both cases are trivial. *) - now destruct (_ && _). + (* Case-by-case analysis on e's latency, then on the conditions that + * decide whether or not an instruction can progress. *) + destruct n; + repeat match goal with + | [ _ : _ |- context [ if ?X then _ else _ ] ] => destruct X + end; + (* In some cases, e and e' won't progress (or at least, remain in the + * same stage for e), so compare_two ... = + * (if stage_beq st st' then true else leb st st') (or some + * complexified variant on this). This is the same as Hsl, so try to + * apply it preemptively. *) + try assumption; + simpl in Hsl |- *. + + (* n = 0. *) + (* Since e <> e', st < st'. Interesting cases are where e can progress, + * and e cannot while e' can. If e and e' cannot progress, st and st' + * remain the same, and it is the same a proving Hsl. These cases were + * handled before. *) + - (* e and e' progressed. *) + rewrite (state_leb_diff opc st st' Hed) in Hsl. + apply state_leb_diff in Hed. + destruct (stage_beq (nstg _ _) _) eqn:Hsb. + + (* nstg st = nstg st'. This means that st = st' = Sn, and their + * latency is equal to 0. *) + now rewrite (stage_beq_nstg_nstg_eq opc st st'). + + (* nstg st <> nstg st' -> nstg st < nstg st'. *) + (* We have a lemma for this. *) + now apply leb_far_remains. + - (* e progressed, e' did not. *) + rewrite (state_leb_diff opc st st' Hed) in Hsl. + apply state_leb_diff in Hed. + destruct (stage_beq (nstg _ _) _). + + (* nstg st = st'. Trivial. *) + reflexivity. + + (* nstg st <> st' -> nstg st < st'. *) + (* We have a lemma for this. *) + now apply (leb_remains opc st st' (Hlu _ _) (Hsu _ _)). + - (* e cannot progress, but e' can. *) + now apply right_progresses_state_leb. + + (* n <> 0. *) + (* e will remain st, but e' may progress to the next stage. If it can + * not, this is the same as proving Hsl (in the hypothesis), and these + * cases were already handled earlier. If it can, we have to prove that + * e's state is still lower than e'. We have a lemma for this. *) + - (* Case where e did not progress (ie. st = Lu and busTaken = false). *) + (* cycle_elt ... e = (opc, (st, S n)) *) + now apply right_progresses_state_leb. + - (* Case where e did progress. cycle_elt ... e = (opc, (st, n)). *) + now apply right_progresses_state_leb. Qed. Lemma moved_lat_is_monotonic : forall n n', e = (opc, (st, n)) -> e' = (opc, (st', n')) -> e <> e' -> - state_leb (st, n) (st', n') = true -> - compare_two (List.nth_error tc i) (List.nth_error tc' i) = true. + state_leb (st, n) (st', n') = true -> + compare_two (List.nth_error tc i) (List.nth_error tc' i) = true. Proof. intros n n' He He' Hed Hsl. clear HfreeFromLsuPersists HfreeLsuPersists Hlsbt. - pose proof (HvalidLat e' busTaken') as HvalidLat'. - specialize (HvalidLat e busTaken). - - destruct n, n'; [ - (* n and n' are = 0. We have a lemma for that. *) - now apply moved_no_lat_is_monotonic | - (* Other cases: rewrite the goal to make cycle appear. *) - work_on_e Hed; - (* Since n or n' is not equal to 0, opc is a load. Remove other cases. *) - destruct opc; - [| contradiction] .. - ]. - - (* From there, opc = Load. *) - (* n = 0 and n' <> 0. *) - - (* st' = Lu. *) - destruct HvalidLat' as [Hst' [Hlat' _]]. - rewrite Hst' in Hsl, Hed |- *. - destruct st; - try discriminate. - + (* st = Sp. In this case, the next stage is either Sp or Lsu, which are - * both lower than Lu. *) - now destruct (_ && _), busTaken'. - + (* st = Lsu. In this case, the next stage is either Lsu or Lu. *) - destruct (_ && _). - * simpl. - destruct busTaken'; - rewrite Nat.leb_le; - lia. - * now destruct busTaken'. - - (* n <> 0 and n' = 0. *) - - (* st = Lu. *) - destruct HvalidLat as [Hst Hlat]. - rewrite Hst in Hsl, Hed |- *. - destruct st'; - (* Sp, Lsu: impossible since e' would be lower than e. *) - (* Su: impossible since loads cannot be in Su. *) - discriminate - (* Lu: since n' = 0, e' will be in Sn in the next cycle, e will still be *) - (* in Lu. *) - (* Sn: e' will remain in Sn, e will still be in Lu. *) - || now destruct busTaken. - - (* n and n' <> 0. *) - - destruct busTaken. - + now rewrite HbusTakenPersists. - + destruct HvalidLat as [Hst [_ Hdlat]], HvalidLat' as [Hst' [Hlat' Hdlat']]. - rewrite (Hdlat eq_refl), Hst, Hst' in Hed |- *. - destruct busTaken'. - * simpl. - rewrite Nat.leb_le. + specialize (HvalidLat e'). + simpl in Hsl. + + (* Case by case analysis on e''s latency. *) + destruct n'; + (* n' = 0, we have a lemma for this. *) + [now apply (moved_lat_left_is_monotonic n) |]; + work_on_e Hed; + (* Case by case analysis on e's latency. *) + destruct n. + + - (* n = 0, n' <> 0. *) + (* So, e is in another stage than e'. + * In the worst case, e will be in the same stage as e' in the next + * cycle, and e' will remain in the same stage. This will give: + * compare_two (opc, (st', ?N)) (opc, (st', n' - 1)) = true. + * As ?N = get_latency opc st, and n' <= get_latency opc st, + * n' - 1 < ?N, hence the proposition above is true. + *) + pose proof (state_leb_n0 _ _ _ Hsl) as Hst. + specialize (Hlu st' opc). + specialize (Hsu st' opc). + + rewrite Hst in Hsl. + unfold compare_two, state_leb. + + (* e may go to the next stage, e''s latency may be reduced. Check all + * these situations. *) + destruct (_ && _), (_ || _); + (* e go to the next stage: a finer analysis is required. *) + (destruct (stage_beq (nstg _ _) _) eqn:Hsb + (* e does not go to the next stage: st = st' and st < st' + * still hold. *) + (* compare_two ... = if st =? st' then ... else st < st'. *) + || now rewrite Hst); + (* e went to the next stage. *) + match goal with + | [ _ : _ |- leb _ _ = true ] => + (* Cases where nstg st <> st'. *) + (* We must prove that nstg st < st'. We have a lemma for this. *) + now apply leb_remains + | _ => + rewrite stage_beq_iff_eq in Hsb; + rewrite Hsb, Nat.leb_le + end. + + (* Cases where nstg st = st'. *) + + (* Case where e''s latency decreased. *) + (* We have to prove that n' < get_latency opc st', + * and we know that S n' < get_latency opc st. *) + lia. + + (* Case where e''s latency did not decrease (ie. st' = Lu). *) + (* We have to prove that S n' < get_latency opc st, + * while it is already in the hypothesis. *) + exact HvalidLat. + + - (* n and n' <> 0. *) + destruct (stage_eq_dec st st') as [Heq | Hdiff]. + + (* st = st'. *) + (* In the worst case, we have: + * compare_two (opc, (st, n - 1)) (opc, (st, n')) = true + * We know that n > n' (cf. assumptions), so n - 1 >= n'. + * The proposition is true. + * Other cases are: + * compare_two (opc, (st, n)) (opc, (st, n')) = true + * compare_two (opc, (st, n - 1)) (opc, (st, n' - 1)) = true + * Both propositions are true. *) + rewrite Heq in Hsl |- *. + busTaken_delve; + (* Take care of cases where both instructions can or cannot + * progress automatically. *) + auto. + * (* st = Lu. busTaken = false, busTaken' = true. *) + (* e = (opc, (st', S n)), e' = (opc, (st', S n')). *) + (* S n' <=? S n. We must prove that n' <=? S n. *) + simpl. + rewrite stage_beq_eq_true, Nat.leb_le in Hsl |- *. lia. - * now rewrite Hdlat' in Hed |- *. + + + (* st <> st'. *) + (* Both instructions will remain in their respective stages. + * (Their latency may or may not decrease, but that's not + * important.) *) + unfold compare_two, state_leb. + rewrite <- stage_nbeq_iff_neq in Hdiff. + rewrite Hdiff in Hsl. + destruct (_ || _), (_ || _); + now rewrite Hdiff. Qed. Theorem is_monotonic : -- GitLab