diff --git a/src/modular_store_buffer.v b/src/modular_store_buffer.v
index 6fe24efa33dd5b90b5bd28bab0af2108266f9b6f..483cddbeed842bfd186b13f833c7c8fef87fe9ba 100644
--- a/src/modular_store_buffer.v
+++ b/src/modular_store_buffer.v
@@ -3,10 +3,6 @@ From StoreBuffer Require Import modular_definitions utils.
 Import ListNotations.
 
 Module StoreBufferSig <: Sig.
-  Variant opcode_sb :=
-    | Load (dlat : nat) (id : nat)
-    | Store (id : nat).
-
   Variant stage_sb :=
     | Sp
     | Lsu
@@ -16,11 +12,6 @@ Module StoreBufferSig <: Sig.
 
   Definition first_stage := Sp.
 
-  Scheme Equality for opcode_sb.
-  Definition opcode := opcode_sb.
-  Definition opcode_beq := opcode_sb_beq.
-  Definition opcode_eq_dec := opcode_sb_eq_dec.
-
   Scheme Equality for stage_sb.
   Definition stage := stage_sb.
   Definition stage_beq := stage_sb_beq.
@@ -31,6 +22,19 @@ Module StoreBufferSig <: Sig.
     now destruct s.
   Qed.
 
+  Lemma stage_beq_iff_eq : forall s s', stage_beq s s' = true <-> s = s'.
+  Proof.
+    intros s s'.
+    now destruct s, s'.
+  Qed.
+
+  Lemma stage_nbeq_iff_neq : forall s s', stage_beq s s' = false <-> s <> s'.
+  Proof.
+    intros s s'.
+    rewrite <- not_true_iff_false.
+    apply not_iff_compat, stage_beq_iff_eq.
+  Qed.
+
   (* Boolean stage comparison.  Sp < Lsu < (Lu, Su) < Sn. *)
   Definition leb x y :=
     match x, y with
@@ -45,9 +49,51 @@ Module StoreBufferSig <: Sig.
     | Su, _ => false
     end.
 
+  Definition can_have_lat st :=
+    match st with
+    | Sp | Su | Sn => false
+    | _ => true
+    end.
+
+  Lemma stage_beq_implies_leb : forall st st', stage_beq st st' = true -> leb st st' = true.
+  Proof.
+    intros st st'.
+    now destruct st, st'.
+  Qed.
+
+  Definition lats := (stage_sb * nat)%type.
+
+  Definition lats_beq l1 l2 :=
+    match l1, l2 with
+    | (s1, n1), (s2, n2) => stage_beq s1 s2 && (n1 =? n2)
+    end.
+
+  Variant opcode_sb :=
+    | Load (lat : list lats) (id : nat)
+    | Store (lat : list lats) (id : nat).
+
+  Definition opcode := opcode_sb.
+
+  Definition opcode_beq o1 o2 :=
+    match o1, o2 with
+    | Load l1 id1, Load l2 id2 => list_beq lats lats_beq l1 l2 && (id1 =? id2)
+    | Store l1 id1, Store l2 id2 => list_beq lats lats_beq l1 l2 && (id1 =? id2)
+    | _, _ => false
+    end.
+
+  Lemma opcode_eq_dec : forall (o1 o2 : opcode_sb), {o1 = o2} + {o1 <> o2}.
+  Proof.
+    repeat decide equality.
+  Qed.
+
+  Definition lat opc :=
+    match opc with
+    | Load lat _ | Store lat _ => lat
+    end.
+
   Definition idx opc :=
     match opc with
-    | Load _ id | Store id => id
+    | Load _ id | Store _ id => id
     end.
 End StoreBufferSig.
 
@@ -58,7 +104,7 @@ Definition nstg s opc :=
   match s, opc with
   | Sp, _ => Lsu
   | Lsu, Load _ _ => Lu
-  | Lsu, Store _ => Su
+  | Lsu, Store _ _ => Su
   | _, _ => Sn
   end.
 
@@ -110,15 +156,22 @@ Definition free next_id sbState trace (elt : instr_kind) :=
   end.
 
 Definition get_latency opc st :=
-  match opc, st with
-  | Load n _, Lu => n
-  | _, _ => 0
-  end.
+  if can_have_lat st then
+    let lat := List.find (fun stl =>
+                            match stl with
+                            | (stl, _) => stage_beq st stl
+                            end) (lat opc) in
+    match lat with
+    | None => 0
+    | Some (_, n) => n
+    end
+  else
+    0.
 
 Definition cycle_elt next_id (busTaken : bool) sbState trace elt :=
   match elt with
   | (opc, (st, S n)) =>
-      if busTaken then
+      if (negb (stage_beq Lu st)) || busTaken then
         (opc, (st, n))
       else
         elt
@@ -140,18 +193,6 @@ Definition legal_sb_transitions st sbState sbState' :=
   | _, _ => sbState' = Full \/ sbState' = NotEmpty \/ sbState' = Empty
   end.
 
-Lemma next_stage_is_higher :
-  forall opc st st', st' = nstg st opc ->
-                compare_two (Some (opc, (st, 0)))
-                  (Some (opc, (st', get_latency opc st'))) = true.
-Proof.
-  intros opc st st' Hst'.
-  rewrite Hst'.
-  destruct st;
-    try reflexivity;
-    now destruct opc.
-Qed.
-
 Lemma opc_match_case_identical : forall (A : Type) (a : A) opc,
     match opc with | Load _ _ | _ => a end = a.
 Proof.
@@ -159,6 +200,107 @@ Proof.
   now destruct opc.
 Qed.
 
+Section instr_progress_generalization.
+  Variable opc : opcode.
+  Variable st st' : stage.
+  Variable n : nat.
+
+  Lemma next_stage_is_higher :
+    st' = nstg st opc ->
+    compare_two (Some (opc, (st, 0)))
+      (Some (opc, (st', get_latency opc st'))) = true.
+  Proof.
+    intro Hst'.
+    rewrite Hst'.
+    destruct st;
+      try reflexivity;
+      now destruct opc.
+  Qed.
+
+  Lemma state_leb_n0 : state_leb (st, 0) (st', S n) = true ->
+                       stage_beq st st' = false.
+  Proof.
+    simpl.
+    now destruct stage_beq.
+  Qed.
+
+  Lemma state_leb_diff : (opc, (st, 0)) <> (opc, (st', 0)) ->
+                         stage_beq st st' = false.
+  Proof.
+    intro Hed.
+    rewrite stage_nbeq_iff_neq.
+    intro Hst.
+    destruct Hed.
+    now rewrite Hst.
+  Qed.
+
+  Lemma leb_remains : (st' = Lu -> exists lat id, opc = Load lat id) ->
+                      (st' = Su -> exists lat id, opc = Store lat id) ->
+                      stage_beq st st' = false -> leb st st' = true ->
+                      leb (nstg st opc) st' = true.
+  Proof.
+    intros Hlu Hsu Hst Hleb.
+
+    destruct st;
+      [| destruct opc |..];
+      destruct st';
+      auto.
+    - now destruct Hsu as [lat [id' Hopc]].
+    - now destruct Hlu as [lat [id' Hopc]].
+  Qed.
+
+  Lemma leb_far_remains : stage_beq st st' = false -> leb st st' = true ->
+                          leb (nstg st opc) (nstg st' opc) = true.
+  Proof.
+    intros Hst Hleb.
+    destruct st, st';
+      auto;
+      now destruct opc.
+  Qed.
+
+  Lemma stage_beq_nstg_nstg_eq : stage_beq st st' = false -> leb st st' = true ->
+                                 stage_beq (nstg st opc) (nstg st' opc) = true ->
+                                 st' = Sn.
+  Proof.
+    destruct st, st';
+      try discriminate;
+      now destruct opc.
+  Qed.
+
+  Lemma stage_beq_nstg_sn : leb st st' = true ->
+                            stage_beq st (nstg st' opc) = true -> st' = Sn.
+  Proof.
+    destruct st, st';
+      try discriminate;
+      now destruct opc.
+  Qed.
+
+  Lemma leb_nstg_right : leb st st' = true -> leb st (nstg st' opc) = true.
+  Proof.
+    destruct st, st';
+      try discriminate;
+      now destruct opc.
+  Qed.
+
+  Lemma right_progresses_state_leb :
+    (if stage_beq st st' then true else leb st st') = true ->
+    (if stage_beq st (nstg st' opc) then
+       get_latency opc (nstg st' opc) <=? n
+     else
+       leb st (nstg st' opc)) = true.
+  Proof.
+    intro Hconstr.
+
+    destruct (stage_beq _ st') eqn:Hsb;
+      [apply stage_beq_implies_leb in Hsb |];
+      destruct (stage_beq _ (nstg st' _)) eqn:Hsb'.
+    - now rewrite stage_beq_nstg_sn.
+    - now apply leb_nstg_right.
+    - now rewrite stage_beq_nstg_sn.
+    - now apply leb_nstg_right.
+  Qed.
+End instr_progress_generalization.
+
 Section monotonicity.
   Variable opc : opcode.
   Variable d : instr_kind.
@@ -185,8 +327,8 @@ Section monotonicity.
   | (opc, (st, _)) => idx opc >= next_id t <-> st = Sp
   end.
 
-  Hypothesis Hlu : forall st opc n, st = Lu -> exists id, opc = Load n id.
-  Hypothesis Hsu : forall st opc, st = Su -> exists id, opc = Store id.
+  Hypothesis Hlu : forall st opc, st = Lu -> exists lat id, opc = Load lat id.
+  Hypothesis Hsu : forall st opc, st = Su -> exists lat id, opc = Store lat id.
 
   (* Long hypotheses related to stage occupation. *)
   Hypothesis HfreeLsuPersists :
@@ -202,12 +344,10 @@ Section monotonicity.
   Hypothesis HsbRemainsEmpty :
     forall st, st = Lsu -> sbEmpty sbState = true -> sbEmpty sbState' = true.
 
-  Hypothesis HvalidLat : forall (e : instr_kind) (busTaken : bool),
-      match e with
-      | (_, (st, 0)) => True
-      | (Load dlat _, (st, n)) => st = Lu /\ n <= dlat /\ (busTaken = false -> n = dlat)
-      | (_, (st, S _)) => False
-      end.
+  Hypothesis HvalidLat : forall (e : instr_kind),
+  match e with
+  | (opc, (st, n)) => n <= get_latency opc st
+  end.
 
   Section simple_monotonicity.
     Variable e e' : instr_kind.
@@ -241,10 +381,22 @@ Section monotonicity.
         end.
 
     Ltac stage_opc Hyp :=
-      destruct (Hyp eq_refl) as [sopc_internal_id sopc_internal_Hopc];
-        (discriminate sopc_internal_Hopc ||
-           rewrite sopc_internal_Hopc;
-           auto).
+      destruct Hyp as [lat [id Hopc]]; [
+          reflexivity |
+          (discriminate Hopc ||
+             rewrite Hopc;
+             auto)
+        ].
+
+    Ltac busTaken_delve :=
+      destruct (negb _);
+        simpl (if (_ || _) then _ else _);
+        [|
+          destruct busTaken; [
+            rewrite (HbusTakenPersists eq_refl) |
+            destruct busTaken'
+          ]
+        ].
 
     Lemma unmoved_no_lat_can_advance :
       e = (opc, (st, 0)) -> e = e' -> forall n,
@@ -275,9 +427,7 @@ Section monotonicity.
           * rewrite Nat.eqb_eq in Hidx.
             rewrite <- Hidx in HnId.
             destruct HnIdHigh as [_ HnIdHigh'].
-            pose proof (n_le_he_eq _ _ HnId (HnIdHigh' eq_refl)) as HnIdEq.
-            rewrite <- Nat.eqb_eq in HnIdEq.
-            now rewrite HnIdEq.
+            now rewrite <- (n_le_he_eq _ _ HnId (HnIdHigh' eq_refl)), Nat.eqb_refl.
           * discriminate.
 
         + (* LSU is not free in t: this is irrelevant. *)
@@ -304,7 +454,7 @@ Section monotonicity.
         + discriminate.
 
       (* Lu: instructions without latency (this is the case here) move to Sn. *)
-      - stage_opc (Hlu n).
+      - stage_opc Hlu.
 
       (* Su: if there is space in the store buffer, move to Sn, otherwise, remain
        in Su.  Eliminate cases where sbState/sbState' are inconsistent. *)
@@ -345,124 +495,191 @@ Section monotonicity.
             now apply compare_two_eq_true.
 
       - (* Case where an instruction has a latency higher than 0. *)
-        (* cycle_elt ... e = cycle_elt ... e'.  Then, compare_two ... = true. *)
-        destruct busTaken.
-        + rewrite (HbusTakenPersists eq_refl).
+        (* We have e = e' = (opc, (st, S n)) *)
+        busTaken_delve.
+        + (* Case where st <> Lu: both can progress. *)
+          (* cycle_elt ... e = cycle_elt ... e': compare_two ... = true. *)
           now apply compare_two_eq_true.
-        + destruct busTaken'.
-          * simpl.
 
-            rewrite stage_beq_eq_true, Nat.leb_le.
-            auto.
-          * now apply compare_two_eq_true.
+        (* Cases where st = Lu. *)
+        + (* busTaken = true -> busTaken' = true, both can progress. *)
+          (* cycle_elt ... e = cycle_elt ... e': compare_two ... = true. *)
+          now apply compare_two_eq_true.
+        + (* busTaken = false, busTaken' = true. *)
+          (* cycle_elt ... e = e, cycle_elt ... e' = (opc, (st, n)). *)
+          (* compare_two ... = n <=? S n.  This is trivial. *)
+          simpl.
+          rewrite stage_beq_eq_true, Nat.leb_le.
+          auto.
+        + (* busTaken = false, busTaken' = false, both can't progress. *)
+          (* cycle_elt ... e = cycle_elt ... e': compare_two ... = true. *)
+          now apply compare_two_eq_true.
     Qed.
 
-    Lemma moved_no_lat_is_monotonic :
-      e = (opc, (st, 0)) -> e' = (opc, (st', 0)) -> e <> e' ->
-      state_leb (st, 0) (st', 0) = true ->
-      compare_two (List.nth_error tc i) (List.nth_error tc' i) = true.
+    Lemma moved_lat_left_is_monotonic :
+      forall n, e = (opc, (st, n)) -> e' = (opc, (st', 0)) -> e <> e' ->
+           state_leb (st, n) (st', 0) = true ->
+           compare_two (List.nth_error tc i) (List.nth_error tc' i) = true.
     Proof.
-      intros He He' Hed Hsl.
-      clear HfreeFromLsuPersists HfreeLsuPersists Hlsbt.
-      specialize (Hlu st opc 0).
-      pose proof (Hsu st' opc) as Hsu'.
-      specialize (Hsu st opc).
+      intros n He He' Hed Hsl.
+
+      (* The idea of this proof is as follows:
+       * If we have two instructions, e and e', with e' more advanced than e,
+       * and e''s latency is equal to 0, e will be, in the worst case, able to
+       * obtain the same progress than e', but not overtake it.
+       *)
 
-      (* Goal transformation: show exactly how e and e' are modified by the
-       * cycle function. *)
+      (* Extract (cycle_elt ... e ). *)
       work_on_e Hed.
 
-      (* Case-by-case analysis *)
-      destruct st, st';
-        try contradiction;
-        try discriminate.
-
-      (* st = Sp, st' = Lsu, Lu, Su, Sn *)
-      - now destruct (_ && _), (_ && _), opc.
-      - now destruct (_ && _), opc.
-      - now destruct (_ && _), (_ && _).
-      - now destruct (_ && _), opc.
-
-      (* st = Lsu, st' = Lu, Su, Sn *)
-      - now destruct (_ && _), opc.
-      - destruct (_ && _), (_ && _);
-          try reflexivity;
-          stage_opc Hsu'.
-      - now destruct (_ && _), opc.
-
-      (* st = Lu, st' = Sn *)
-      - (* Trivial when opc = Load, eliminate the case where opc = Store. *)
-        stage_opc Hlu.
-
-      (* st = Su, st' = Sn *)
-      - (* Eliminate the case where opc = Load. *)
-        stage_opc Hsu.
-        (* Either the instruction remains in Su, or it progresses in Sn. *)
-        (* Both cases are trivial. *)
-        now destruct (_ && _).
+      (* Case-by-case analysis on e's latency, then on the conditions that
+       * decide whether or not an instruction can progress. *)
+      destruct n;
+        repeat match goal with
+          | [ _ : _ |- context [ if ?X then _ else _ ] ] => destruct X
+          end;
+        (* In some cases, e and e' won't progress (or at least, remain in the
+         * same stage for e), so compare_two ... =
+         * (if stage_beq st st' then true else leb st st') (or some
+         * complexified variant on this).  This is the same as Hsl, so try to
+         * apply it preemptively. *)
+        try assumption;
+        simpl in Hsl |- *.
+
+      (* n = 0. *)
+      (* Since e <> e', st < st'.  Interesting cases are where e can progress,
+       * and e cannot while e' can.  If e and e' cannot progress, st and st'
+       * remain the same, and it is the same a proving Hsl.  These cases were
+       * handled before. *)
+      - (* e and e' progressed. *)
+        rewrite (state_leb_diff opc st st' Hed) in Hsl.
+        apply state_leb_diff in Hed.
+        destruct (stage_beq (nstg _ _) _) eqn:Hsb.
+        + (* nstg st = nstg st'.  This means that st = st' = Sn, and their
+           * latency is equal to 0. *)
+          now rewrite (stage_beq_nstg_nstg_eq opc st st').
+        + (* nstg st <> nstg st' -> nstg st < nstg st'. *)
+          (* We have a lemma for this. *)
+          now apply leb_far_remains.
+      - (* e progressed, e' did not. *)
+        rewrite (state_leb_diff opc st st' Hed) in Hsl.
+        apply state_leb_diff in Hed.
+        destruct (stage_beq (nstg _ _) _).
+        + (* nstg st = st'.  Trivial. *)
+          reflexivity.
+        + (* nstg st <> st' -> nstg st < st'. *)
+          (* We have a lemma for this. *)
+          now apply (leb_remains opc st st' (Hlu _ _) (Hsu _ _)).
+      - (* e cannot progress, but e' can. *)
+        now apply right_progresses_state_leb.
+
+      (* n <> 0. *)
+      (* e will remain st, but e' may progress to the next stage.  If it can
+       * not, this is the same as proving Hsl (in the hypothesis), and these
+       * cases were already handled earlier.  If it can, we have to prove that
+       * e's state is still lower than e'.  We have a lemma for this. *)
+      - (* Case where e did not progress (ie. st = Lu and busTaken = false). *)
+        (* cycle_elt ... e = (opc, (st, S n)) *)
+        now apply right_progresses_state_leb.
+      - (* Case where e did progress.  cycle_elt ... e = (opc, (st, n)). *)
+        now apply right_progresses_state_leb.
     Qed.
 
     Lemma moved_lat_is_monotonic :
       forall n n', e = (opc, (st, n)) -> e' = (opc, (st', n')) -> e <> e' ->
-                   state_leb (st, n) (st', n') = true ->
-                   compare_two (List.nth_error tc i) (List.nth_error tc' i) = true.
+              state_leb (st, n) (st', n') = true ->
+              compare_two (List.nth_error tc i) (List.nth_error tc' i) = true.
     Proof.
       intros n n' He He' Hed Hsl.
       clear HfreeFromLsuPersists HfreeLsuPersists Hlsbt.
-      pose proof (HvalidLat e' busTaken') as HvalidLat'.
-      specialize (HvalidLat e busTaken).
-
-      destruct n, n'; [
-          (* n and n' are = 0.  We have a lemma for that. *)
-          now apply moved_no_lat_is_monotonic |
-            (* Other cases: rewrite the goal to make cycle appear. *)
-          work_on_e Hed;
-            (* Since n or n' is not equal to 0, opc is a load.  Remove other cases. *)
-            destruct opc;
-            [| contradiction] ..
-        ].
-
-      (* From there, opc = Load. *)
-      (* n = 0 and n' <> 0. *)
-      - (* st' = Lu. *)
-        destruct HvalidLat' as [Hst' [Hlat' _]].
-        rewrite Hst' in Hsl, Hed |- *.
-        destruct st;
-          try discriminate.
-        + (* st = Sp.  In this case, the next stage is either Sp or Lsu, which are
-           * both lower than Lu. *)
-          now destruct (_ && _), busTaken'.
-        + (* st = Lsu.  In this case, the next stage is either Lsu or Lu. *)
-          destruct (_ && _).
-          * simpl.
-            destruct busTaken';
-              rewrite Nat.leb_le;
-              lia.
-          * now destruct busTaken'.
-
-      (* n <> 0 and n' = 0. *)
-      - (* st = Lu. *)
-        destruct HvalidLat as [Hst Hlat].
-        rewrite Hst in Hsl, Hed |- *.
-        destruct st';
-          (* Sp, Lsu: impossible since e' would be lower than e. *)
-          (* Su: impossible since loads cannot be in Su. *)
-          discriminate
-          (* Lu: since n' = 0, e' will be in Sn in the next cycle, e will still be *)
-          (* in Lu. *)
-          (* Sn: e' will remain in Sn, e will still be in Lu. *)
-          || now destruct busTaken.
-
-      (* n and n' <> 0. *)
-      - destruct busTaken.
-        + now rewrite HbusTakenPersists.
-        + destruct HvalidLat as [Hst [_ Hdlat]], HvalidLat' as [Hst' [Hlat' Hdlat']].
-          rewrite (Hdlat eq_refl), Hst, Hst' in Hed |- *.
-          destruct busTaken'.
-          * simpl.
-            rewrite Nat.leb_le.
+      specialize (HvalidLat e').
+      simpl in Hsl.
+
+      (* Case by case analysis on e''s latency. *)
+      destruct n';
+        (* n' = 0, we have a lemma for this. *)
+        [now apply (moved_lat_left_is_monotonic n) |];
+        work_on_e Hed;
+        (* Case by case analysis on e's latency. *)
+        destruct n.
+
+      - (* n = 0, n' <> 0. *)
+        (* So, e is in another stage than e'.
+         * In the worst case, e will be in the same stage as e' in the next
+         * cycle, and e' will remain in the same stage.  This will give:
+         * compare_two (opc, (st', ?N)) (opc, (st', n' - 1)) = true.
+         * As ?N = get_latency opc st, and n' <= get_latency opc st,
+         * n' - 1 < ?N, hence the proposition above is true.
+         *)
+        pose proof (state_leb_n0 _ _ _ Hsl) as Hst.
+        specialize (Hlu st' opc).
+        specialize (Hsu st' opc).
+
+        rewrite Hst in Hsl.
+        unfold compare_two, state_leb.
+
+        (* e may go to the next stage, e''s latency may be reduced.  Check all
+         * these situations. *)
+        destruct (_ && _), (_ || _);
+          (* e go to the next stage: a finer analysis is required. *)
+          (destruct (stage_beq (nstg _ _) _) eqn:Hsb
+          (* e does not go to the next stage: st = st' and st < st'
+           * still hold. *)
+          (* compare_two ... = if st =? st' then ... else st < st'. *)
+            || now rewrite Hst);
+          (* e went to the next stage. *)
+          match goal with
+          | [ _ : _ |- leb _ _ = true ] =>
+              (* Cases where nstg st <> st'. *)
+              (* We must prove that nstg st < st'.  We have a lemma for this. *)
+              now apply leb_remains
+          | _ =>
+              rewrite stage_beq_iff_eq in Hsb;
+                rewrite Hsb, Nat.leb_le
+          end.
+
+        (* Cases where nstg st = st'. *)
+        + (* Case where e''s latency decreased. *)
+          (* We have to prove that n' < get_latency opc st',
+           * and we know that S n' < get_latency opc st. *)
+          lia.
+        + (* Case where e''s latency did not decrease (ie. st' = Lu). *)
+          (* We have to prove that S n' < get_latency opc st,
+           * while it is already in the hypothesis. *)
+          exact HvalidLat.
+
+      - (* n and n' <> 0. *)
+        destruct (stage_eq_dec st st') as [Heq | Hdiff].
+        + (* st = st'. *)
+          (* In the worst case, we have:
+           * compare_two (opc, (st, n - 1)) (opc, (st, n')) = true
+           * We know that n > n' (cf. assumptions), so n - 1 >= n'.
+           * The proposition is true.
+           * Other cases are:
+           * compare_two (opc, (st, n)) (opc, (st, n')) = true
+           * compare_two (opc, (st, n - 1)) (opc, (st, n' - 1)) = true
+           * Both propositions are true. *)
+          rewrite Heq in Hsl |- *.
+          busTaken_delve;
+            (* Take care of cases where both instructions can or cannot
+             * progress automatically. *)
+            auto.
+          * (* st = Lu.  busTaken = false, busTaken' = true. *)
+            (* e = (opc, (st', S n)), e' = (opc, (st', S n')). *)
+            (* S n' <=? S n.  We must prove that n' <=? S n. *)
+            simpl.
+            rewrite stage_beq_eq_true, Nat.leb_le in Hsl |- *.
             lia.
-          * now rewrite Hdlat' in Hed |- *.
+
+        + (* st <> st'. *)
+          (* Both instructions will remain in their respective stages.
+           * (Their latency may or may not decrease, but that's not
+           * important.) *)
+          unfold compare_two, state_leb.
+          rewrite <- stage_nbeq_iff_neq in Hdiff.
+          rewrite Hdiff in Hsl.
+          destruct (_ || _), (_ || _);
+            now rewrite Hdiff.
     Qed.
 
     Theorem is_monotonic :