test/TensorFlow/sese_loop_canonicalization.sil

// RUN: %target-sil-opt -tf-xla-cfg-canonicalize -tf-ensure-single-loop-exit -assume-parsing-unqualified-ownership-sil %s -o /dev/null | %FileCheck %s

import Builtin
import Swift
import TensorFlow

/*
public func loopTest(breakCount:Int32) -> Int32 {
  var count:Int32 = 0
  var sum:Int32 = 0
  while (count < 100) {
    sum += count
    count += 1
    if (count == breakCount) {
      break;
    }
  }
  return sum
}
*/
// CHECK-LABEL:--- XLA CFG Canonicalize: $loopWithBreak
// CHECK: [sequence
// CHECK:   <while Preheader: [[PHDR:bb[0-9]+]], Header: [[HDR:bb[0-9]+]], exit: [[EXIT:bb[0-9]+]]
// CHECK:     [sequence
// CHECK:       {condition Header: [[ORIG_HDR:bb[0-9]+]]
// CHECK:         {condition Header: [[BRK_COND:bb[0-9]+]]
// CHECK:           block [[BRK_TRUE:bb[0-9]+]]
// CHECK:           block [[BRK_FALSE:bb[0-9]+]]}
// CHECK:         block [[ORIG_EXIT:bb[0-9]+]]}
// CHECK:       block [[LATCH:bb[0-9]+]]
// CHECK:   block [[EXIT]]]
// CHECK: --- XLA CFG Canonicalize end

//-- Preheader sets up the undefs, exit index, and stayInLoop flag.
// CHECK: [[PHDR]](%0 : $Builtin.Int32):
// CHECK: [[CONST_ONE:%.*]] =  integer_literal $Builtin.Int32, 1
// CHECK: [[PHDR_EXIT:%.*]] = graph_op "Const"() {dtype: $Builtin.Int32, value$tensor: i32 0, __device: "ALL_DEVICES"}
// CHECK: [[PHDR_FLAG:%.*]] = graph_op "Const"() {dtype: $Builtin.Int1, value$tensor: i1 -1, __device: "ALL_DEVICES"}
// CHECK: br [[HDR]]([[A:%.*]] : $Builtin.Int32, [[A]] : $Builtin.Int32, [[A]] : $Builtin.Int32, [[PHDR_EXIT]] : $TensorHandle<Builtin.Int32>, [[PHDR_FLAG]] : $TensorHandle<Builtin.Int1>)

// Original header
// CHECK: [[ORIG_HDR]]:
// CHECK:  [[A:%.*]] = builtin "cmp_slt_Int32"
// CHECK:  cond_br [[A]], [[BRK_COND]], [[ORIG_EXIT]]

//- Sets up index to 1 and flag to false on the exit branch of the original header to latch.
// CHECK: [[ORIG_EXIT]]:
// CHECK:   [[LOCAL_EXIT_INDEX:%.*]] = graph_op "Const"() {dtype: $Builtin.Int32, value$tensor: i32 1, __device: "ALL_DEVICES"} : $TensorHandle<Builtin.Int32>
// CHECK:   [[LOCAL_STAY_FLAG:%.*]] = graph_op "Const"() {dtype: $Builtin.Int1, value$tensor: i1 0, __device: "ALL_DEVICES"} : $TensorHandle<Builtin.Int1>
// CHECK:  br [[LATCH]]([[SUM_AT_HDR:%.*]] : $Builtin.Int32, [[COUNT_AT_HDR:%.*]] : $Builtin.Int32, [[SUM_AT_HDR]] : $Builtin.Int32, [[LOCAL_EXIT_INDEX]] : $TensorHandle<Builtin.Int32>, [[LOCAL_STAY_FLAG]] : $TensorHandle<Builtin.Int1>)

// CHECK: [[BRK_COND]]:
// CHECK:  [[SUM_ESCAPING:%.*]] = builtin "sadd_with_overflow_Int32"([[SUM_AT_HDR]] : $Builtin.Int32, [[COUNT_AT_HDR]] : $Builtin.Int32) : $Builtin.Int32
// CHECK:  [[COUNT_ESCAPING:%.*]] = builtin "sadd_with_overflow_Int32"([[COUNT_AT_HDR]] : $Builtin.Int32, [[CONST_ONE]] : $Builtin.Int32) : $Builtin.Int32
// CHECK: cond_br {{.*}}, [[BRK_TRUE]], [[BRK_FALSE]]

//- Sets up index to 0 and flag to true on the false branch of the if with break to latch.
// CHECK: [[BRK_FALSE]]:
// CHECK: [[LOCAL_EXIT_INDEX:%.*]] = graph_op "Const"() {dtype: $Builtin.Int32, value$tensor: i32 0, __device: "ALL_DEVICES"} : $TensorHandle<Builtin.Int32>
// CHECK: [[LOCAL_STAY_FLAG:%.*]] = graph_op "Const"() {dtype: $Builtin.Int1, value$tensor: i1 -1, __device: "ALL_DEVICES"} : $TensorHandle<Builtin.Int1>
// CHECK:  br [[LATCH]]([[SUM_ESCAPING]] : $Builtin.Int32, [[COUNT_ESCAPING]] : $Builtin.Int32, [[SUM_ESCAPING_AT_HDR:%.*]] : $Builtin.Int32, [[LOCAL_EXIT_INDEX]] : $TensorHandle<Builtin.Int32>, [[LOCAL_STAY_FLAG]] : $TensorHandle<Builtin.Int1>)

//- Sets up index to 1 and flag to false on the true branch of the if with break to latch.
// CHECK: [[BRK_TRUE]]:
// CHECK:  [[LOCAL_EXIT_INDEX:%.*]] = graph_op "Const"() {dtype: $Builtin.Int32, value$tensor: i32 1, __device: "ALL_DEVICES"} : $TensorHandle<Builtin.Int32>
// CHECK:  [[LOCAL_STAY_FLAG:%.*]] = graph_op "Const"() {dtype: $Builtin.Int1, value$tensor: i1 0, __device: "ALL_DEVICES"} : $TensorHandle<Builtin.Int1>
// CHECK: br [[LATCH]]([[SUM_AT_HDR]] : $Builtin.Int32, [[COUNT_AT_HDR]] : $Builtin.Int32, [[SUM_ESCAPING]] : $Builtin.Int32, [[LOCAL_EXIT_INDEX]] : $TensorHandle<Builtin.Int32>, [[LOCAL_STAY_FLAG]] : $TensorHandle<Builtin.Int1>)

//-- New Header simply checks flag
// CHECK: [[HDR]]([[SUM_AT_HDR]] : $Builtin.Int32, [[COUNT_AT_HDR]] : $Builtin.Int32, [[SUM_ESCAPING_AT_HDR]] : $Builtin.Int32, [[HDR_EXIT_ARG:%.*]] : $TensorHandle<Builtin.Int32>, [[HDR_FLAG_ARG:%.*]] : $TensorHandle<Builtin.Int1>):
// CHECK:  [[B:%.*]] = graph_op "tf_tensor_to_i1"([[HDR_FLAG_ARG]] : $TensorHandle<Builtin.Int1>) {{.*}} : $Builtin.Int1
// CHECK:  cond_br [[B]], [[ORIG_HDR]], [[EXIT]]

sil @$loopWithBreak : $@convention(thin) (Builtin.Int32) -> Builtin.Int32 {
// %0                                             // user: %12
bb0(%0 : $Builtin.Int32):
  %1 = integer_literal $Builtin.Int32, 0          // user: %4
  %2 = integer_literal $Builtin.Int32, 1          // users: %11, %4
  %3 = integer_literal $Builtin.Int32, 100        // user: %7
  br bb1(%1 : $Builtin.Int32, %1 : $Builtin.Int32) // id: %4

// %5                                             // users: %10, %9
// %6                                             // users: %11, %10, %7
bb1(%5 : $Builtin.Int32, %6 : $Builtin.Int32):    // Preds: bb4 bb0
  %7 = builtin "cmp_slt_Int32"(%6 : $Builtin.Int32, %3 : $Builtin.Int32) : $Builtin.Int1 // user: %8
  cond_br %7, bb3, bb2                            // id: %8

bb2:                                              // Preds: bb1
  br bb6(%5 : $Builtin.Int32)                     // id: %9

bb3:                                              // Preds: bb1
  %10 = builtin "sadd_with_overflow_Int32"(%5 : $Builtin.Int32, %6 : $Builtin.Int32) : $Builtin.Int32 // users: %15, %14
  %11 = builtin "sadd_with_overflow_Int32"(%6 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int32 // users: %14, %12
  %12 = builtin "cmp_eq_Int32"(%11 : $Builtin.Int32, %0 : $Builtin.Int32) : $Builtin.Int1 // user: %13
  cond_br %12, bb5, bb4                           // id: %13

bb4:                                              // Preds: bb3
  br bb1(%10 : $Builtin.Int32, %11 : $Builtin.Int32) // id: %14

bb5:                                              // Preds: bb3
  br bb6(%10 : $Builtin.Int32)                    // id: %15

// %16                                            // user: %17
bb6(%16 : $Builtin.Int32):                        // Preds: bb5 bb2
  return %16 : $Builtin.Int32                     // id: %17
}


/*
public func nestedLoopWithBreak(breakCount:Int32) -> Int32 {
  var sum:Int32 = 0
  var j:Int32 = 0
  var count:Int32 = 0
  while j < 100 {
    while (count < 100) {
      sum += count
      count += 1
      if (count == breakCount) {
        break;
      }
    }
    j += 1
    count -= breakCount
    if (sum > breakCount) {
      break;
    }
  }
  return sum
}
*/
// CHECK-LABEL: --- XLA CFG Canonicalize: $nestedLoopWithBreak
// CHECK:[sequence
// CHECK:  <while Preheader: [[OPHDR:bb[0-9]+]], Header: [[OHDR:bb[0-9]+]], exit: [[OEXIT:bb[0-9]+]]
// CHECK:    [sequence
// CHECK:      {condition Header: [[OCOND:bb[0-9]+]]
// CHECK:        [sequence
// CHECK:          <while Preheader: [[IPHDR:bb[0-9]+]], Header: [[IHDR:bb[0-9]+]], exit: [[IEXIT:bb[0-9]+]]
// CHECK:            [sequence
// CHECK:              {condition Header: {{bb[0-9]+}}
// CHECK:                {condition Header: {{bb[0-9]+}}
// CHECK:                  block {{bb[0-9]+}}
// CHECK:                  block {{bb[0-9]+}}
// CHECK:                block {{bb[0-9]+}}
// CHECK:              block {{bb[0-9]+}}]>
// CHECK:          {condition Header: {{bb[0-9]+}}
// CHECK:            block {{bb[0-9]+}}
// CHECK:            block {{bb[0-9]+}}}]
// CHECK:        block {{bb[0-9]+}}}
// CHECK:      block {{bb[0-9]+}}]>
// CHECK:  block [[OEXIT]]]
//-- Loop preheaders have appropriate number of additional arguments
//-- Outer loop
// CHECK: [[OPHDR]](%0 : $Builtin.Int32):
// CHECK: br [[OHDR]]([[A:%.*]] : $Builtin.Int32, [[A]] : $Builtin.Int32, [[A]] : $Builtin.Int32, [[A]] : $Builtin.Int32, {{.*}} : $TensorHandle<Builtin.Int32>, {{.*}} : $TensorHandle<Builtin.Int1>)
//-- Inner loop
// CHECK: [[IPHDR]]:                                              // Preds: [[OCOND]]
// CHECK:  br [[IHDR]]({{.*}} : $Builtin.Int32, {{.*}} : $Builtin.Int32, [[B:%[0-9]+]] : $Builtin.Int32, [[B]] : $Builtin.Int32, {{.*}} : $TensorHandle<Builtin.Int32>, {{.*}} : $TensorHandle<Builtin.Int1>)

sil @$nestedLoopWithBreak : $@convention(thin) (Builtin.Int32) -> Builtin.Int32 {
// %0                                             // users: %27, %26, %19
bb0(%0 : $Builtin.Int32):
  %1 = integer_literal $Builtin.Int32, 0          // users: %4, %4, %4
  %2 = integer_literal $Builtin.Int32, 100        // users: %14, %8
  %3 = integer_literal $Builtin.Int32, 1          // users: %25, %18
  br bb1(%1 : $Builtin.Int32, %1 : $Builtin.Int32, %1 : $Builtin.Int32) // id: %4

// %5                                             // users: %11, %10
// %6                                             // users: %25, %8
// %7                                             // user: %11
bb1(%5 : $Builtin.Int32, %6 : $Builtin.Int32, %7 : $Builtin.Int32): // Preds: bb11 bb0
  %8 = builtin "cmp_slt_Int32"(%6 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int1 // user: %9
  cond_br %8, bb3, bb2                            // id: %9

bb2:                                              // Preds: bb1
  br bb12(%5 : $Builtin.Int32)                    // id: %10

bb3:                                              // Preds: bb1
  br bb4(%5 : $Builtin.Int32, %7 : $Builtin.Int32) // id: %11

// %12                                            // users: %17, %16
// %13                                            // users: %18, %17, %16, %14
bb4(%12 : $Builtin.Int32, %13 : $Builtin.Int32):  // Preds: bb7 bb3
  %14 = builtin "cmp_slt_Int32"(%13 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int1 // user: %15
  cond_br %14, bb6, bb5                           // id: %15

bb5:                                              // Preds: bb4
  br bb9(%12 : $Builtin.Int32, %13 : $Builtin.Int32) // id: %16

bb6:                                              // Preds: bb4
  %17 = builtin "sadd_with_overflow_Int32"(%12 : $Builtin.Int32, %13 : $Builtin.Int32) : $Builtin.Int32 // users: %22, %21
  %18 = builtin "sadd_with_overflow_Int32"(%13 : $Builtin.Int32, %3 : $Builtin.Int32) : $Builtin.Int32 // users: %22, %21, %19
  %19 = builtin "cmp_eq_Int32"(%18 : $Builtin.Int32, %0 : $Builtin.Int32) : $Builtin.Int1 // user: %20
  cond_br %19, bb8, bb7                           // id: %20

bb7:                                              // Preds: bb6
  br bb4(%17 : $Builtin.Int32, %18 : $Builtin.Int32) // id: %21

bb8:                                              // Preds: bb6
  br bb9(%17 : $Builtin.Int32, %18 : $Builtin.Int32) // id: %22

// %23                                            // users: %30, %29, %27
// %24                                            // user: %26
bb9(%23 : $Builtin.Int32, %24 : $Builtin.Int32):  // Preds: bb8 bb5
  %25 = builtin "sadd_with_overflow_Int32"(%6 : $Builtin.Int32, %3 : $Builtin.Int32) : $Builtin.Int32 // user: %30
  %26 = builtin "ssub_with_overflow_Int32"(%24 : $Builtin.Int32, %0 : $Builtin.Int32) : $Builtin.Int32 // user: %30
  %27 = builtin "cmp_slt_Int32"(%0 : $Builtin.Int32, %23 : $Builtin.Int32) : $Builtin.Int1 // user: %28
  cond_br %27, bb10, bb11                         // id: %28

bb10:                                             // Preds: bb9
  br bb12(%23 : $Builtin.Int32)                   // id: %29

bb11:                                             // Preds: bb9
  br bb1(%23 : $Builtin.Int32, %25 : $Builtin.Int32, %26 : $Builtin.Int32) // id: %30

// %31                                            // user: %32
bb12(%31 : $Builtin.Int32):                       // Preds: bb10 bb2
  return %31 : $Builtin.Int32                     // id: %32
} // end sil function '$S20canonicalize_cfg_xla10nestedLoop10breakCounts5Int32VAE_tF'

/* do while loop of the following form
public func nestedLoopWithBreak(breakCount:Int32) -> Int32 {
  var j:Int32 = 0
	do {
	  j = j + 1
	  if j == breakCount { break; }
		j = j + 1
	} while (j < 100)
  return j
}
*/

// CHECK-LABEL: --- XLA CFG Canonicalize: $doWhileLoop
// CHECK: [sequence
// CHECK:   <while Preheader: [[PHDR:bb[0-9]+]], Header: [[HDR:bb[0-9]+]], exit: [[EXIT:bb[0-9]+]]
// CHECK:     [sequence
// CHECK:       {condition Header: {{bb[0-9]+}}
// CHECK:         {condition Header: {{bb[0-9]+}}
// CHECK:           block {{bb[0-9]+}}
// CHECK:           block {{bb[0-9]+}}}
// CHECK:         block {{bb[0-9]+}}}
// CHECK:       block {{bb[0-9]+}}]>
// CHECK:   block {{bb[0-9]+}}]

// Make sure undef is still left in this case for now.
// CHECK: sil @$doWhileLoop : {{.*}} (Builtin.Int32) -> Builtin.Int32 {
// CHECK: [[PHDR]]({{.*}} : $Builtin.Int32):
// CHECK: br [[HDR]]({{.*}} : $Builtin.Int32, undef : $Builtin.Int32, {{.*}} : $TensorHandle<Builtin.Int32>, {{.*}} : $TensorHandle<Builtin.Int1>)

sil @$doWhileLoop : $@convention(thin) (Builtin.Int32) -> Builtin.Int32 {
bb0(%0 : $Builtin.Int32):
  %1 = integer_literal $Builtin.Int32, 0
  %2 = integer_literal $Builtin.Int32, 1
  %3 = integer_literal $Builtin.Int32, 100
  br bb1 (%3 : $Builtin.Int32)

bb1(%4: $Builtin.Int32):
  %5 = builtin "sadd_with_overflow_Int32"(%4 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int32
  %6 = builtin "cmp_eq_Int32"(%5 : $Builtin.Int32, %0 : $Builtin.Int32) : $Builtin.Int1
  cond_br %6, bb2, bb3(%5 : $Builtin.Int32)
	
bb2:
  %7 = builtin "sadd_with_overflow_Int32"(%5 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int32
  %8 = builtin "cmp_slt_Int32"(%7 : $Builtin.Int32, %3 : $Builtin.Int32) : $Builtin.Int1
  cond_br %8, bb1(%7 : $Builtin.Int32), bb3(%5 : $Builtin.Int32)

bb3 (%9 : $Builtin.Int32):
	return %9 : $Builtin.Int32
}

// The SIL code is a vastly simplified version of the SIL obtained from swift
// for the following function in -O mode:
// public func natSumWithBreak(_ breakIndex: Int32) -> Tensor<Int32>{
//   var i: Int32 = 1
//   var sum = Tensor<Int32>(0)
//   let maxCount: Int32 = 100
//   while i <= maxCount {
//     sum += i
//     i += 1
//     var j = Tensor<Int32>(breakIndex * i)
//     if (i == breakIndex) {
//       sum -= j
//       break
//     }
//     j -= j
//     sum += j
//   }
//   sum += sum
//   return sum
// }
//
// This is an example where the nodes that lead to the common post dominator of all exit nodes
// is also reachable from nodes outside of the loop. The key issue is that bb9 is the common
// post dominator of the exit blocks bb5 and bb8 of the loop. As a first step, we attempt to
// make the common post dominator the single exit block. In order to do that, we need to
// move the blocks reachable from exit blocks bb5 and bb8 up to, but excluding bb9, into the loop.
// In this example, bb3 needs to be moved into the loop, but is reachable from from bb1 (outside
// the loop) and bb8 (which will be moved inside the loop). To deal with this case, we simply clone
// bb3 before moving.
//
// CHECK-LABEL: --- XLA CFG Canonicalize: $loopThatRequiresNodeCloning
// CHECK: [sequence
// CHECK:   {condition Header: bb0
// CHECK:     block bb1
// CHECK:     [sequence
// CHECK:       <while Preheader: bb2, Header: bb9, exit: bb11
// CHECK:         [sequence
// CHECK:           {condition Header: bb3
// CHECK:             block bb4
// CHECK:             {condition Header: bb5
// CHECK:               block bb7
// CHECK:               block bb6}}
// CHECK:           block bb10]>
// CHECK:       block bb11]}
// CHECK:   block bb8]
// CHECK: --- XLA CFG Canonicalize end
sil @$loopThatRequiresNodeCloning : $@convention(thin) (Builtin.Int32, Builtin.Int32) -> Builtin.Int32 {
bb0(%0 : $Builtin.Int32, %1 : $Builtin.Int32):
  %2 = integer_literal $Builtin.Int32, 1
  %3 = integer_literal $Builtin.Int32, 100
  %4 = builtin "sadd_with_overflow_Int32"(%0 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int32
  %5 = builtin "cmp_slt_Int32"(%4 : $Builtin.Int32, %3 : $Builtin.Int32) : $Builtin.Int1
	cond_br %5, bb1, bb2

bb1:
  // First arg is sum, the second arg is loop counter.
  br bb3(%4 : $Builtin.Int32, %0 : $Builtin.Int32)

bb2:
  br bb4(%4 : $Builtin.Int32, %0 : $Builtin.Int32)

bb3(%6 : $Builtin.Int32, %7 : $Builtin.Int32):
  %8 = builtin "ssub_with_overflow_Int32"(%6 : $Builtin.Int32, %7 : $Builtin.Int32) : $Builtin.Int32
  br bb9(%8 : $Builtin.Int32)

bb4(%9 : $Builtin.Int32, %10 : $Builtin.Int32):
  %11 = builtin "sadd_with_overflow_Int32"(%10 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int32
  %12 = builtin "sadd_with_overflow_Int32"(%9 : $Builtin.Int32, %10 : $Builtin.Int32) : $Builtin.Int32
  %13 = builtin "cmp_slt_Int32"(%11 : $Builtin.Int32, %1 : $Builtin.Int32) : $Builtin.Int1
  cond_br %13, bb5, bb6

bb5:
  br bb9(%12 : $Builtin.Int32)

bb6:
  %14 = builtin "sadd_with_overflow_Int32"(%11 : $Builtin.Int32, %2 : $Builtin.Int32) : $Builtin.Int32
  %15 = builtin "sadd_with_overflow_Int32"(%12 : $Builtin.Int32, %14 : $Builtin.Int32) : $Builtin.Int32
  %16 = builtin "cmp_slt_Int32"(%14 : $Builtin.Int32, %1 : $Builtin.Int32) : $Builtin.Int1
  cond_br %16, bb8, bb7

bb7:
  br bb4(%15 : $Builtin.Int32, %14 : $Builtin.Int32)

bb8:
  br bb3(%15 : $Builtin.Int32, %14 : $Builtin.Int32)

bb9(%18 : $Builtin.Int32):
  return %18 : $Builtin.Int32
}