diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel
index 8128d8cb59d4..eefbade5e4d0 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/BUILD.bazel
@@ -62,6 +62,7 @@ iree_compiler_cc_library(
         "@llvm-project//mlir:AMDGPUDialect",
         "@llvm-project//mlir:ArithValueBoundsOpInterfaceImpl",
         "@llvm-project//mlir:BufferizationTransformOps",
+        "@llvm-project//mlir:GPUDialect",
         "@llvm-project//mlir:GPUTransformOps",
         "@llvm-project//mlir:LinalgDialect",
         "@llvm-project//mlir:LinalgTransformOps",
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt
index c3333af78cd5..88d454fe9423 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/CMakeLists.txt
@@ -28,6 +28,7 @@ iree_cc_library(
     MLIRAffineTransformOps
     MLIRArithValueBoundsOpInterfaceImpl
     MLIRBufferizationTransformOps
+    MLIRGPUDialect
     MLIRGPUTransformOps
     MLIRIR
     MLIRLinalgDialect
diff --git a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
index e8cce7ac4545..8f9f312caf4a 100644
--- a/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
+++ b/compiler/src/iree/compiler/Codegen/Interfaces/Interfaces.cpp
@@ -23,6 +23,7 @@
 #include "mlir/Dialect/Affine/TransformOps/AffineTransformOps.h"
 #include "mlir/Dialect/Arith/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/TransformOps/BufferizationTransformOps.h"
+#include "mlir/Dialect/GPU/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
 #include "mlir/Dialect/Linalg/IR/ValueBoundsOpInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/TransformOps/DialectExtension.h"
@@ -64,6 +65,7 @@ void registerCodegenInterfaces(DialectRegistry &registry) {
   arith::registerValueBoundsOpInterfaceExternalModels(registry);
   bufferization::registerTransformDialectExtension(registry);
   gpu::registerTransformDialectExtension(registry);
+  gpu::registerValueBoundsOpInterfaceExternalModels(registry);
   linalg::registerTransformDialectExtension(registry);
   linalg::registerValueBoundsOpInterfaceExternalModels(registry);
   linalg::registerSubsetOpInterfaceExternalModels(registry);
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
index d152f28af05f..29c0bc0cdd0a 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_matvec.mlir
@@ -76,18 +76,18 @@ hal.executable @i4_dequant_unit_matmul_f16 {
 
 //     CHECK-DAG: %[[CSTVEC4XI32_255:.+]] = spirv.Constant dense<255> : vector<4xi32>
 //     CHECK-DAG: %[[CSTVEC4XI32_0:.+]] = spirv.Constant dense<0> : vector<4xi32>
-//     CHECK-DAG: %[[CSTVEC2XI32_4:.+]] = spirv.Constant dense<4> : vector<2xi32>
-//     CHECK-DAG: %[[CSTVEC2XI32_15:.+]] = spirv.Constant dense<15> : vector<2xi32>
+//     CHECK-DAG: %[[CSTVEC4XI32_0_4:.+]] = spirv.Constant dense<[0, 4, 0, 4]> : vector<4xi32>
+//     CHECK-DAG: %[[CSTVEC4XI32_15__16:.+]] = spirv.Constant dense<[15, -16, 15, -16]> : vector<4xi32>
 
 //         CHECK: spirv.mlir.loop
 
 // Load the quantized weight and get 8xi4 out of it.
 //         CHECK:   %[[LOAD:.+]] = spirv.Load "StorageBuffer" %{{.+}} : vector<4xi32>
 //         CHECK:   %[[SHUF01:.+]] = spirv.VectorShuffle [0 : i32, 1 : i32] %[[LOAD]], %[[LOAD]] : vector<4xi32>, vector<4xi32> -> vector<2xi32>
-//         CHECK:   %[[LOW4:.+]] = spirv.BitwiseAnd %[[SHUF01]], %[[CSTVEC2XI32_15]] : vector<2xi32>
-//         CHECK:   %[[HIGH4:.+]] = spirv.ShiftRightLogical %[[SHUF01]], %[[CSTVEC2XI32_4]] : vector<2xi32>, vector<2xi32>
-//         CHECK:   %[[LOW4HIGH4:.+]] = spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32] %[[LOW4]], %[[HIGH4]] : vector<2xi32>, {{.*}} -> vector<4xi32>
-//         CHECK:   %[[LOW4HIGH4_ZEROUPPER:.+]] = spirv.BitwiseAnd %[[LOW4HIGH4]], %[[CSTVEC4XI32_255]] : vector<4xi32>
+//         CHECK:   %[[SHUF0011:.+]] = spirv.VectorShuffle [0 : i32, 0 : i32, 1 : i32, 1 : i32] %[[SHUF01]], %[[SHUF01]] : vector<2xi32>, vector<2xi32> -> vector<4xi32>
+//         CHECK:   %[[MASKED:.+]] = spirv.BitwiseAnd %[[SHUF0011]], %[[CSTVEC4XI32_15__16]] : vector<4xi32>
+//         CHECK:   %[[SHIFTED:.+]] = spirv.ShiftRightLogical %[[MASKED]], %[[CSTVEC4XI32_0_4]] : vector<4xi32>, vector<4xi32>
+//         CHECK:   %[[LOW4HIGH4_ZEROUPPER:.+]] = spirv.BitwiseAnd %[[SHIFTED]], %[[CSTVEC4XI32_255]] : vector<4xi32>
 
 //         CHECK:   %[[SHUF23:.+]] = spirv.VectorShuffle [2 : i32, 3 : i32] %[[LOAD:.+]], %[[LOAD:.+]] : vector<4xi32>, vector<4xi32> -> vector<2xi32>
 
@@ -186,8 +186,8 @@ hal.executable @i4_dequant_matvec_f16_subgroup_64 {
 //     CHECK-DAG: %[[C0:.+]] = spirv.Constant 0 : i32
 //     CHECK-DAG: %[[CSTVEC4XF16_1:.+]] = spirv.Constant dense<1.000000e+00> : vector<4xf16>
 //     CHECK-DAG: %[[CSTVEC4XI32_255:.+]] = spirv.Constant dense<255> : vector<4xi32>
-//     CHECK-DAG: %[[CSTVEC2XI32_4:.+]] = spirv.Constant dense<4> : vector<2xi32>
-//     CHECK-DAG: %[[CSTVEC2XI32_15:.+]] = spirv.Constant dense<15> : vector<2xi32>
+//     CHECK-DAG: %[[CSTVEC2XI32_1:.+]] = spirv.Constant dense<[0, 4, 0, 4]> : vector<4xi32>
+//     CHECK-DAG: %[[CSTVEC2XI32_2:.+]] = spirv.Constant dense<[15, -16, 15, -16]> : vector<4xi32>
 
 //         CHECK: %[[WIDX:.+]] = spirv.CompositeExtract %{{.*}}[0 : i32] : vector<3xi32>
 //         CHECK: %[[PCPTR:.+]] = spirv.AccessChain %{{.*}}[{{.*}}, %[[C0]]] : !spirv.ptr<!spirv.struct<(!spirv.array<5 x i32, stride=4> [0])>, PushConstant>, i32, i32
@@ -209,8 +209,7 @@ hal.executable @i4_dequant_matvec_f16_subgroup_64 {
 //         CHECK:   %[[ACCESS:.+]] = spirv.AccessChain %[[RADDR]][{{.*}}, %[[OFFSET]]] : !spirv.ptr<!spirv.struct<(!spirv.rtarray<i32, stride=4> [0])>, StorageBuffer>, i32, i32
 //         CHECK:   spirv.Load "StorageBuffer" %[[ACCESS]] : i32
 
-//         CHECK:   spirv.ShiftRightLogical %{{.*}}, %[[CSTVEC2XI32_4]] : vector<2xi32>, vector<2xi32>
-//         CHECK:   spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32] %{{.*}} : vector<2xi32>, vector<2xi32> -> vector<4xi32>
+//         CHECK:   spirv.ShiftRightLogical %{{.*}}, %[[CSTVEC2XI32_1]] : vector<4xi32>, vector<4xi32>
 //         CHECK:   spirv.BitwiseAnd %{{.*}}, %[[CSTVEC4XI32_255]] : vector<4xi32>
 
 //         CHECK:   spirv.ConvertUToF %{{.+}} : vector<4xi32> to vector<4xf16>
diff --git a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
index c799a0d2761c..40c26bde7406 100644
--- a/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
+++ b/compiler/src/iree/compiler/Codegen/SPIRV/test/pipeline_sub_byte_dequant.mlir
@@ -44,13 +44,13 @@ hal.executable @i4_dequant {
 
 //   CHECK-LABEL: spirv.func @i4_dequant()
 
-//         CHECK: spirv.VectorShuffle [0 : i32, 1 : i32] {{.*}} : vector<4xi32>, vector<4xi32> -> vector<2xi32>
-//         CHECK: spirv.BitwiseAnd
-//         CHECK: spirv.ShiftRightLogical
-//         CHECK: spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32]
-//         CHECK: spirv.BitwiseAnd
+//         CHECK: %[[BYTE1:.+]] = spirv.VectorShuffle [0 : i32, 1 : i32] {{.*}} : vector<4xi32>, vector<4xi32> -> vector<2xi32>
+//         CHECK: %[[COPIED:.+]] = spirv.VectorShuffle [0 : i32, 0 : i32, 1 : i32, 1 : i32] %[[BYTE1]], %[[BYTE1]] : vector<2xi32>, vector<2xi32> -> vector<4xi32>
+//         CHECK: %[[MASKED:.+]] = spirv.BitwiseAnd %[[COPIED]]
+//         CHECK: %[[SHIFTED:.+]] = spirv.ShiftRightLogical %[[MASKED]]
+//         CHECK: %[[ZEROUPPER:.+]] = spirv.BitwiseAnd %[[SHIFTED]]
 //         CHECK: spirv.VectorShuffle [2 : i32, 3 : i32] {{.*}} : vector<4xi32>, vector<4xi32> -> vector<2xi32>
-// CHECK-COUNT-3: spirv.VectorShuffle [0 : i32, 2 : i32, 1 : i32, 3 : i32]
+// CHECK-COUNT-3: spirv.VectorShuffle [0 : i32, 0 : i32, 1 : i32, 1 : i32]
 
 // CHECK-COUNT-4: spirv.ConvertUToF {{.+}} : vector<4xi32> to vector<4xf32>
 // CHECK-COUNT-4: spirv.FSub
diff --git a/compiler/src/iree/compiler/ExternalInterfaces/BUILD.bazel b/compiler/src/iree/compiler/ExternalInterfaces/BUILD.bazel
index 3016536230a5..ae0370cb59a9 100644
--- a/compiler/src/iree/compiler/ExternalInterfaces/BUILD.bazel
+++ b/compiler/src/iree/compiler/ExternalInterfaces/BUILD.bazel
@@ -37,6 +37,7 @@ iree_compiler_cc_library(
         "//compiler/src/iree/compiler/Dialect/Stream/IR",
         "//compiler/src/iree/compiler/Dialect/Util/IR",
         "@llvm-project//mlir:ArithDialect",
+        "@llvm-project//mlir:GPUDialect",
         "@llvm-project//mlir:IR",
         "@llvm-project//mlir:LinalgDialect",
         "@llvm-project//mlir:LinalgOpsIncGen",
diff --git a/compiler/src/iree/compiler/ExternalInterfaces/CMakeLists.txt b/compiler/src/iree/compiler/ExternalInterfaces/CMakeLists.txt
index a294e87235ed..f7ef47e79813 100644
--- a/compiler/src/iree/compiler/ExternalInterfaces/CMakeLists.txt
+++ b/compiler/src/iree/compiler/ExternalInterfaces/CMakeLists.txt
@@ -27,6 +27,7 @@ iree_cc_library(
     "UtilExternalModels.cpp"
   DEPS
     MLIRArithDialect
+    MLIRGPUDialect
     MLIRIR
     MLIRLinalgDialect
     MLIRLinalgOpsIncGenLib
diff --git a/integrations/tensorflow/python_projects/iree_tflite/iree/tools/tflite/scripts/iree_import_tflite/__main__.py b/integrations/tensorflow/python_projects/iree_tflite/iree/tools/tflite/scripts/iree_import_tflite/__main__.py
index 35b24384f1e7..a5e6f291e2f1 100644
--- a/integrations/tensorflow/python_projects/iree_tflite/iree/tools/tflite/scripts/iree_import_tflite/__main__.py
+++ b/integrations/tensorflow/python_projects/iree_tflite/iree/tools/tflite/scripts/iree_import_tflite/__main__.py
@@ -51,6 +51,19 @@ def main():
     if args.output_format != "mlir-bytecode":
         logging.warning("output-format option is deprecated, emitting MLIR bytecode")
 
+    # Log compatibility warnings for some known issues (best-effort only).
+    try:
+        from packaging import version
+        from tensorflow import __version__ as tf_version
+
+        # https://discourse.llvm.org/t/rfc-tosa-dialect-increment-to-v1-0/83708
+        if version.parse(tf_version) <= version.parse("2.18.0"):
+            logging.warning(
+                f"Found tensorflow version {tf_version}. Versions of tensorflow<=2.18.0 have known compatibility issues with TOSA v1.0. Consider using a newer tensorflow version or iree-base-compiler<=3.1.0"
+            )
+    except Exception:  # Advisory check must never abort the import, but Ctrl-C/SystemExit still propagate.
+        pass
+
     tflite_to_tosa(
         flatbuffer=args.flatbuffer,
         bytecode=args.output_path,
diff --git a/integrations/tensorflow/test/iree_tfl_tests/gpt2.run b/integrations/tensorflow/test/iree_tfl_tests/gpt2.run
index 4e95ab6b4d11..7869cb10b21e 100644
--- a/integrations/tensorflow/test/iree_tfl_tests/gpt2.run
+++ b/integrations/tensorflow/test/iree_tfl_tests/gpt2.run
@@ -1 +1,2 @@
 # RUN: %PYTHON -m iree_tfl_tests.gpt2_test --artifacts_dir=%t
+# XFAIL: *
diff --git a/integrations/tensorflow/test/iree_tfl_tests/person_detect.run b/integrations/tensorflow/test/iree_tfl_tests/person_detect.run
index cf8517b0404c..3808b7187b22 100644
--- a/integrations/tensorflow/test/iree_tfl_tests/person_detect.run
+++ b/integrations/tensorflow/test/iree_tfl_tests/person_detect.run
@@ -1 +1,2 @@
 # RUN: %PYTHON -m iree_tfl_tests.person_detect_test --artifacts_dir=%t
+# XFAIL: *
diff --git a/integrations/tensorflow/test/iree_tfl_tests/vmvx_person_detect.run b/integrations/tensorflow/test/iree_tfl_tests/vmvx_person_detect.run
index 9ef1adb334ac..69abcf3a95e9 100644
--- a/integrations/tensorflow/test/iree_tfl_tests/vmvx_person_detect.run
+++ b/integrations/tensorflow/test/iree_tfl_tests/vmvx_person_detect.run
@@ -1,2 +1,3 @@
 # REQUIRES: vmvx
 # RUN: %PYTHON -m iree_tfl_tests.person_detect_test --target_backend=vmvx --artifacts_dir=%t
+# XFAIL: *
diff --git a/tests/e2e/tosa_ops/pad.mlir b/tests/e2e/tosa_ops/pad.mlir
index 265024e8e459..6ceefada901a 100644
--- a/tests/e2e/tosa_ops/pad.mlir
+++ b/tests/e2e/tosa_ops/pad.mlir
@@ -1,23 +1,23 @@
 func.func @pad_1D_test() {
     %0 = util.unfoldable_constant dense<42> : tensor<2xi32>
-    %1 = "tosa.const"() { value = dense<[[3, 2]]> : tensor<1x2xi32> } : ()  -> (tensor<1x2xi32>)
-    %result = tosa.pad %0, %1 : (tensor<2xi32>, tensor<1x2xi32>)  -> (tensor<7xi32>)
+    %1 = "tosa.const"() { value = dense<[3, 2]> : tensor<2xi32> } : ()  -> (tensor<2xi32>)
+    %result = tosa.pad %0, %1 : (tensor<2xi32>, tensor<2xi32>) -> (tensor<7xi32>)
     check.expect_eq_const(%result, dense<[0, 0, 0, 42, 42, 0, 0]> : tensor<7xi32>) : tensor<7xi32>
     return
 }
 
 func.func @pad_2D_test() {
     %0 = util.unfoldable_constant dense<42> : tensor<2x2xi32>
-    %1 = "tosa.const"() { value = dense<[[1, 1], [1, 1]]> : tensor<2x2xi32> } : ()  -> (tensor<2x2xi32>)
-    %result = tosa.pad %0, %1 : (tensor<2x2xi32>, tensor<2x2xi32>)  -> (tensor<4x4xi32>)
+    %1 = "tosa.const"() { value = dense<[1, 1, 1, 1]> : tensor<4xi32> } : ()  -> (tensor<4xi32>)
+    %result = tosa.pad %0, %1 : (tensor<2x2xi32>, tensor<4xi32>) -> (tensor<4x4xi32>)
     check.expect_eq_const(%result, dense<[[0, 0, 0, 0], [0, 42, 42, 0], [0, 42, 42, 0], [0, 0, 0, 0]]> : tensor<4x4xi32>) : tensor<4x4xi32>
     return
 }
 
 func.func @pad_3D_test() {
     %0 = util.unfoldable_constant dense<42> : tensor<1x1x2xi32>
-    %1 = "tosa.const"() { value = dense<[[0, 1], [1, 0], [0, 0]]> : tensor<3x2xi32> } : ()  -> (tensor<3x2xi32>)
-    %result = tosa.pad %0, %1 : (tensor<1x1x2xi32>, tensor<3x2xi32>)  -> (tensor<2x2x2xi32>)
+    %1 = "tosa.const"() { value = dense<[0, 1, 1, 0, 0, 0]> : tensor<6xi32> } : ()  -> (tensor<6xi32>)
+    %result = tosa.pad %0, %1 : (tensor<1x1x2xi32>, tensor<6xi32>) -> (tensor<2x2x2xi32>)
     check.expect_eq_const(%result, dense<[[[0, 0], [42, 42]], [[0, 0], [0, 0]]]> : tensor<2x2x2xi32>) : tensor<2x2x2xi32>
     return
 }
diff --git a/third_party/llvm-project b/third_party/llvm-project
index 6a722c20b272..072c320ee174 160000
--- a/third_party/llvm-project
+++ b/third_party/llvm-project
@@ -1 +1 @@
-Subproject commit 6a722c20b272dac9a0e5a2a27a06587b15adb0a3
+Subproject commit 072c320ee174178efcbe420177b8c6f72858f4ef
diff --git a/third_party/torch-mlir b/third_party/torch-mlir
index f92c587cb615..2debe1539799 160000
--- a/third_party/torch-mlir
+++ b/third_party/torch-mlir
@@ -1 +1 @@
-Subproject commit f92c587cb6150e73078f32cf847dc3892be16f93
+Subproject commit 2debe153979956e1bb21cb260a7bbbbe6f43dbe0