Skip to content

Commit

Permalink
[XLA:CPU] Allow the shape partition algorithm to partition the most m…
Browse files Browse the repository at this point in the history
…inor

dimension.

The current shape partition algorithm does not partition the most minor
dimension, because doing so causes dynamic loop bounds for the inner loop and
used to prohibit LLVM vectorization. This constraint has been removed with
revision 328478 and LLVM can now vectorize loops with dynamic bounds. Allowing
partitioning of the most minor dimension is also necessary to support the
parallelization of matrix-vector multiplication.

Adjust shape_partition_test to reflect this change in the shape partition
algorithm.

PiperOrigin-RevId: 190574615
  • Loading branch information
bixia1 authored and tensorflower-gardener committed Mar 27, 2018
1 parent b16ec31 commit 307794e
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 82 deletions.
5 changes: 3 additions & 2 deletions tensorflow/compiler/xla/service/cpu/shape_partition.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@ namespace cpu {

std::vector<int64> ShapePartitionAssigner::Run(int64 target_partition_count) {
// Gather outer-most dims where dim_size >= 'target_partition_count'.
// Note: always leave inner-dim static for vectorization/optimizations.
// This may include the inner-dim as LLVM can vectorize loops with dynamic
// bounds.
std::vector<int64> outer_dims;
int64 outer_dim_size = 1;
// TODO(b/27458679) Consider reserving enough minor dimensions (based on
// target vector register width) to enable vector instructions.
for (int i = shape_.layout().minor_to_major_size() - 1; i >= 1; --i) {
for (int i = shape_.layout().minor_to_major_size() - 1; i >= 0; --i) {
const int64 dimension = shape_.layout().minor_to_major(i);
outer_dims.push_back(dimension);
outer_dim_size *= shape_.dimensions(dimension);
Expand Down
116 changes: 38 additions & 78 deletions tensorflow/compiler/xla/service/cpu/shape_partition_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -30,105 +30,65 @@ class ShapePartitionAssignerTest : public HloTestBase {
protected:
typedef std::vector<int64> Vec;

void RunR2Test(const Shape& shape, const int64 expected_max_partition_count) {
void RunR2Test(const Shape& shape, int64 max_target_partition_count,
const std::vector<int64>* expected_partitions) {
ShapePartitionAssigner assigner(shape);
// Check all partitions of outer dimension.
for (int64 i = 1; i <= expected_max_partition_count; ++i) {
EXPECT_TRUE(ContainersEqual(Vec({i}),
assigner.Run(/*target_partition_count=*/i)));
// Iterate through 1..max_target_partition_count.
for (int64 i = 1; i <= max_target_partition_count; ++i) {
std::vector<int64> actual_partitions =
assigner.Run(/*target_partition_count=*/i);
EXPECT_THAT(actual_partitions, expected_partitions[i - 1]);
}
// Check target_partition_count > outer dimension size.
EXPECT_TRUE(ContainersEqual(
Vec({expected_max_partition_count}),
assigner.Run(
/*target_partition_count=*/expected_max_partition_count + 1)));
}
};

TEST_F(ShapePartitionAssignerTest, Shape13WithLayout10) {
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {1, 3}, {1, 0}), 1);
std::vector<int64> expected_partitions[] = {{1} /* 1 */, {1, 2} /* 2 */};
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {1, 3}, {1, 0}), 2,
expected_partitions);
}

TEST_F(ShapePartitionAssignerTest, Shape31WithLayout01) {
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {3, 1}, {0, 1}), 1);
std::vector<int64> expected_partitions[] = {
{1} /* 1 */, {1, 2} /* 2 */
};
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {3, 1}, {0, 1}), 2,
expected_partitions);
}

TEST_F(ShapePartitionAssignerTest, Shape53WithLayout10) {
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {1, 0}), 5);
std::vector<int64> expected_partitions[] = {{1} /* 1 */, {2} /* 2 */,
{3} /* 3 */, {4} /* 4 */,
{5} /* 5 */, {3, 2} /* 6 */};
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {1, 0}), 6,
expected_partitions);
}

TEST_F(ShapePartitionAssignerTest, Shape53WithLayout01) {
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {0, 1}), 3);
std::vector<int64> expected_partitions[] = {
{1} /* 1 */, {2} /* 2 */, {3} /* 3 */, {2, 2} /* 4 */};
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3}, {0, 1}), 4,
expected_partitions);
}

TEST_F(ShapePartitionAssignerTest, Shape532WithLayout210) {
Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 1, 0});
ShapePartitionAssigner assigner(shape);

for (int64 i = 1; i <= 5; ++i) {
EXPECT_TRUE(ContainersEqual(Vec({i}), assigner.Run(
/*target_partition_count=*/i)));
}

EXPECT_TRUE(
ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/6)));
EXPECT_TRUE(
ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/7)));
EXPECT_TRUE(
ContainersEqual(Vec({4, 2}), assigner.Run(/*target_partition_count=*/8)));
EXPECT_TRUE(
ContainersEqual(Vec({3, 3}), assigner.Run(/*target_partition_count=*/9)));
EXPECT_TRUE(ContainersEqual(Vec({3, 3}),
assigner.Run(/*target_partition_count=*/10)));
EXPECT_TRUE(ContainersEqual(Vec({3, 3}),
assigner.Run(/*target_partition_count=*/11)));
EXPECT_TRUE(ContainersEqual(Vec({4, 3}),
assigner.Run(/*target_partition_count=*/12)));
EXPECT_TRUE(ContainersEqual(Vec({4, 3}),
assigner.Run(/*target_partition_count=*/13)));
EXPECT_TRUE(ContainersEqual(Vec({4, 3}),
assigner.Run(/*target_partition_count=*/14)));
EXPECT_TRUE(ContainersEqual(Vec({5, 3}),
assigner.Run(/*target_partition_count=*/15)));
EXPECT_TRUE(ContainersEqual(Vec({5, 3}),
assigner.Run(/*target_partition_count=*/16)));
std::vector<int64> expected_partitions[] = {
{1} /* 1 */, {2} /* 2 */, {3} /* 3 */, {4} /* 4 */,
{5} /* 5 */, {3, 2} /* 6 */, {3, 2} /* 7 */, {4, 2} /* 8 */,
{3, 3} /* 9 */, {3, 3} /* 10 */, {3, 3} /* 11 */, {4, 3} /* 12 */,
{4, 3} /* 13 */, {4, 3} /* 14 */, {5, 3} /* 15 */, {4, 2, 2} /* 16 */};
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 1, 0}), 16,
expected_partitions);
}

TEST_F(ShapePartitionAssignerTest, Shape532WithLayout201) {
Shape shape = ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 0, 1});
ShapePartitionAssigner assigner(shape);

for (int64 i = 1; i <= 3; ++i) {
EXPECT_TRUE(ContainersEqual(Vec({i}), assigner.Run(
/*target_partition_count=*/i)));
}

EXPECT_TRUE(
ContainersEqual(Vec({2, 2}), assigner.Run(/*target_partition_count=*/4)));
EXPECT_TRUE(
ContainersEqual(Vec({2, 2}), assigner.Run(/*target_partition_count=*/5)));
EXPECT_TRUE(
ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/6)));
EXPECT_TRUE(
ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/7)));
EXPECT_TRUE(
ContainersEqual(Vec({3, 2}), assigner.Run(/*target_partition_count=*/8)));
EXPECT_TRUE(
ContainersEqual(Vec({3, 3}), assigner.Run(/*target_partition_count=*/9)));
EXPECT_TRUE(ContainersEqual(Vec({3, 3}),
assigner.Run(/*target_partition_count=*/10)));
EXPECT_TRUE(ContainersEqual(Vec({3, 3}),
assigner.Run(/*target_partition_count=*/11)));
EXPECT_TRUE(ContainersEqual(Vec({3, 4}),
assigner.Run(/*target_partition_count=*/12)));
EXPECT_TRUE(ContainersEqual(Vec({3, 4}),
assigner.Run(/*target_partition_count=*/13)));
EXPECT_TRUE(ContainersEqual(Vec({3, 4}),
assigner.Run(/*target_partition_count=*/14)));
EXPECT_TRUE(ContainersEqual(Vec({3, 5}),
assigner.Run(/*target_partition_count=*/15)));
EXPECT_TRUE(ContainersEqual(Vec({3, 5}),
assigner.Run(/*target_partition_count=*/16)));
std::vector<int64> expected_partitions[] = {
{1} /* 1 */, {2} /* 2 */, {3} /* 3 */, {2, 2} /* 4 */,
{2, 2} /* 5 */, {3, 2} /* 6 */, {3, 2} /* 7 */, {3, 2} /* 8 */,
{3, 3} /* 9 */, {3, 3} /* 10 */, {3, 3} /* 11 */, {3, 4} /* 12 */,
{3, 4} /* 13 */, {3, 4} /* 14 */, {3, 5} /* 15 */, {3, 2, 2} /* 16 */};
RunR2Test(ShapeUtil::MakeShapeWithLayout(F32, {5, 3, 2}, {2, 0, 1}), 16,
expected_partitions);
}

class ShapePartitionIteratorTest : public HloTestBase {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ XLA_TEST_P(ExhaustiveF32ElementwiseOpTest, LogF32) {
#ifdef XLA_TEST_BACKEND_CPU
// TODO(b/73141998): The vectorized Log implementation gives results outside
// our error spec in this range (these numbers are bitwise representations of
// floats expressed as a zero extended int64):
std::pair<int64, int64> known_incorrect_range = {1, 8315654};
// floats expressed as a zero extended int64).
std::pair<int64, int64> known_incorrect_range = {1, 8388608};
#else
std::pair<int64, int64> known_incorrect_range = {0, 0};
#endif
Expand Down

0 comments on commit 307794e

Please sign in to comment.