diff --git a/BUILD.bazel b/BUILD.bazel index e6ff97dd6e..526d08e39f 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -447,6 +447,7 @@ cc_library( "tools_util", ":spirv_tools_internal", ":test_lib", + ":tools_io", "@googletest//:gtest", "@googletest//:gtest_main", ], diff --git a/BUILD.gn b/BUILD.gn index 533cca37fd..7c361f0337 100644 --- a/BUILD.gn +++ b/BUILD.gn @@ -1377,6 +1377,7 @@ if (build_with_chromium && spvtools_build_executables) { "test/fix_word_test.cpp", "test/generator_magic_number_test.cpp", "test/hex_float_test.cpp", + "test/hex_to_text_test.cpp", "test/immediate_int_test.cpp", "test/libspirv_macros_test.cpp", "test/name_mapper_test.cpp", @@ -1424,6 +1425,7 @@ if (build_with_chromium && spvtools_build_executables) { ":spvtools_language_header_cldebuginfo100", ":spvtools_language_header_debuginfo", ":spvtools_language_header_vkdebuginfo100", + ":spvtools_tools_io", ":spvtools_val", "//testing/gmock", "//testing/gtest", diff --git a/README.md b/README.md index 7db5bd42a7..d1ce2fbed8 100644 --- a/README.md +++ b/README.md @@ -80,6 +80,8 @@ further notice. * Assembler only does basic syntax checking. No cross validation of IDs or types is performed, except to check literal arguments to `OpConstant`, `OpSpecConstant`, and `OpSwitch`. +* Where tools expect binary input, a hex stream may be provided instead. See + `spirv-dis --help`. See [`docs/syntax.md`](docs/syntax.md) for the assembly language syntax. 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 76940ce1f2..119e9c9f25 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -88,6 +88,7 @@ endfunction() set(TEST_SOURCES test_fixture.h unit_spirv.h + ${spirv-tools_SOURCE_DIR}/tools/io.h assembly_context_test.cpp assembly_format_test.cpp @@ -110,6 +111,7 @@ set(TEST_SOURCES fix_word_test.cpp generator_magic_number_test.cpp hex_float_test.cpp + hex_to_text_test.cpp immediate_int_test.cpp libspirv_macros_test.cpp named_id_test.cpp @@ -154,6 +156,7 @@ set(TEST_SOURCES to_string_test.cpp unit_spirv.cpp + ${spirv-tools_SOURCE_DIR}/tools/io.cpp ) spvtools_pch(TEST_SOURCES pch_test) diff --git a/test/diff/diff_test.cpp b/test/diff/diff_test.cpp index 3b63c69c7c..da869315da 100644 --- a/test/diff/diff_test.cpp +++ b/test/diff/diff_test.cpp @@ -20,7 +20,6 @@ #include "source/opt/ir_context.h" #include "source/spirv_constant.h" #include "spirv-tools/libspirv.hpp" -#include "tools/io.h" #include "tools/util/cli_consumer.h" #include diff --git a/test/diff/diff_test_utils.cpp b/test/diff/diff_test_utils.cpp index 14bb821536..3bea9c3b18 100644 --- a/test/diff/diff_test_utils.cpp +++ b/test/diff/diff_test_utils.cpp @@ -18,7 +18,6 @@ #include "source/opt/ir_context.h" #include "spirv-tools/libspirv.hpp" -#include "tools/io.h" #include "tools/util/cli_consumer.h" #include "gtest/gtest.h" diff --git a/test/hex_to_text_test.cpp b/test/hex_to_text_test.cpp new file mode 100644 index 0000000000..cc5c673f8d --- /dev/null +++ b/test/hex_to_text_test.cpp @@ -0,0 +1,429 @@ +// Copyright (c) 2024 Google LLC. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "gmock/gmock.h" +#include "test/test_fixture.h" +#include "tools/io.h" + +namespace spvtools { +namespace { + +using spvtest::ScopedContext; + +// Fixture that runs textual hex streams through ConvertHexToBinary. +class HexToText : public ::testing::Test { + public: + // Expects |hex_stream| to convert successfully and the resulting binary to + // disassemble to exactly |expected_disassembly|. + void VerifyDisassembly(const char* hex_stream, + const char* expected_disassembly) { + std::vector stream(hex_stream, hex_stream + strlen(hex_stream)); + std::vector binary; + + // Convert hex to binary first. + EXPECT_TRUE(ConvertHexToBinary(stream, &binary)); + + // Then disassemble it. + spv_diagnostic diagnostic = nullptr; + spv_text disassembly = nullptr; + EXPECT_EQ(spvBinaryToText(ScopedContext().context, binary.data(), + binary.size(), SPV_BINARY_TO_TEXT_OPTION_NONE, + &disassembly, &diagnostic), + SPV_SUCCESS); + EXPECT_EQ(diagnostic, nullptr); + + // Verify disassembly is as expected and clean up. The expectations above + // do not abort the test, so |disassembly| may still be null here; guard the + // dereference so that a failure is reported instead of crashing. + if (disassembly != nullptr) { + EXPECT_STREQ(disassembly->str, expected_disassembly); + } + + spvDiagnosticDestroy(diagnostic); + spvTextDestroy(disassembly); + } + + // Expects ConvertHexToBinary to reject |hex_stream|. + void EnsureError(const char* hex_stream) { + std::vector stream(hex_stream, hex_stream + strlen(hex_stream)); + std::vector binary; + + // Make sure there is a parse error + EXPECT_FALSE(ConvertHexToBinary(stream, &binary)); + } +}; + +// The actual assembly doesn't matter, just the hex parsing. All the tests use +// the following SPIR-V. 
+constexpr char kDisassembly[] = R"(; SPIR-V +; Version: 1.6 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 11 +; Schema: 0 +OpCapability Shader +OpMemoryModel Logical GLSL450 +OpEntryPoint Vertex %1 "main" %2 %3 +OpName %2 "fancy_attribute" +OpName %3 "useful_output" +OpDecorate %2 Location 4 +OpDecorate %3 Location 2 +%4 = OpTypeFloat 32 +%5 = OpTypePointer Input %4 +%2 = OpVariable %5 Input +%6 = OpTypePointer Output %4 +%3 = OpVariable %6 Output +%7 = OpTypeVoid +%8 = OpTypeFunction %7 +%1 = OpFunction %7 None %8 +%9 = OpLabel +%10 = OpLoad %4 %2 +OpStore %3 %10 +OpReturn +OpFunctionEnd +)"; + +TEST_F(HexToText, Words) { + constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b +0x00000000, 0x00020011, 0x00000001, 0x0003000e +0x00000000, 0x00000001, 0x0007000f, 0x00000000 +0x00000001, 0x6e69616d, 0x00000000, 0x00000002 +0x00000003, 0x00060005, 0x00000002, 0x636e6166 +0x74615f79, 0x62697274, 0x00657475, 0x00060005 +0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475 +0x00000074, 0x00040047, 0x00000002, 0x0000001e +0x00000004, 0x00040047, 0x00000003, 0x0000001e +0x00000002, 0x00030016, 0x00000004, 0x00000020 +0x00040020, 0x00000005, 0x00000001, 0x00000004 +0x0004003b, 0x00000005, 0x00000002, 0x00000001 +0x00040020, 0x00000006, 0x00000003, 0x00000004 +0x0004003b, 0x00000006, 0x00000003, 0x00000003 +0x00020013, 0x00000007, 0x00030021, 0x00000008 +0x00000007, 0x00050036, 0x00000007, 0x00000001 +0x00000000, 0x00000008, 0x000200f8, 0x00000009 +0x0004003d, 0x00000004, 0x0000000a, 0x00000002 +0x0003003e, 0x00000003, 0x0000000a, 0x000100fd +0x00010038)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, WordsLeadingSpace) { + constexpr char kHex[] = R"( +x07230203, x00010600, x00070000, x0000000b +x00000000, x00020011, x00000001, x0003000e +x00000000, x00000001, x0007000f, x00000000 +x00000001, x6e69616d, x00000000, x00000002 +x00000003, x00060005, x00000002, x636e6166 +x74615f79, x62697274, x00657475, x00060005 +x00000003, 
x66657375, x6f5f6c75, x75707475 +x00000074, x00040047, x00000002, x0000001e +x00000004, x00040047, x00000003, x0000001e +x00000002, x00030016, x00000004, x00000020 +x00040020, x00000005, x00000001, x00000004 +x0004003b, x00000005, x00000002, x00000001 +x00040020, x00000006, x00000003, x00000004 +x0004003b, x00000006, x00000003, x00000003 +x00020013, x00000007, x00030021, x00000008 +x00000007, x00050036, x00000007, x00000001 +x00000000, x00000008, x000200f8, x00000009 +x0004003d, x00000004, x0000000a, x00000002 +x0003003e, x00000003, x0000000a, x000100fd +x00010038)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, WordsTrailingSpace) { + constexpr char kHex[] = R"(0X7230203, 0X10600, 0X70000, 0XB +0X0, 0X20011, 0X1, 0X3000E +0X0, 0X1, 0X7000F, 0X0 +0X1, X6E69616D, 0X0, 0X2 +0X3, 0X60005, 0X2, X636E6166 +X74615F79, X62697274, 0X657475, 0X60005 +0X3, X66657375, X6F5F6C75, X75707475 +0X74, 0X40047, 0X2, 0X1E +0X4, 0X40047, 0X3, 0X1E +0X2, 0X30016, 0X4, 0X20 +0X40020, 0X5, 0X1, 0X4 +0X4003B, 0X5, 0X2, 0X1 +0X40020, 0X6, 0X3, 0X4 +0X4003B, 0X6, 0X3, 0X3 +0X20013, 0X7, 0X30021, 0X8 +0X7, 0X50036, 0X7, 0X1 +0X0, 0X8, 0X200F8, 0X9 +0X4003D, 0X4, 0XA, 0X2 +0X3003E, 0X3, 0XA, 0X100FD +0X10038 + +)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, BytesLittleEndian) { + constexpr char kHex[] = R"( +0x03 0x02 0x23 0x07 0x00 0x06 0x01 0x00 0x00 0x00 0x07 0x00 0x0b 0x00 0x00 0x00 +0x00 0x00 0x00 0x00 0x11 0x00 0x02 0x00 0x01 0x00 0x00 0x00 0x0e 0x00 0x03 0x00 +0x00 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x0f 0x00 0x07 0x00 0x00 0x00 0x00 0x00 +0x01 0x00 0x00 0x00 0x6d 0x61 0x69 0x6e 0x00 0x00 0x00 0x00 0x02 0x00 0x00 0x00 +0x03 0x00 0x00 0x00 0x05 0x00 0x06 0x00 0x02 0x00 0x00 0x00 0x66 0x61 0x6e 0x63 +0x79 0x5f 0x61 0x74 0x74 0x72 0x69 0x62 0x75 0x74 0x65 0x00 0x05 0x00 0x06 0x00 +0x03 0x00 0x00 0x00 0x75 0x73 0x65 0x66 0x75 0x6c 0x5f 0x6f 0x75 0x74 0x70 0x75 +0x74 0x00 0x00 0x00 0x47 0x00 0x04 0x00 0x02 0x00 0x00 0x00 0x1e 0x00 0x00 0x00 +0x04 
0x00 0x00 0x00 0x47 0x00 0x04 0x00 0x03 0x00 0x00 0x00 0x1e 0x00 0x00 0x00 +0x02 0x00 0x00 0x00 0x16 0x00 0x03 0x00 0x04 0x00 0x00 0x00 0x20 0x00 0x00 0x00 +0x20 0x00 0x04 0x00 0x05 0x00 0x00 0x00 0x01 0x00 0x00 0x00 0x04 0x00 0x00 0x00 +0x3b 0x00 0x04 0x00 0x05 0x00 0x00 0x00 0x02 0x00 0x00 0x00 0x01 0x00 0x00 0x00 +0x20 0x00 0x04 0x00 0x06 0x00 0x00 0x00 0x03 0x00 0x00 0x00 0x04 0x00 0x00 0x00 +0x3b 0x00 0x04 0x00 0x06 0x00 0x00 0x00 0x03 0x00 0x00 0x00 0x03 0x00 0x00 0x00 +0x13 0x00 0x02 0x00 0x07 0x00 0x00 0x00 0x21 0x00 0x03 0x00 0x08 0x00 0x00 0x00 +0x07 0x00 0x00 0x00 0x36 0x00 0x05 0x00 0x07 0x00 0x00 0x00 0x01 0x00 0x00 0x00 +0x00 0x00 0x00 0x00 0x08 0x00 0x00 0x00 0xf8 0x00 0x02 0x00 0x09 0x00 0x00 0x00 +0x3d 0x00 0x04 0x00 0x04 0x00 0x00 0x00 0x0a 0x00 0x00 0x00 0x02 0x00 0x00 0x00 +0x3e 0x00 0x03 0x00 0x03 0x00 0x00 0x00 0x0a 0x00 0x00 0x00 0xfd 0x00 0x01 0x00 +0x38 0x00 0x01 0x00 +)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, BytesBigEndian) { + constexpr char kHex[] = R"( +X07,X23,X02,X03, X00,X01,X06,X00, X00,X07,X00,X00, X00,X00,X00,X0B +X00,X00,X00,X00, X00,X02,X00,X11, X00,X00,X00,X01, X00,X03,X00,X0E +X00,X00,X00,X00, X00,X00,X00,X01, X00,X07,X00,X0F, X00,X00,X00,X00 +X00,X00,X00,X01, X6E,X69,X61,X6D, X00,X00,X00,X00, X00,X00,X00,X02 +X00,X00,X00,X03, X00,X06,X00,X05, X00,X00,X00,X02, X63,X6E,X61,X66 +X74,X61,X5F,X79, X62,X69,X72,X74, X00,X65,X74,X75, X00,X06,X00,X05 +X00,X00,X00,X03, X66,X65,X73,X75, X6F,X5F,X6C,X75, X75,X70,X74,X75 +X00,X00,X00,X74, X00,X04,X00,X47, X00,X00,X00,X02, X00,X00,X00,X1E +X00,X00,X00,X04, X00,X04,X00,X47, X00,X00,X00,X03, X00,X00,X00,X1E +X00,X00,X00,X02, X00,X03,X00,X16, X00,X00,X00,X04, X00,X00,X00,X20 +X00,X04,X00,X20, X00,X00,X00,X05, X00,X00,X00,X01, X00,X00,X00,X04 +X00,X04,X00,X3B, X00,X00,X00,X05, X00,X00,X00,X02, X00,X00,X00,X01 +X00,X04,X00,X20, X00,X00,X00,X06, X00,X00,X00,X03, X00,X00,X00,X04 +X00,X04,X00,X3B, X00,X00,X00,X06, X00,X00,X00,X03, X00,X00,X00,X03 +X00,X02,X00,X13, 
X00,X00,X00,X07, X00,X03,X00,X21, X00,X00,X00,X08 +X00,X00,X00,X07, X00,X05,X00,X36, X00,X00,X00,X07, X00,X00,X00,X01 +X00,X00,X00,X00, X00,X00,X00,X08, X00,X02,X00,XF8, X00,X00,X00,X09 +X00,X04,X00,X3D, X00,X00,X00,X04, X00,X00,X00,X0A, X00,X00,X00,X02 +X00,X03,X00,X3E, X00,X00,X00,X03, X00,X00,X00,X0A, X00,X01,X00,XFD +X00,X01,X00,X38, +)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, StreamLittleEndian) { + constexpr char kHex[] = R"( +03 02 23 07 00 06 01 00 00 00 07 00 0b 00 00 00 +00 00 00 00 11 00 02 00 01 00 00 00 0e 00 03 00 +00 00 00 00 01 00 00 00 0f 00 07 00 00 00 00 00 +01 00 00 00 6d 61 69 6e 00 00 00 00 02 00 00 00 +03 00 00 00 05 00 06 00 02 00 00 00 66 61 6e 63 +79 5f 61 74 74 72 69 62 75 74 65 00 05 00 06 00 +03 00 00 00 75 73 65 66 75 6c 5f 6f 75 74 70 75 +74 00 00 00 47 00 04 00 02 00 00 00 1e 00 00 00 +04 00 00 00 47 00 04 00 03 00 00 00 1e 00 00 00 +02 00 00 00 16 00 03 00 04 00 00 00 20 00 00 00 +20 00 04 00 05 00 00 00 01 00 00 00 04 00 00 00 +3b 00 04 00 05 00 00 00 02 00 00 00 01 00 00 00 +20 00 04 00 06 00 00 00 03 00 00 00 04 00 00 00 +3b 00 04 00 06 00 00 00 03 00 00 00 03 00 00 00 +13 00 02 00 07 00 00 00 21 00 03 00 08 00 00 00 +07 00 00 00 36 00 05 00 07 00 00 00 01 00 00 00 +00 00 00 00 08 00 00 00 f8 00 02 00 09 00 00 00 +3d 00 04 00 04 00 00 00 0a 00 00 00 02 00 00 00 +3e 00 03 00 03 00 00 00 0a 00 00 00 fd 00 01 00 +38 00 01 00 +)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, StreamLittleEndianNoDelim) { + constexpr char kHex[] = R"( +0302230700060100000007000B000000 +0000000011000200010000000E000300 +00000000010000000F00070000000000 +010000006D61696E0000000002000000 +03000000050006000200000066616E63 +795F6174747269627574650005000600 +0300000075736566756C5F6F75747075 +7400000047000400020000001E000000 +0400000047000400030000001E000000 +02000000160003000400000020000000 +20000400050000000100000004000000 +3B000400050000000200000001000000 +20000400060000000300000004000000 
+3B000400060000000300000003000000 +13000200070000002100030008000000 +07000000360005000700000001000000 +0000000008000000F800020009000000 +3D000400040000000A00000002000000 +3E000300030000000A000000FD000100 +38000100 +)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, StreamBigEndian) { + constexpr char kHex[] = R"( +07230203, 00010600, 00070000, 0000000b +00000000, 00020011, 00000001, 0003000e +00000000, 00000001, 0007000f, 00000000 +00000001, 6e69616d, 00000000, 00000002 +00000003, 00060005, 00000002, 636e6166 +74615f79, 62697274, 00657475, 00060005 +00000003, 66657375, 6f5f6c75, 75707475 +00000074, 00040047, 00000002, 0000001e +00000004, 00040047, 00000003, 0000001e +00000002, 00030016, 00000004, 00000020 +00040020, 00000005, 00000001, 00000004 +0004003b, 00000005, 00000002, 00000001 +00040020, 00000006, 00000003, 00000004 +0004003b, 00000006, 00000003, 00000003 +00020013, 00000007, 00030021, 00000008 +00000007, 00050036, 00000007, 00000001 +00000000, 00000008, 000200f8, 00000009 +0004003d, 00000004, 0000000a, 00000002 +0003003e, 00000003, 0000000a, 000100fd +00010038, +)"; + + VerifyDisassembly(kHex, kDisassembly); +} + +TEST_F(HexToText, WordsNoDelimieter) { + constexpr char kHex[] = R"(0x07230203 0x00010600 0x00070000 0x0000000b +0x00000000 0x00020011 0x00000001 0x0003000e +0x00000000 0x00000001 0x0007000f 0x00000000 +0x00000001 0x6e69616d 0x00000000 0x00000002 +0x00000003 0x00060005 0x00000002 0x636e6166 +0x74615f79 0x62697274 0x00657475 0x00060005 +0x00000003 0x666573750x6f5f6c75 0x75707475 +0x00000074 0x00040047 0x00000002 0x0000001e +0x00000004 0x00040047 0x00000003 0x0000001e +0x00000002 0x00030016 0x00000004 0x00000020 +0x00040020 0x00000005 0x00000001 0x00000004 +0x0004003b 0x00000005 0x00000002 0x00000001 +0x00040020 0x00000006 0x00000003 0x00000004 +0x0004003b 0x00000006 0x00000003 0x00000003 +0x00020013 0x00000007 0x00030021 0x00000008 +0x00000007 0x00050036 0x00000007 0x00000001 +0x00000000 0x00000008 0x000200f8 0x00000009 
+0x0004003d 0x00000004 0x0000000a 0x00000002 +0x0003003e 0x00000003 0x0000000a 0x000100fd +0x00010038)"; + + EnsureError(kHex); +} + +TEST_F(HexToText, InvalidFirstToken) { + constexpr char kHex[] = R"(0x17230203, 0x00010600, 0x00070000, 0x0000000b +0x00000000, 0x00020011, 0x00000001, 0x0003000e +0x00000000, 0x00000001, 0x0007000f, 0x00000000 +0x00000001, 0x6e69616d, 0x00000000, 0x00000002 +0x00000003, 0x00060005, 0x00000002, 0x636e6166 +0x74615f79, 0x62697274, 0x00657475, 0x00060005 +0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475 +0x00000074, 0x00040047, 0x00000002, 0x0000001e +0x00000004, 0x00040047, 0x00000003, 0x0000001e +0x00000002, 0x00030016, 0x00000004, 0x00000020 +0x00040020, 0x00000005, 0x00000001, 0x00000004 +0x0004003b, 0x00000005, 0x00000002, 0x00000001 +0x00040020, 0x00000006, 0x00000003, 0x00000004 +0x0004003b, 0x00000006, 0x00000003, 0x00000003 +0x00020013, 0x00000007, 0x00030021, 0x00000008 +0x00000007, 0x00050036, 0x00000007, 0x00000001 +0x00000000, 0x00000008, 0x000200f8, 0x00000009 +0x0004003d, 0x00000004, 0x0000000a, 0x00000002 +0x0003003e, 0x00000003, 0x0000000a, 0x000100fd +0x00010038)"; + + EnsureError(kHex); +} + +TEST_F(HexToText, NonHexCharacter) { + // Note: a 6 is replaced with G in this stream + constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b +0x00000000, 0x00020011, 0x00000001, 0x0003000e +0x00000000, 0x00000001, 0x0007000f, 0x00000000 +0x00000001, 0x6e69616d, 0x00000000, 0x00000002 +0x00000003, 0x00060005, 0x00000002, 0x636e6166 +0x74615f79, 0x62697274, 0x00657475, 0x00060005 +0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475 +0x00000074, 0x00040047, 0x00000002, 0x0000001e +0x00000004, 0x00040047, 0x00000003, 0x0000001e +0x00000002, 0x0003001G, 0x00000004, 0x00000020 +0x00040020, 0x00000005, 0x00000001, 0x00000004 +0x0004003b, 0x00000005, 0x00000002, 0x00000001 +0x00040020, 0x00000006, 0x00000003, 0x00000004 +0x0004003b, 0x00000006, 0x00000003, 0x00000003 +0x00020013, 0x00000007, 0x00030021, 0x00000008 
+0x00000007, 0x00050036, 0x00000007, 0x00000001 +0x00000000, 0x00000008, 0x000200f8, 0x00000009 +0x0004003d, 0x00000004, 0x0000000a, 0x00000002 +0x0003003e, 0x00000003, 0x0000000a, 0x000100fd +0x00010038)"; + + EnsureError(kHex); +} + +TEST_F(HexToText, MissingExpectedPrefix) { + constexpr char kHex[] = R"(0x07230203, 0x00010600, 0x00070000, 0x0000000b +0x00000000, 0x00020011, 0x00000001, 0x0003000e +0x00000000, 0x00000001, 0x0007000f, 0x00000000 +0x00000001, 0x6e69616d, 0x00000000, 0x00000002 +0x00000003, 0x00060005, 0x00000002, 0x636e6166 +0x74615f79, 0x62697274, 0x00657475, 0x00060005 +0x00000003, 0x66657375, 0x6f5f6c75, 0x75707475 +0x00000074, 0x00040047, 0x00000002, 0x0000001e +0x00000004, 0x00040047, 0x00000003, 0x0000001e +0x00000002, 0x00030016, 0x00000004, 0x00000020 +0x00040020, 0x00000005, 00000001, 0x00000004 +0x0004003b, 0x00000005, 0x00000002, 0x00000001 +0x00040020, 0x00000006, 0x00000003, 0x00000004 +0x0004003b, 0x00000006, 0x00000003, 0x00000003 +0x00020013, 0x00000007, 0x00030021, 0x00000008 +0x00000007, 0x00050036, 0x00000007, 0x00000001 +0x00000000, 0x00000008, 0x000200f8, 0x00000009 +0x0004003d, 0x00000004, 0x0000000a, 0x00000002 +0x0003003e, 0x00000003, 0x0000000a, 0x000100fd +0x00010038)"; + + EnsureError(kHex); +} + +TEST_F(HexToText, UnexpectedPrefix) { + constexpr char kHex[] = R"(07230203, 00010600, 00070000, 0000000b +00000000, 00020011, 00000001, 0003000e +00000000, 00000001, 0007000f, 00000000 +00000001, 6e69616d, 00000000, 00000002 +00000003, 00060005, 00000002, 636e6166 +74615f79, 62697274, 00657475, 00060005 +00000003, 66657375, 6f5f6c75, 75707475 +00000074, 00040047, 00000002, 0000001e +00000004, 00040047, 00000003, 0000001e +00000002, 00030016, 00000004, 00000020 +00040020, 00000005, 0x00000001, 00000004 +0004003b, 00000005, 00000002, 00000001 +00040020, 00000006, 00000003, 00000004 +0004003b, 00000006, 00000003, 00000003 +00020013, 00000007, 00030021, 00000008 +00000007, 00050036, 00000007, 00000001 +00000000, 00000008, 000200f8, 
00000009 +0004003d, 00000004, 0000000a, 00000002 +0003003e, 00000003, 0000000a, 000100fd +00010038)"; + + EnsureError(kHex); +} +} // namespace +} // namespace spvtools diff --git a/tools/dis/dis.cpp b/tools/dis/dis.cpp index 6b8a1ae637..c294d039ce 100644 --- a/tools/dis/dis.cpp +++ b/tools/dis/dis.cpp @@ -33,6 +33,10 @@ Usage: %s [options] [] The SPIR-V binary is read from . If no file is specified, or if the filename is "-", then the binary is read from standard input. +A text-based hex stream is also accepted as binary input, which should either +consist of 32-bit words or 8-bit bytes. The 0x or x prefix is optional, but +should be consistently present in the stream. + Options: -h, --help Print this help. diff --git a/tools/io.cpp b/tools/io.cpp index 288f380ad8..9c7d21f723 100644 --- a/tools/io.cpp +++ b/tools/io.cpp @@ -15,6 +15,8 @@ #include "io.h" #include +#include +#include #if defined(SPIRV_WINDOWS) #include @@ -40,7 +42,7 @@ template void ReadFile(FILE* file, std::vector* data) { if (file == nullptr) return; - const int buf_size = 1024; + const int buf_size = 4096 / sizeof(T); T buf[buf_size]; while (size_t len = fread(buf, sizeof(T), buf_size, file)) { data->insert(data->end(), buf, buf + len); @@ -48,9 +50,7 @@ void ReadFile(FILE* file, std::vector* data) { } -// Returns true if |file| has encountered an error opening the file or reading -// the file as a series of element of type |T|. If there was an error, writes an -// error message to standard error. -template +// Returns true if |file| was opened and read without error. If there was an +// error, writes an error message to standard error and returns false. 
bool WasFileCorrectlyRead(FILE* file, const char* filename) { if (file == nullptr) { fprintf(stderr, "error: file does not exist '%s'\n", filename); @@ -62,17 +62,291 @@ bool WasFileCorrectlyRead(FILE* file, const char* filename) { fprintf(stderr, "error: error reading file '%s'\n", filename); return false; } - } else { - if (sizeof(T) != 1 && (ftell(file) % sizeof(T))) { - fprintf( - stderr, - "error: file size should be a multiple of %zd; file '%s' corrupt\n", - sizeof(T), filename); + } + return true; +} + +// Ensure the file contained an exact number of elements, whose size is given in +// |alignment|. Returns false, after writing an error message to standard +// error, if |read_size| is not a multiple of |alignment|. +bool WasFileSizeAligned(const char* filename, size_t read_size, + size_t alignment) { + assert(alignment != 1); + if ((read_size % alignment) != 0) { + fprintf(stderr, + "error: file size should be a multiple of %zu; file '%s' corrupt\n", + alignment, filename); + return false; + } + return true; +} + +// Different formats the hex is expected to be in. +enum class HexMode { + // 0x07230203, ... + Words, + // 0x07, 0x23, 0x02, 0x03, ... + BytesBigEndian, + // 0x03, 0x02, 0x23, 0x07, ... + BytesLittleEndian, + // 07 23 02 03 ... + StreamBigEndian, + // 03 02 23 07 ... + StreamLittleEndian, +}; + +// Whether a character should be skipped as whitespace / separator / +// end-of-file. +bool IsSpace(char c) { + // isspace is only defined for values representable as unsigned char (and + // EOF), so convert first; bytes >= 0x80 may be negative as plain |char|. + return isspace(static_cast<unsigned char>(c)) || c == ',' || c == '\0'; +} + +// Returns whether |stream| looks like a textual hex stream, judged by its first +// non-space character. +bool IsHexStream(const std::vector& stream) { + for (char c : stream) { + if (IsSpace(c)) { + continue; + } + + // Every possible case of a SPIR-V hex stream starts with either '0' or 'x' + // (see |HexMode| values). Make a decision upon inspecting the first + // non-space character. 
+ return c == '0' || c == 'x' || c == 'X'; + } + + return false; +} + +// Returns whether the first |len| characters of |token| and |expect| are equal, +// ignoring case. +bool MatchIgnoreCase(const char* token, const char* expect, size_t len) { + for (size_t i = 0; i < len; ++i) { + // tolower is undefined for negative |char| values, and |token| comes from + // arbitrary input, so convert to unsigned char first. + if (tolower(static_cast<unsigned char>(token[i])) != + tolower(static_cast<unsigned char>(expect[i]))) { return false; } } + return true; } + +// Helper class to tokenize a hex stream +class HexTokenizer { + public: + // |stream| is the text to parse and |data| receives the resulting words; + // |filename| is only used in error messages. The hex format is detected + // here, from the first token. + HexTokenizer(const char* filename, const std::vector& stream, + std::vector* data) + : filename_(filename), stream_(stream), data_(data) { + DetermineMode(); + } + + // Converts the stream, appending one word at a time to |data|. Returns false + // if a parse error was encountered. + bool Parse() { + while (current_ < stream_.size() && !encountered_error_) { + data_->push_back(GetNextWord()); + + // Make sure trailing space does not lead to parse error by skipping it + // and exiting the loop. + SkipSpace(); + } + + return !encountered_error_; + } + + private: + void ParseError(const char* reason) { + if (!encountered_error_) { + fprintf(stderr, + "error: hex stream parse error at character %zu: %s in '%s'\n", + current_, reason, filename_); + encountered_error_ = true; + } + } + + // Skip whitespace until the next non-whitespace non-comma character. + void SkipSpace() { + while (current_ < stream_.size()) { + char c = stream_[current_]; + if (!IsSpace(c)) { + return; + } + + ++current_; + } + } + + // Skip the 0x or x at the beginning of a hex value. + void Skip0x() { + // The first character must be 0 or x. + const char first = Next(); + if (first != '0' && first != 'x' && first != 'X') { + ParseError("expected 0x or x"); + } else if (first == '0') { + const char second = Next(); + if (second != 'x' && second != 'X') { + ParseError("expected 0x"); + } + } + } + + // Consume the next character. + char Next() { return current_ < stream_.size() ? stream_[current_++] : '\0'; } + + // Determine how to read the hex stream based on the first token. + void DetermineMode() { + SkipSpace(); + + // Read 11 bytes, that is the size of the biggest token (10) + one more. 
+ char first_token[11]; + for (uint32_t i = 0; i < 11; ++i) { + first_token[i] = Next(); + } + + // Table of how to match the first token with a mode. |expect| is compared + // case-insensitively against the start of the first token. + struct { + const char* expect; + bool must_have_delimiter; + HexMode mode; + } parse_info[] = { + {"0x07230203", true, HexMode::Words}, + {"0x7230203", true, HexMode::Words}, + {"x07230203", true, HexMode::Words}, + {"x7230203", true, HexMode::Words}, + + {"0x07", true, HexMode::BytesBigEndian}, + {"0x7", true, HexMode::BytesBigEndian}, + {"x07", true, HexMode::BytesBigEndian}, + {"x7", true, HexMode::BytesBigEndian}, + + {"0x03", true, HexMode::BytesLittleEndian}, + {"0x3", true, HexMode::BytesLittleEndian}, + {"x03", true, HexMode::BytesLittleEndian}, + {"x3", true, HexMode::BytesLittleEndian}, + + {"07", false, HexMode::StreamBigEndian}, + {"03", false, HexMode::StreamLittleEndian}, + }; + + // Check to see if any of the possible first tokens are matched. If not, + // this is not a recognized hex stream. Start from the error state and clear + // it only when a pattern matches. + encountered_error_ = true; + for (const auto& info : parse_info) { + const size_t expect_len = strlen(info.expect); + const bool matches_expect = + MatchIgnoreCase(first_token, info.expect, expect_len); + // Where a delimiter is required, the character right after the matched + // text must be a space, so a longer token that merely starts with + // |expect| is not accepted. + const bool satisfies_delimeter = + !info.must_have_delimiter || IsSpace(first_token[expect_len]); + if (matches_expect && satisfies_delimeter) { + mode_ = info.mode; + encountered_error_ = false; + break; + } + } + + if (encountered_error_) { + fprintf(stderr, + "error: hex format detected, but pattern '%.11s' is not " + "recognized '%s'\n", + first_token, filename_); + } + + // Reset the position to restart parsing with the determined mode. + current_ = 0; + } + + // Consume up to |max_len| characters and put them in |token_chars|. A + // delimiter is expected. The resulting string is NUL-terminated. 
+ void NextN(char token_chars[9], size_t max_len) { + assert(max_len < 9); + + for (size_t i = 0; i <= max_len; ++i) { + char c = Next(); + if (IsSpace(c)) { + token_chars[i] = '\0'; + return; + } + + token_chars[i] = c; + // isxdigit is undefined for negative |char| values; convert first. + if (!isxdigit(static_cast<unsigned char>(c))) { + ParseError("encountered non-hex character"); + } + } + + // If space is not reached before the maximum number of characters were + // consumed, that's an error. + ParseError("expected delimiter (space or comma)"); + token_chars[max_len] = '\0'; + } + + // Consume one hex digit. + char NextHexDigit() { + char c = Next(); + // isxdigit is undefined for negative |char| values; convert first. + if (!isxdigit(static_cast<unsigned char>(c))) { + ParseError("encountered non-hex character"); + } + return c; + } + + // Extract a token out of the stream. It could be either a word or a byte, + // based on |mode_|. + uint32_t GetNextToken() { + SkipSpace(); + + // The longest token can be 8 chars (for |HexMode::Words|), add one for + // '\0'. + char token_chars[9]; + + switch (mode_) { + case HexMode::Words: + case HexMode::BytesBigEndian: + case HexMode::BytesLittleEndian: + // Start with 0x, followed by up to 8 (for Word) or 2 (for Byte*) + // digits. + Skip0x(); + NextN(token_chars, mode_ == HexMode::Words ? 8 : 2); + break; + case HexMode::StreamBigEndian: + case HexMode::StreamLittleEndian: + // Always expected to see two consecutive hex digits. + token_chars[0] = NextHexDigit(); + token_chars[1] = NextHexDigit(); + token_chars[2] = '\0'; + break; + } + + if (encountered_error_) { + return 0; + } + + // Parse the hex value that was just read. 
+ // Use strtoul rather than strtol: an 8-digit token such as ffffffff does not + // fit in a 32-bit long, and strtol would clamp it to LONG_MAX. + return static_cast<uint32_t>(strtoul(token_chars, nullptr, 16)); + } + + // Construct a word out of tokens: a single token in |HexMode::Words|, + // otherwise four byte tokens combined according to the mode's byte order. + uint32_t GetNextWord() { + if (mode_ == HexMode::Words) { + return GetNextToken(); + } + + uint32_t tokens[4] = { + GetNextToken(), + GetNextToken(), + GetNextToken(), + GetNextToken(), + }; + + switch (mode_) { + case HexMode::BytesBigEndian: + case HexMode::StreamBigEndian: + return tokens[0] << 24 | tokens[1] << 16 | tokens[2] << 8 | tokens[3]; + case HexMode::BytesLittleEndian: + case HexMode::StreamLittleEndian: + return tokens[3] << 24 | tokens[2] << 16 | tokens[1] << 8 | tokens[0]; + default: + assert(false); + return 0; + } + } + + const char* filename_; + const std::vector& stream_; + std::vector* data_; + + HexMode mode_ = HexMode::Words; + size_t current_ = 0; + bool encountered_error_ = false; +}; } // namespace bool ReadBinaryFile(const char* filename, std::vector* data) { @@ -87,12 +361,39 @@ bool ReadBinaryFile(const char* filename, std::vector* data) { fp = stdin; } - ReadFile(fp, data); - bool succeeded = WasFileCorrectlyRead(fp, filename); + // Read into a char vector first. If this is a hex stream, it needs to be + // processed as such. + std::vector data_raw; + ReadFile(fp, &data_raw); + bool succeeded = WasFileCorrectlyRead(fp, filename); if (use_file && fp) fclose(fp); + + if (!succeeded) { + return false; + } + + if (IsHexStream(data_raw)) { + // If a hex stream, parse it and fill |data|. + HexTokenizer tokenizer(filename, data_raw, data); + succeeded = tokenizer.Parse(); + } else { + // If not a hex stream, convert it to uint32_t via memcpy. 
+ succeeded = WasFileSizeAligned(filename, data_raw.size(), sizeof(uint32_t)); + if (succeeded) { + data->resize(data_raw.size() / sizeof(uint32_t), 0); + // Nothing to copy for an empty input; skip memcpy, which must not be + // given the possibly-null pointers of empty vectors. + if (!data_raw.empty()) { + memcpy(data->data(), data_raw.data(), data_raw.size()); + } + } + } + return succeeded; } +// No file is involved here, so parse errors are reported with an empty file +// name. +bool ConvertHexToBinary(const std::vector& stream, + std::vector* data) { + HexTokenizer tokenizer("", stream, data); + return tokenizer.Parse(); +} + bool ReadTextFile(const char* filename, std::vector* data) { assert(data->empty()); @@ -106,7 +407,7 @@ bool ReadTextFile(const char* filename, std::vector* data) { } ReadFile(fp, data); - bool succeeded = WasFileCorrectlyRead(fp, filename); + bool succeeded = WasFileCorrectlyRead(fp, filename); if (use_file && fp) fclose(fp); return succeeded; } diff --git a/tools/io.h b/tools/io.h index 3c87fcc0d1..536009d531 100644 --- a/tools/io.h +++ b/tools/io.h @@ -25,8 +25,28 @@ // file. If |filename| is nullptr or "-", reads from the standard input, but // reopened as a binary file. If any error occurs, writes error messages to // standard error and returns false. +// +// If the given input is detected to be in ascii hex, it is converted to binary +// automatically. In that case, the shape of the input data is determined based +// on the representation of the magic number: +// +// * "[0]x[0]7230203": Every following "0x..." represents a word. +// * "[0]x[0]7[,] [0]x23...": Every following "0x..." represents a byte, stored +// in big-endian order +// * "[0]x[0]3[,] [0]x[0]2...": Every following "0x..." represents a byte, +// stored in little-endian order +// * "07[, ]23...": Every following "XY" represents a byte, stored in +// big-endian order +// * "03[, ]02...": Every following "XY" represents a byte, stored in +// little-endian order bool ReadBinaryFile(const char* filename, std::vector* data); +// The hex->binary logic of |ReadBinaryFile| applied to a pre-loaded stream of +// bytes. Used by tests to avoid having to call |ReadBinaryFile| with temp +// files. 
Returns false in case of parse errors. +bool ConvertHexToBinary(const std::vector& stream, + std::vector* data); + // Sets the contents of the file named |filename| in |data|, assuming each // element in the file is of type |char|. The file is opened as a text file. If // |filename| is nullptr or "-", reads from the standard input, but reopened as