diff --git a/External/CMakeLists.txt b/External/CMakeLists.txt index 785d22fb0..920dfa6ac 100644 --- a/External/CMakeLists.txt +++ b/External/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(HeCBench) add_subdirectory(Nurbs) add_subdirectory(Povray) add_subdirectory(SPEC) +add_subdirectory(dav1d) add_subdirectory(skidmarks10) add_subdirectory(sollve_vv) add_subdirectory(smoke) diff --git a/External/SPEC/SpecCPU2017.cmake b/External/SPEC/SpecCPU2017.cmake index 39e58c00c..f84f94186 100644 --- a/External/SPEC/SpecCPU2017.cmake +++ b/External/SPEC/SpecCPU2017.cmake @@ -157,6 +157,8 @@ macro (speccpu2017_benchmark) elseif (ARCH STREQUAL "AArch64" AND TARGET_OS STREQUAL "Linux" AND CMAKE_SIZEOF_VOID_P EQUAL 8) # Linux ARM list(APPEND SPEC_COMMON_DEFS "-DSPEC_LINUX_AARCH64") + elseif (ARCH STREQUAL "LoongArch") + list(APPEND SPEC_COMMON_DEFS "-DSPEC_MANUAL_CONFIG") elseif (ARCH STREQUAL "riscv64") list(APPEND SPEC_COMMON_DEFS "-DSPEC_MANUAL_CONFIG") elseif (ARCH STREQUAL "x86" AND TARGET_OS STREQUAL "Windows") diff --git a/External/dav1d/CMakeLists.txt b/External/dav1d/CMakeLists.txt new file mode 100644 index 000000000..9968187ed --- /dev/null +++ b/External/dav1d/CMakeLists.txt @@ -0,0 +1,371 @@ +include(External) + +# git clone -b 1.5.0 https://code.videolan.org/videolan/dav1d.git +# in llvm-test-suite/test-suite-externals. + +llvm_externals_find(TEST_SUITE_DAV1D_ROOT "dav1d" "dav1d 1.5.0") + +if (NOT TEST_SUITE_DAV1D_ROOT) + return() +endif() + +include(CheckCCompilerFlag) +include(CheckFunctionExists) +include(CheckLanguage) +include(CheckLibraryExists) +include(CheckLinkerFlag) + +set(CMAKE_C_STANDARD 17) + +include_directories(.) 
+include_directories(${TEST_SUITE_DAV1D_ROOT}/include) +include_directories(${TEST_SUITE_DAV1D_ROOT}/include/dav1d) +include_directories(${TEST_SUITE_DAV1D_ROOT}) +include_directories(${TEST_SUITE_DAV1D_ROOT}/src) + +if (WIN32) + include_directories(${TEST_SUITE_DAV1D_ROOT}/include/compat) +endif() + +# Convenience helper for adding an option if it is supported, automatically +# setting up suitable cache variables for the tests. +function(check_enable_option option) + if (${option} MATCHES "^-Wno") + # GCC silently accepts any unknown warning class in options like -Wno-foo, + # but such unrecognized options can produce other distracting notices + # if there are actual warnings to print. Therefore, for options like -Wno-foo, + # test whether -Wfoo is supported instead, and if it is, add -Wno-foo. + string(REGEX REPLACE "^-Wno-" "-W" test_option ${option}) + else() + set(test_option ${option}) + endif() + # Transform the option name into a suitable cmake cache variable name, to + # avoid requiring the caller to uniquely set one for each case. + string(REGEX REPLACE "^--*" "" varname ${test_option}) + string(TOUPPER ${varname} varname) + string(REGEX REPLACE "[-=]" "_" varname ${varname}) + set(varname "SUPPORTS_${varname}") + check_c_compiler_flag(${test_option} ${varname}) + if (${varname}) + # If supported, enable the original form of the option that was requested. 
+ set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${option}" PARENT_SCOPE) + endif() +endfunction() + +if (NOT MSVC) + # clang-cl supports -Wall, but it corresponds to -Weverything + check_enable_option(-Wall) +endif() + +check_enable_option(-Wundef) +check_enable_option(-Werror=vla) +check_enable_option(-Wno-maybe-uninitialized) +check_enable_option(-Wno-missing-field-initializers) +check_enable_option(-Wno-unused-parameter) +check_enable_option(-Wstrict-prototypes) +check_enable_option(-Werror=missing-prototypes) +check_enable_option(-Wshorten-64-to-32) + +check_function_exists(sin HAVE_DEFAULT_MATH) +if (NOT HAVE_DEFAULT_MATH) + check_library_exists(m sin "" HAVE_LIBM) + if (HAVE_LIBM) + link_libraries(m) + endif() +endif() +check_library_exists(atomic __atomic_load_8 "" HAVE_LIBATOMIC) +if (HAVE_LIBATOMIC) + link_libraries(atomic) +endif() +if (NOT WIN32) + find_package(Threads) + if (Threads_FOUND) + link_libraries(${CMAKE_THREAD_LIBS_INIT}) + endif() +endif() + +if (WIN32) + add_compile_definitions(WIN32_LEAN_AND_MEAN) + if (MSVC) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + add_compile_definitions(_CRT_NONSTDC_NO_DEPRECATE) + endif() +endif() + +if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "arm64") + set(ARCH_AARCH64 1) + enable_language(ASM) + message(STATUS "dav1d: Enabling aarch64 assembly") +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^arm") + set(ARCH_ARM 1) + enable_language(ASM) + message(STATUS "dav1d: Enabling arm assembly") +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^i.86$" OR CMAKE_SYSTEM_PROCESSOR MATCHES "^[Xx]86$") + set(ARCH_I386 1) + check_language(ASM_NASM) + if (CMAKE_ASM_NASM_COMPILER) + enable_language(ASM_NASM) + message(STATUS "dav1d: Enabling i386 nasm assembly") + else() + add_compile_definitions(NO_X86ASM) + message(STATUS "dav1d: Not enabling i386 nasm assembly") + endif() + if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} 
-DPREFIX") + endif() +elseif (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "AMD64" OR CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64") + set(ARCH_X86_64 1) + add_compile_definitions(PIC) + check_language(ASM_NASM) + if (CMAKE_ASM_NASM_COMPILER) + enable_language(ASM_NASM) + message(STATUS "dav1d: Enabling x86_64 nasm assembly") + else() + add_compile_definitions(NO_X86ASM) + message(STATUS "dav1d: Not enabling x86_64 nasm assembly") + endif() + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DARCH_X86_64=1") + if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -DPREFIX") + endif() +else() + message(STATUS "dav1d: Not enabling any assembly optimizations for ${CMAKE_SYSTEM_PROCESSOR}") +endif() + +# src + +set(dav1d_src + cdf.c + cpu.c + ctx.c + data.c + decode.c + dequant_tables.c + getbits.c + intra_edge.c + itx_1d.c + lf_mask.c + lib.c + log.c + mem.c + msac.c + obu.c + pal.c + picture.c + qm.c + ref.c + refmvs.c + scan.c + tables.c + thread_task.c + warpmv.c + wedge.c) + +if (WIN32) + list(APPEND dav1d_src + win32/thread.c) +endif() + +set(dav1d_tmpl_src + cdef_apply_tmpl.c + cdef_tmpl.c + fg_apply_tmpl.c + filmgrain_tmpl.c + ipred_prepare_tmpl.c + ipred_tmpl.c + itx_tmpl.c + lf_apply_tmpl.c + loopfilter_tmpl.c + looprestoration_tmpl.c + lr_apply_tmpl.c + mc_tmpl.c + recon_tmpl.c) + +if (ARCH_AARCH64) + list(APPEND dav1d_src + arm/cpu.c + arm/64/itx.S + arm/64/looprestoration_common.S + arm/64/msac.S + arm/64/refmvs.S + arm/64/cdef.S + arm/64/filmgrain.S + arm/64/ipred.S + arm/64/loopfilter.S + arm/64/looprestoration.S + arm/64/mc.S + arm/64/mc_dotprod.S + arm/64/cdef16.S + arm/64/filmgrain16.S + arm/64/ipred16.S + arm/64/itx16.S + arm/64/loopfilter16.S + arm/64/looprestoration16.S + arm/64/mc16.S + arm/64/mc16_sve.S) +elseif (ARCH_ARM) + list(APPEND dav1d_src + arm/cpu.c + arm/32/itx.S + arm/32/looprestoration_common.S + arm/32/msac.S + arm/32/refmvs.S + arm/32/cdef.S + arm/32/filmgrain.S + 
arm/32/ipred.S + arm/32/loopfilter.S + arm/32/looprestoration.S + arm/32/mc.S + arm/32/cdef16.S + arm/32/filmgrain16.S + arm/32/ipred16.S + arm/32/itx16.S + arm/32/loopfilter16.S + arm/32/looprestoration16.S + arm/32/mc16.S) +elseif (ARCH_I386 OR ARCH_X86_64) + list(APPEND dav1d_src + x86/cpu.c) + if (CMAKE_ASM_NASM_COMPILER) + set(x86_nasm_sources + x86/cpuid.asm + x86/msac.asm + x86/pal.asm + x86/refmvs.asm + x86/itx_avx512.asm + x86/cdef_avx2.asm + x86/itx_avx2.asm + x86/cdef_sse.asm + x86/itx_sse.asm + x86/cdef_avx512.asm + x86/filmgrain_avx512.asm + x86/ipred_avx512.asm + x86/loopfilter_avx512.asm + x86/looprestoration_avx512.asm + x86/mc_avx512.asm + x86/filmgrain_avx2.asm + x86/ipred_avx2.asm + x86/loopfilter_avx2.asm + x86/looprestoration_avx2.asm + x86/mc_avx2.asm + x86/filmgrain_sse.asm + x86/ipred_sse.asm + x86/loopfilter_sse.asm + x86/looprestoration_sse.asm + x86/mc_sse.asm + x86/cdef16_avx512.asm + x86/filmgrain16_avx512.asm + x86/ipred16_avx512.asm + x86/itx16_avx512.asm + x86/loopfilter16_avx512.asm + x86/looprestoration16_avx512.asm + x86/mc16_avx512.asm + x86/cdef16_avx2.asm + x86/filmgrain16_avx2.asm + x86/ipred16_avx2.asm + x86/itx16_avx2.asm + x86/loopfilter16_avx2.asm + x86/looprestoration16_avx2.asm + x86/mc16_avx2.asm + x86/cdef16_sse.asm + x86/filmgrain16_sse.asm + x86/ipred16_sse.asm + x86/itx16_sse.asm + x86/loopfilter16_sse.asm + x86/looprestoration16_sse.asm + x86/mc16_sse.asm) + list(APPEND dav1d_src + ${x86_nasm_sources}) + list(TRANSFORM x86_nasm_sources PREPEND ${TEST_SUITE_DAV1D_ROOT}/src/) + set_source_files_properties(${x86_nasm_sources} PROPERTIES LANGUAGE ASM_NASM) + endif() +endif() + +list(TRANSFORM dav1d_tmpl_src PREPEND ${TEST_SUITE_DAV1D_ROOT}/src/) +list(TRANSFORM dav1d_src PREPEND ${TEST_SUITE_DAV1D_ROOT}/src/) + +foreach(bitdepth 8 16) + llvm_test_library(dav1d_bitdepth_${bitdepth} OBJECT ${dav1d_tmpl_src}) + target_compile_definitions(dav1d_bitdepth_${bitdepth} PRIVATE -DBITDEPTH=${bitdepth}) + list(APPEND 
bitdepth_libraries dav1d_bitdepth_${bitdepth}) +endforeach() + +llvm_test_library(dav1d_lib ${dav1d_src}) +target_link_libraries(dav1d_lib LINK_PRIVATE ${bitdepth_libraries}) + + +# tools + +set(dav1d_cli_src + dav1d.c + dav1d_cli_parse.c + input/input.c + input/annexb.c + input/ivf.c + input/section5.c + output/md5.c + output/null.c + output/output.c + output/y4m2.c + output/yuv.c) + +if (WIN32) + list(APPEND dav1d_cli_src + compat/getopt.c) +endif() + +list(TRANSFORM dav1d_cli_src PREPEND ${TEST_SUITE_DAV1D_ROOT}/tools/) + +llvm_test_executable_no_test(dav1d ${dav1d_cli_src}) + +target_include_directories(dav1d PRIVATE ${TEST_SUITE_DAV1D_ROOT}/tools) +target_link_libraries(dav1d PRIVATE dav1d_lib) + + +# checkasm + +set(checkasm_src + checkasm.c + msac.c + pal.c + refmvs.c) + +set(checkasm_tmpl_src + cdef.c + filmgrain.c + ipred.c + itx.c + loopfilter.c + looprestoration.c + mc.c) + +if (ARCH_AARCH64) + list(APPEND checkasm_src + arm/checkasm_64.S) +elseif (ARCH_ARM) + list(APPEND checkasm_src + arm/checkasm_32.S) +elseif (ARCH_I386 OR ARCH_X86_64) + if (CMAKE_ASM_NASM_COMPILER) + set(x86_nasm_sources + x86/checkasm.asm) + list(APPEND checkasm_src + ${x86_nasm_sources}) + list(TRANSFORM x86_nasm_sources PREPEND ${TEST_SUITE_DAV1D_ROOT}/tests/checkasm/) + set_source_files_properties(${x86_nasm_sources} PROPERTIES LANGUAGE ASM_NASM) + endif() +endif() + +list(TRANSFORM checkasm_tmpl_src PREPEND ${TEST_SUITE_DAV1D_ROOT}/tests/checkasm/) +list(TRANSFORM checkasm_src PREPEND ${TEST_SUITE_DAV1D_ROOT}/tests/checkasm/) + +foreach(bitdepth 8 16) + llvm_test_library(checkasm_bitdepth_${bitdepth} OBJECT ${checkasm_tmpl_src}) + target_compile_definitions(checkasm_bitdepth_${bitdepth} PRIVATE -DBITDEPTH=${bitdepth}) + list(APPEND bitdepth_libraries checkasm_bitdepth_${bitdepth}) +endforeach() + +llvm_test_run() +llvm_test_executable(dav1d_checkasm ${checkasm_src}) +target_link_libraries(dav1d_checkasm LINK_PRIVATE ${bitdepth_libraries}) +target_link_libraries(dav1d_checkasm 
PRIVATE dav1d_lib) diff --git a/External/dav1d/README.md b/External/dav1d/README.md new file mode 100644 index 000000000..7db5c3816 --- /dev/null +++ b/External/dav1d/README.md @@ -0,0 +1,255 @@ +dav1d +===== + +dav1d is a highly optimized video decoding library for the AV1 video format. + + +Setup +----- + +This integration of dav1d into llvm-test-suite works with dav1d 1.5.0. + +To include the dav1d library in llvm-test-suite, run +`git clone -b 1.5.0 https://code.videolan.org/videolan/dav1d.git` +within the `llvm-test-suite/test-suite-externals` directory, or +set `TEST_SUITE_DAV1D_ROOT` to point to a similar checkout, in the +CMake configuration. + +For x86 targets, the `nasm` tool is used for building assembly, if +the tool is found at configure time. If not found, the assembly is +omitted. The project also contains assembly for ARM and AArch64, but +that doesn't require any separate tool for building, it is built by +the regular GAS style assembler (via the compiler driver). + +The upstream project also contains some amount of assembly for other +architectures, but that is not currently hooked up in the integration +into llvm-test-suite. + + +Build targets +------------- + +The integration of dav1d into llvm-test-suite builds two targets; +the `dav1d` command line executable (which can decode AV1 video from +`.ivf` files), and `dav1d_checkasm`, a testing tool. The latter is +executed as part of running the llvm-test-suite tests. + + +checkasm +-------- + +The checkasm tool is originally intended for developing handwritten +SIMD optimized versions of functions - both for testing their +correctness and for benchmarking them. + +The correctness tests work by comparing the outputs of a reference C +implementation of each function with the outputs of handwritten SIMD +optimized versions. The same comparison also works in reverse; if the +reference C code gets miscompiled, the correctness test should point out +a discrepancy. 
By just running this executable without any arguments, +it tests all variants of all enabled functions. + +If there is only one implementation of a function (i.e. only the +reference C implementation), there is nothing to compare against, so +such miscompilations wouldn't be caught. + +However, miscompilations that show up as failed asserts within LLVM +when generating code are caught even if there is no assembly +available. + + +Benchmarking with checkasm +-------------------------- + +If benchmarking on AArch64 on Linux, see the section below for +gotchas regarding that. + +While the checkasm tool primarily is intended for benchmarking and +developing handwritten SIMD implementations, it can also be used +for benchmarking and evaluating the performance of the compiler +generated code for the reference C implementations. + +The most highlevel benchmark would be to record the runtime of +one full run of the `dav1d_checkasm` binary, and compare that between +different builds - however this is far from ideal; it only runs each +function a couple of times (as it only runs a correctness test), and +the total runtime depends on the number of SIMD implementations and +which of those implementations are supported by the current CPU. + +The ideal use of the checkasm tool is for microbenchmarking +individual functions. + +As an initial entry level case, one can benchmark all included functions +by running `External/dav1d/dav1d_checkasm --bench 0`. As each benchmarked +function is run a large number of times, this can take a long time +(a couple of minutes). To reduce the runtime of it, one can edit +`dav1d/tests/checkasm/checkasm.h` and change +`#define BENCH_RUNS (1 << 12)` into e.g. `#define BENCH_RUNS (1 << 10)` +to reduce the number of iterations. + +The last argument, `0`, sets the random seed for the execution. 
All +tests run with random input data; in many tests, the actual values of +the input data don't affect the runtime, but some tests can be +affected; therefore, it's good practice to run all benchmarks in a +comparison with the same seed. + +An example of parts of the output of such a benchmark looks like this: + +``` +mc_8tap_regular_w4_hv_8bpc_c: 15.3 ( 1.00x) +mc_8tap_regular_w4_hv_8bpc_neon: 1.8 ( 8.44x) +mc_8tap_regular_w4_hv_8bpc_dotprod: 1.4 (11.22x) +[...] +mc_8tap_regular_w128_h_8bpc_c: 394.5 ( 1.00x) +mc_8tap_regular_w128_h_8bpc_neon: 121.4 ( 3.25x) +mc_8tap_regular_w128_h_8bpc_dotprod: 68.2 ( 5.78x) +mc_8tap_regular_w128_hv_8bpc_c: 702.3 ( 1.00x) +mc_8tap_regular_w128_hv_8bpc_neon: 289.2 ( 2.43x) +mc_8tap_regular_w128_hv_8bpc_dotprod: 183.1 ( 3.84x) +``` + +This is a case where the same function, `mc_8tap_regular`, has been +executed with a number of different cases that are relevant for +use in the video decoder; `w4` means that it was run on a block +of width 4 pixels, and the suffixes `h` or `hv` indicate different +parameters that usually pick different codepaths within the +function. (To be precise, in this case it indicates whether the +function does horizontal filter, vertical, both, or no filtering at +all.) Each function may have different specialized cases that are +benchmarked separately. + +The numbers indicate that e.g. the reference C version of +`mc_8tap_regular_w128_hv` executed in 702 timer units, while +the handwritten NEON and DotProd versions took 289 and 183 timer +units each, respectively. The handwritten versions usually exploit +a lot of extra knowledge about the functions and their uses, that the +reference C implementation and the compiler lack. However they +indicate a potential best case target for what the compiler could +do, in ideal circumstances. + +The various functions are grouped into different areas; one can +choose to run only one or some groups, by adding a parameter like +`--test=mc_8bpc` or `--test=mc_*`. 
+ +While benchmarking, one can also limit the benchmarking to a smaller +set of functions, by adding a parameter like +`--function=mc_8tap_regular_w*_hv_*`. + + +Benchmarking on AArch64 +----------------------- + +The upstream checkasm tool is meant for benchmarking and finetuning +assembly implementations. Therefore, it uses the `pmccntr_el0` register +for high precision timing on Linux and Windows. Unfortunately, this register +is normally not accessible from userspace in Linux. One can enable access +from userspace by building and loading a kernel module, e.g. +https://code.videolan.org/janne/arm64-cycle-cnt. + +Alternatively, the `dav1d/tests/checkasm/checkasm.h` source file can be +edited, changing references to `pmccntr_el0` into `cntvct_el0`. That +timer is usually accessible from userspace, but it has much lower +precision - making it less suitable for finetuning assembly functions, +but it is still good enough for coarse performance comparisons. + +On macOS, a coarse timer that always is accessible, is used by default. + +On Windows, `pmccntr_el0` is used; this register should always be +accessible from userspace on Windows. + + +Evaluating vectorization effectiveness +-------------------------------------- + +For evaluating e.g. the effectiveness of compiler autovectorization, +do two separate builds of `dav1d_checkasm`, e.g. one set up with +`-DCMAKE_C_FLAGS_RELEASE="-O3"` and one with +`-DCMAKE_C_FLAGS_RELEASE="-O3 -fno-vectorize -fno-slp-vectorize"`. +Then run benchmarks for relevant parts, and compare the measured +runtimes for the `_c` suffixed versions. If the vectorized version is +faster (lower benchmark numbers) than the non-vectorized, the compiler +handled the function well. If the vectorized version is slower than +the non-vectorized version, we have found a case that probably should be +investigated, and where compiler autovectorization is hurting the +performance of dav1d. 
+ +As a concrete example, running +`./External/dav1d/dav1d_checkasm --bench --test=mc_8bpc --function=mct_8tap_regular_w128_0_8bpc 0` in both a vectorized and non-vectorized build, +we'd get the following numbers: + +Vectorization disabled: +``` +mct_8tap_regular_w128_0_8bpc_c: 180.9 ( 1.00x) +mct_8tap_regular_w128_0_8bpc_neon: 10.8 (16.69x) +mct_8tap_regular_w128_0_8bpc_dotprod: 10.8 (16.74x) +``` + +Vectorization enabled: +``` +mct_8tap_regular_w128_0_8bpc_c: 18.1 ( 1.00x) +mct_8tap_regular_w128_0_8bpc_neon: 10.8 ( 1.68x) +mct_8tap_regular_w128_0_8bpc_dotprod: 10.8 ( 1.67x) +``` + +Here, the compiler vectorized version was almost 10x as fast as the +non-vectorized version, reaching close to the performance of the +handwritten implementation. + + +A different example of the effect of vectorization can be found +by benchmarking with `./External/dav1d/dav1d_checkasm --bench --test=cdef_8bpc 0`. +There we can get the following numbers: + +Vectorization disabled: +``` +cdef_filter_4x8_10_8bpc_c: 7.4 ( 1.00x) +cdef_filter_4x8_10_8bpc_neon: 1.6 ( 4.51x) +``` + +Vectorization enabled: +``` +cdef_filter_4x8_10_8bpc_c: 11.3 ( 1.00x) +cdef_filter_4x8_10_8bpc_neon: 1.7 ( 6.84x) +``` + +Here, the code generated by vectorization is not beneficial, and +ends up slowing down this particular testcase. + + +Locating the source and generated code for tests +------------------------------------------------ + +Large parts of the dav1d decoder are templated C code, which is +compiled twice, with varying data type definitions - once for +`8bpc` (8 bit per component) and once for `16bpc`. Code in files +named `*_tmpl.c` is compiled in such a way. + +To investigate the behaviour behind one individual benchmark result, +the mapping from benchmark case names to actual source code isn't +always trivial. It may be easiest to start out with the definition +of the test itself, within e.g. `dav1d/tests/checkasm/*.c`, looking +for which function it actually calls. 
+ +As an example, one function observed above, +`mct_8tap_regular_w128_0_8bpc`, gets tested in `dav1d/tests/checkasm/mc.c`, +in the `check_mct` function. The individual test variant gets set up +in this function call: + +``` + if (check_func(c->mct[filter], "mct_%s_w%d_%s_%dbpc", + filter_names[filter], w, mxy_names[mxy], BITDEPTH)) +``` + +This means that the tested function is `c->mct[filter]`. In this case, +the function pointer gets set by `bitfn(dav1d_mc_dsp_init)(&c)`, which +is implemented in `dav1d/src/mc_tmpl.c`. For the case of +`mct_8tap_regular_w128_0_8bpc`, this maps to the function +`prep_8tap_regular_c` (which is defined via macro expansion, so it's not +easily greppable), which calls the function `prep_8tap_c`. Within the +function `prep_8tap_c`, there are four different cases, switched between +based on whether the input parameters `mx` and `my` are zero or nonzero. +In the case of the `_0_` variant, both `mx` and `my` would be zero, and +the called code is in the function `prep_c`. + +The generated code for e.g. those functions can be found in the object file +`External/dav1d/CMakeFiles/dav1d_bitdepth_8.dir/__/__/test-suite-externals/dav1d/src/mc_tmpl.c.o`. 
diff --git a/External/dav1d/cli_config.h b/External/dav1d/cli_config.h new file mode 100644 index 000000000..0d5925946 --- /dev/null +++ b/External/dav1d/cli_config.h @@ -0,0 +1,3 @@ +#pragma once + +#define HAVE_XXHASH_H 0 diff --git a/External/dav1d/config.asm b/External/dav1d/config.asm new file mode 100644 index 000000000..83ada0557 --- /dev/null +++ b/External/dav1d/config.asm @@ -0,0 +1,11 @@ +%ifdef ARCH_X86_64 +%define ARCH_X86_32 0 +%define STACK_ALIGNMENT 16 +%else +%define ARCH_X86_32 1 +%define ARCH_X86_64 0 +%define STACK_ALIGNMENT 4 +%endif +%define FORCE_VEX_ENCODING 0 +%define PIC 1 +%define private_prefix dav1d diff --git a/External/dav1d/config.h b/External/dav1d/config.h new file mode 100644 index 000000000..c0e6080f7 --- /dev/null +++ b/External/dav1d/config.h @@ -0,0 +1,169 @@ +#pragma once + +#ifdef __aarch64__ +#define ARCH_AARCH64 1 +#define AS_ARCH_LEVEL armv8.6-a+crc +#define HAVE_DOTPROD 1 +#define HAVE_I8MM 1 +#define HAVE_SVE 1 +#define HAVE_SVE2 1 +#elif defined(__arm__) +#define ARCH_ARM 1 +#elif defined(__i386__) && !defined(NO_X86ASM) +#define ARCH_X86 1 +#define ARCH_X86_32 1 +#elif defined(__x86_64__) && !defined(NO_X86ASM) +#define ARCH_X86 1 +#define ARCH_X86_64 1 +#endif + +#ifndef ARCH_AARCH64 +#define ARCH_AARCH64 0 +#endif +#ifndef ARCH_ARM +#define ARCH_ARM 0 +#endif +#ifndef ARCH_X86 +#define ARCH_X86 0 +#endif +#ifndef ARCH_X86_32 +#define ARCH_X86_32 0 +#endif +#ifndef ARCH_X86_64 +#define ARCH_X86_64 0 +#endif + +#define ARCH_LOONGARCH 0 + +#define ARCH_LOONGARCH32 0 + +#define ARCH_LOONGARCH64 0 + +#define ARCH_PPC64LE 0 + +#define ARCH_RISCV 0 + +#define ARCH_RV32 0 + +#define ARCH_RV64 0 + + +#define CONFIG_16BPC 1 + +#define CONFIG_8BPC 1 + +#define CONFIG_LOG 1 + +#define CONFIG_MACOS_KPERF 0 + +#ifdef __BYTE_ORDER__ +# if (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +# define ENDIANNESS_BIG 1 +# else +# define ENDIANNESS_BIG 0 +# endif +#elif defined(_WIN32) +# define ENDIANNESS_BIG 0 +#else +# error Unknown 
endianness. +#endif + +#define HAVE_ALIGNED_ALLOC 0 + +#ifdef NO_X86ASM +#define HAVE_ASM 0 +#else +#define HAVE_ASM 1 +#endif + +#define HAVE_AS_ARCHEXT_DOTPROD_DIRECTIVE 0 + +#define HAVE_AS_ARCHEXT_I8MM_DIRECTIVE 0 + +#define HAVE_AS_ARCHEXT_SVE2_DIRECTIVE 1 + +#define HAVE_AS_ARCHEXT_SVE_DIRECTIVE 1 + +#define HAVE_AS_ARCH_DIRECTIVE 1 + +#define HAVE_AS_FUNC 0 + +#define HAVE_C11_GENERIC 1 + +#ifdef __linux__ +#define HAVE_CLOCK_GETTIME 1 +#else +#define HAVE_CLOCK_GETTIME 0 +#endif + +#define HAVE_DLSYM 0 + +#define HAVE_ELF_AUX_INFO 0 + +#ifdef __linux__ +#define HAVE_GETAUXVAL 1 +#else +#define HAVE_GETAUXVAL 0 +#endif + +#ifdef _WIN32 +#define HAVE_IO_H 1 +#else +#define HAVE_IO_H 0 +#endif + +#define HAVE_MEMALIGN 0 + +#ifdef _WIN32 +#define HAVE_POSIX_MEMALIGN 0 +#else +#define HAVE_POSIX_MEMALIGN 1 +#endif + +#ifdef __linux__ +#define HAVE_PTHREAD_GETAFFINITY_NP 1 +#else +#define HAVE_PTHREAD_GETAFFINITY_NP 0 +#endif + +#define HAVE_PTHREAD_NP_H 0 + +#ifdef __linux__ +#define HAVE_PTHREAD_SETAFFINITY_NP 1 +#else +#define HAVE_PTHREAD_SETAFFINITY_NP 0 +#endif + +#define HAVE_PTHREAD_SETNAME_NP 1 + +#define HAVE_PTHREAD_SET_NAME_NP 0 + +#define HAVE_SYS_TYPES_H 1 + +#ifdef _WIN32 +#define HAVE_UNISTD_H 0 +#else +#define HAVE_UNISTD_H 1 +#endif + +#if !defined(PIC) && (defined(__PIC__) || defined(__pic__)) +#define PIC 3 +#endif + +#if defined(__APPLE__) || (defined(_WIN32) && defined(__i386__)) +#define PREFIX 1 +#endif + +#define TRIM_DSP_FUNCTIONS 0 + +#ifdef _WIN32 +#define UNICODE 1 +#define _CRT_DECLARE_NONSTDC_NAMES 1 +#define _FILE_OFFSET_BITS 64 +#define _UNICODE 1 +#endif + +#ifdef _MSC_VER +#define fseeko _fseeki64 +#define ftello _ftelli64 +#endif diff --git a/External/dav1d/vcs_version.h b/External/dav1d/vcs_version.h new file mode 100644 index 000000000..655fd5725 --- /dev/null +++ b/External/dav1d/vcs_version.h @@ -0,0 +1 @@ +#define DAV1D_VERSION "1.5.0" diff --git a/Fortran/CMakeLists.txt b/Fortran/CMakeLists.txt index 56a83e15e..715dd162c 
100644 --- a/Fortran/CMakeLists.txt +++ b/Fortran/CMakeLists.txt @@ -13,7 +13,7 @@ add_subdirectory(SNAP) # test suite. These have not been widely tested, so they are only enabled for # certain combinations of platforms and architectures. if (NOT WIN32 AND NOT APPLE) - if (ARCH MATCHES "x86" OR ARCH MATCHES "AArch64") + if (ARCH MATCHES "x86" OR ARCH MATCHES "AArch64" OR ARCH MATCHES "LoongArch") add_subdirectory(gfortran) endif() endif() diff --git a/Fortran/gfortran/README.md b/Fortran/gfortran/README.md index 8076e820b..1d73da79b 100644 --- a/Fortran/gfortran/README.md +++ b/Fortran/gfortran/README.md @@ -16,7 +16,7 @@ behavior of the binary produced by the compiler. Currently, only the _execute_ tests are supported in `regression`. Both `compile` and `execute` tests have been enabled in `torture`. -Of the supported tests, a number of tests have been disabled. These are listed +Of the supported tests, a number of tests have been disabled. These are listed in the `DisabledFiles.cmake` files that can be found in the various subdirectories of `Fortran/gfortran`. There are four categories of such tests: @@ -35,12 +35,12 @@ features. - *Failing*: These tests fail at test-time. - - For "execute" tests, some crash on execution, others produce - incorrect/unexpected output. This could be a result of a bug in the - compiler/code generator or the runtime. +- For "execute" tests, some crash on execution, others produce +incorrect/unexpected output. This could be a result of a bug in the +compiler/code generator or the runtime. - - For "compile" tests, this could be because the compilation succeeds when it - is expected to fail, or vice versa. +- For "compile" tests, this could be because the compilation succeeds when it +is expected to fail, or vice versa. Over time, the number of tests in the *unimplemented*, *skipped*, and *failing* categories should decrease. Eventually, only the *unsupported* category should @@ -116,7 +116,7 @@ are unrelated to the gfortran tests here. 
Additional denylists for a particular feature can be included by creating `DisabledFiles_FEATURE.cmake` files (in the same format as those for the default -denylists, `DisabledFiles.cmake`), and adding FEATURE to +denylists, `DisabledFiles.cmake`), and adding FEATURE to `TEST_SUITE_FORTRAN_FEATURES`. Additional compiler flags can be added using `CMAKE_Fortran_FLAGS`. @@ -131,8 +131,8 @@ cmake -DTEST_SUITE_FORTRAN_FEATURES=FOO \ `DisabledFiles_FOO.cmake` files can be created in the appropriate subdirectories if enabling the feature/flag results in the failure of tests that otherwise pass. Conversely, the feature/flag may cause some disabled tests to pass. These can be -added to an allowlist file, `EnabledFiles_FOO.cmake` in the corresponding -directory. The file must contain a single variable named `ENABLED_FILES` with +added to an allowlist file, `EnabledFiles_FOO.cmake` in the corresponding +directory. The file must contain a single variable named `ENABLED_FILES` with the file names of the tests that should be enabled (in the case of multi-file tests, this should be the name of the "main" file). An example of such a list is below. @@ -152,8 +152,8 @@ implemented at a steady pace. The relevant tests in this directory should be enabled. This would involve building the test suite with one of the `TEST_SUITE_FORTRAN_*` flags described above. -The build system uses static test configuration files named `tests.cmake` to be -found in the various subdirectories of the test suite. These are generated by +The build system uses static test configuration files named `tests.cmake` to be +found in the various subdirectories of the test suite. These are generated by `utils/update-test-config.py`. The configuration files are the result of parsing the relevant DejaGNU annotations from the test files and are used by the various `CMakeLists.txt` files to set up the tests. 
These configuration files *must not* @@ -178,21 +178,161 @@ Each field is described in the table below: | ``| A space-separated list of targets on which the test is disabled. Each element of the list will be a regular expression that is expected to match an LLVM target triple. The test `kind`'s generally reflect what is being tested. For instance, -`preprocess` tests only run the preprocessor, `assemble` tests generate assembly +`preprocess` tests only run the preprocessor, `assemble` tests generate assembly but no object code, the `compile` tests generate object code but do not invoke the linker while the `link` tests do invoke the linker. The `run` tests are "end-to-end" in that the code is compiled, linked and executed. These tests generally examine the output of the execution to ensure that the behavior of the generated executable is as expected. -The test files should be kept in sync with gfortran. This has to be done -manually. When performing this update, the test configuration files must be +The test files should be kept in sync with gfortran. This has to be done +manually. When performing this update, the test configuration files must be regenerated. This can be done by running `update-test-config.py` in the root of the test suite. The `-h` switch can be provided to the script for additional options. The test files in `regression` and `torture` *must not* be modified. +### Overriding DejaGNU annotations ### + +In some cases, it may be necessary to override the DejaGNU annotations. Some of +these include: + +- To invert the xfail status of a test (which is usually needed when there is +a difference in behavior between gfortran and flang) + +- To selectively enable/disable a test on a particular platform. + +- In cases where a warning is expected, to override the gfortran-specific +warning message with a flang-specific one. + +For now, only limited forms of overriding are supported. 
In particular, we +do not support overriding warning messages, but that might be supported in the +future. + +In order to override annotations for tests in a given directory, create a +file named `override.yaml` in that directory. The format of the file is +described below. After the file has been populated, the static test +configuration files must be updated by running `update-test-config.py` as shown +below. This will update all the `tests.cmake` files in the test suite. + +``` +$ cd /path/to/llvm-test-suite/Fortran/gfortran +$ ./utils/update-test-config.py +``` + +#### override.yaml #### + +The `override.yaml` file can only be used to override attributes of tests +contained within the directory containing `override.yaml`. In order to override +attributes of tests in subdirectories, an `override.yaml` file must be created +in the subdirectory. This file only needs to be created if necessary. Unlike +the `DisabledFiles.cmake` files, it does not need to be present if test +attributes do not need to be overridden. At a high level, the format of the +file is as follows: + +``` +--- +"file1": + attr-name-1: attr-val-1 + attr-name-2: attr-val-2 + ... + +"file2": + attr-name-1: attr-val-2 + ... + +... + +``` + +Here each "fileN" is the full file name (including the extension) of the test. +In the case of multi-file tests, this must be the name of the main test file. +This must not contain paths (relative or absolute). + +The following attributes are currently supported. + +#### `disabled_on` ### + +The value must be a list of strings. Each string describes a platform on which +the test must be disabled. This will usually be a target triple, and may be a +regex. + +This can be used to disable tests on platforms on which they have been +explicitly enabled (this usually occurs when tests are restricted to run only +on certain platforms). In such cases, the string that is used to specify the +platform must exactly match the string that was used in a DejaGNU annotation. 
+For instance, the test `regression/simd-builtins-1.f90` is explicitly enabled +on certain Linux platforms only. This is done in the following annotation: + +``` +! { dg-do compile { target i?86-*-linux* x86_64-*-linux* aarch64*-*-linux* } } +``` + +In order to disable this test on AArch64-based Linux systems, the following +should be added to `regression/override.yaml`: + +``` +"simd-builtins-1.f90": + disabled_on: ["aarch64*-*-linux*"] +``` + +Note that the string in the `disabled_on` list exactly matches that in the +DejaGNU annotation. If you are not sure which string to use, check the +`enabled_on` column in the entry for the test in the static test configuration +file, `tests.cmake`, which will be present in the directory containing the test. + +If the `enabled_on` column for the test in `tests.cmake` is empty, the test is +run on all platforms. In this case, overriding the `disabled_on` attribute is +perfectly safe and will result in the test being executed on all platforms +_except_ those in the `disabled_on` list. + +*WARNING*: However, if any of the strings in the `disabled_on` list is the sole +entry in the `enabled_on` column for the test in `tests.cmake`, the test will +end up being enabled on _all_ platforms _except_ those in the `disabled_on` list. +This is a known issue. There is no timeline for a fix for this. + +#### `enabled_on` #### + +The value must be a list of strings. Each string describes a platform on which +the test must be enabled. This will usually be a target triple, and may be a +regex. + +This attribute will nearly always be used to override DejaGNU annotations that +disable the test on certain platforms. In such cases, the string *must* exactly +match the string that is present in the test file. For instance, the test +`regression/chmod_1.f90` is disabled on cygwin using the following annotation: + +``` +! { dg-do run { target { ! 
{ *-*-mingw* *-*-cygwin* } } } } +``` + +In order to enable this test on cygwin, the following must be added to +`regression/override.yaml`: + +``` +"chmod_1.f90": + enabled_on: ["*-*-cygwin*"] +``` + +Note that the string in the value of `enabled_on` exactly matches the string +in the `regression/chmod_1.f90`. If you are not sure which string to add to +the `override.yaml` file, check the `disabled_on` column in the entry for the +test in the static test configuration file, `tests.cmake`, which will be +present in the directory containing the test. + +*WARNING*: If this is used on a test that is not disabled on any platform, it +will result in the test being enabled _only_ on the platforms specified in the +`enabled_on` list. + +#### `xfail` #### + +The value must be a boolean, i.e. `true` or `false`. If `true`, the test will +be expected to fail, and if `false`, the test will be expected to pass. If +the `xfail` column in the entry for the test in `tests.cmake` is the same as +the overridden value provided here, there will be no change in the behavior +of the test. + ### TODO's ### @@ -200,7 +340,7 @@ If some of the items listed here are implemented, even in part, it should allows us to make better use of the test-suite. Several DejaGNU directives from the test files are either ignored or only -partially supported - i.e. only a subset of the options specified by the +partially supported - i.e. only a subset of the options specified by the directive are handled correctly. In some cases, those directives check that the language feature/optimization being exercised by the tests is actually handled correctly. By ignoring them, we are @@ -231,8 +371,8 @@ thereby causing the test to pass. #### `dg-warning` directive #### -Currently, the `dg-warning` directive is ignored. It ought to be possible to -treat in a manner similar to `dg-error`. +Currently, the `dg-warning` directive is ignored. It ought to be possible to +treat this in a manner similar to `dg-error`. 
#### `scan-tree-dump` directive #### @@ -249,24 +389,25 @@ of GCC to an equivalent representation in LLVM IR. #### `target` directive #### The `target` directive is used to restrict tests to run on certain platforms -and/or systems. The directive can be fairly complex. While in most cases the -directive simply consists of a triple specifying the platform on which the -test is enabled (or disabled), negations, logical `and` and `or` operations are -also permitted. The directives can appear in several places within -a test file. In addition to "top-level" directives which control whether or -not the entire test is enabled, a `target` directive can also be used to -conditionally emit or suppress an error or a warning on certain platforms. +and/or systems. The directive can be fairly complex. While in most cases the +directive simply consists of a triple specifying the platform on which the +test is enabled (or disabled), negations, logical `and` and `or` operations are +also permitted. The directives can appear in several places within +a test file. In addition to "top-level" directives which control whether or +not the entire test is enabled, a `target` directive can also be used to +conditionally emit or suppress an error or a warning on certain platforms. Currently, there is limited support for these directives. -- Logical operators on `target` directives are not supported. Directives +- Logical operators on `target` directives are not supported. Directives containing these operators are ignored entirely. -- Only "top-level" directives are handled. `target` directives that appear -inside other directives such as `dg-error` or `dg-warning` are ignored. +- Only "top-level" directives are handled. `target` directives that appear +inside other directives such as `dg-error` or `dg-warning` are ignored. #### Platform-specific disabling of tests #### -Some tests fail on certain platforms but not on others. 
There is, currently, no -way to disable these tests on a specific platform and these are disabled -everywhere. This is obviously not ideal since the extra coverage that the tests -provide — even if on a limited set of platforms — is desirable. +Some tests fail on certain platforms but not on others. It is possible to +disable such tests on the failing platforms and/or enable them selectively +only on certain platforms. See the [Overriding +DejaGNU annotations](#Overriding-DejaGNU-Annotations) section for details on +how this can be done. diff --git a/Fortran/gfortran/regression/DisabledFiles.cmake b/Fortran/gfortran/regression/DisabledFiles.cmake index 54d176a3c..026fb4d84 100644 --- a/Fortran/gfortran/regression/DisabledFiles.cmake +++ b/Fortran/gfortran/regression/DisabledFiles.cmake @@ -134,6 +134,11 @@ file(GLOB UNSUPPORTED_FILES CONFIGURE_DEPENDS # Similar test added: UnitTests/execute_command_line execute_command_line_1.f90 execute_command_line_3.f90 + + # Test is not conformant: reference to f() in tobias::sub1 violates Fortran + # 2023 (and before) 15.5.2.14 point (4). `f()` references the actual argument + # of `x` while `x` does not have the TARGET or POINTER attribute. + aliasing_array_result_1.f90 ) # These tests are skipped because they hit a 'not yet implemented' assertion @@ -824,19 +829,6 @@ file(GLOB SKIPPED_FILES CONFIGURE_DEPENDS quad_1.f90 internal_dummy_3.f08 - # -------------------------------------------------------------------------- - # - # These tests are skipped because they fail on AArch64 but not x86. These - # will be disabled until we allow tests to be selectively enabled on certain - # platforms. 
- - large_integer_kind.f90 - maxlocval_1.f90 - pr91497.f90 - - alloc_comp_class_4.f03 # TODO: This also fails on X86, so recategorize - unpack_bounds_1.f90 # TODO: This also fails on X86, so recategorize - # -------------------------------------------------------------------------- # # These are skipped almost certainly because of a bug in the way multi-file @@ -920,6 +912,7 @@ file(GLOB FAILING_FILES CONFIGURE_DEPENDS stop_shouldfail.f90 # STOP stops program # require further analysis + alloc_comp_class_4.f03 bounds_check_10.f90 bounds_check_7.f90 bounds_check_array_ctor_1.f90 @@ -1021,12 +1014,6 @@ file(GLOB FAILING_FILES CONFIGURE_DEPENDS do_check_1.f90 random_3.f90 - # These tests fail at runtime on AArch64 (but pass on x86). Disable them - # anyway so the test-suite passes by default on AArch64. - entry_23.f - findloc_8.f90 - pr99210.f90 - # These tests go into an infinite loop printing "Hello World" pointer_check_1.f90 pointer_check_2.f90 @@ -1637,6 +1624,12 @@ file(GLOB FAILING_FILES CONFIGURE_DEPENDS maxloc_bounds_5.f90 ptr_func_assign_1.f08 + # Tests looking for runtime errors (e.g., bound checks). Correctly + # caught by flang runtime, but not caught with Flang optimizations, + # e.g. due to intrinsics inlining. These can pass with -O0: + cshift_bounds_3.f90 + cshift_bounds_4.f90 + # Bad test, assigning an 11 elements array to a 12 elements array. transfer_array_intrinsic_4.f90 @@ -1853,8 +1846,4 @@ file(GLOB FAILING_FILES CONFIGURE_DEPENDS # Tests expect semantic errors that are not raised. c_sizeof_7.f90 - - # Tests that use "PRINT namelistname" - namelist_print_2.f - print_fmt_2.f90 ) diff --git a/Fortran/gfortran/regression/override.yaml b/Fortran/gfortran/regression/override.yaml new file mode 100644 index 000000000..73010bd9b --- /dev/null +++ b/Fortran/gfortran/regression/override.yaml @@ -0,0 +1,56 @@ +--- +# Overrides for DejaGNU annotations for tests in the directory in which this +# file is present. 
To override tests in subdirectories, create/edit an +# override.yaml file in that subdirectory. +# +# Please see gfortran/README.md for instructions on editing this file. +# +# This file is broadly divided into two. The first section contains tests that +# are "temporarily" overridden. These are usually tests that fail on certain +# platforms, but pass on others, but are intended to pass everywhere. When the +# underlying issue is addressed, the tests should be removed. Eventually, there +# should be no tests at all in this section. The second section contains tests +# that are "permanently" overridden - usually because flang's behavior deviates +# from gfortran, but we want the tests to run anyway. +# +# When adding a test to this file, please leave a comment describing why the +# behavior of the test is being overridden. + + +# ------------------------ TEMPORARILY OVERRIDDEN TESTS ------------------------ + +# findloc_8.f90 currently causes an assertion failure in SelectionDAG.cpp on +# some platforms. +# +# Assertion `Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"' failed. +# +"findloc_8.f90": + disabled_on: ["aarch64-*-*", "loongarch64-*-*", "ppc64le-*-*"] + +# entry_23.f raises a segmentation fault at runtime, on some platforms. +"entry_23.f": + disabled_on: ["aarch64-*-*", "loongarch64-*-*", "ppc64le-*-*"] + +# pr91497.f90 fails to compile on some platforms with the following message: +# error: 'kind=' argument must be a constant scalar integer whose value is a +# supported kind for the intrinsic result type. +"pr91497.f90": + disabled_on: ["aarch64-*-*", "loongarch64-*-*", "ppc64le-*-*"] + +# maxlocval_1.f90 fails at runtime with "STOP: code 1" on AArch64. This is not +# expected and does not occur on other targets. +"maxlocval_1.f90": + disabled_on: ["aarch64-*-*"] + + +# ------------------------ PERMANENTLY OVERRIDDEN TESTS ------------------------ + +# namelist_print_2.f and print_fmt_2.f90 use ```print <namelist-name>```.
This +# is a non-standard extension that is not supported in certain cases in +# gfortran, but is always supported in flang. + +"namelist_print_2.f": + xfail: false + +"print_fmt_2.f90": + xfail: false diff --git a/Fortran/gfortran/regression/tests.cmake b/Fortran/gfortran/regression/tests.cmake index 4c26c05fe..c093a7dbc 100644 --- a/Fortran/gfortran/regression/tests.cmake +++ b/Fortran/gfortran/regression/tests.cmake @@ -44,6 +44,7 @@ preprocess;warning-directive-2.F90;xfail;-std=f95 -fdiagnostics-show-option -Wer preprocess;warning-directive-3.F90;;-std=f95 -fdiagnostics-show-option -Werror -Wno-error=cpp;; preprocess;warning-directive-4.F90;;-std=f95 -fdiagnostics-show-option -Wno-cpp;; assemble;module_naming_1.f90;;;; +assemble;pr88833.f90;;-O3 -march=armv8.2-a+sve --save-temps;; assemble;same_name_1.f90;;;; compile;20181025-1.f;;-Ofast;; compile;20231103-1.f90;;-Ofast;; @@ -1946,7 +1947,7 @@ compile;namelist_args.f90;xfail;-std=gnu;; compile;namelist_assumed_char.f90;xfail;-std=f95;; compile;namelist_blockdata.f;xfail;;; compile;namelist_empty.f90;;-std=legacy;; -compile;namelist_print_2.f;xfail;-std=f95;; +compile;namelist_print_2.f;;-std=f95;; compile;namelist_utf8.f90;;;; compile;nan_4.f90;;-std=gnu -fallow-invalid-boz;; compile;nan_5.f90;;-fno-range-check;; @@ -2635,7 +2636,6 @@ compile;pr88379.f90;;-fcoarray=single;; compile;pr88467.f90;xfail;;; compile;pr88552.f90;xfail;;; compile;pr88624.f90;;-fcoarray=lib;; -compile;pr88833.f90;;-O3 -march=armv8.2-a+sve --save-temps;; compile;pr88902.f90;;-flto --param ggc-min-heapsize=0;; compile;pr88932.f90;;-O1 -fpredictive-commoning -fno-tree-ch -fno-tree-dominator-opts -fno-tree-fre;; compile;pr88934.f90;;-O -ftree-vectorize;; @@ -2671,7 +2671,7 @@ compile;pr91372.f90;;;; compile;pr91471.f90;;;; compile;pr91485.f90;;;; compile;pr91496.f90;;-fdump-tree-original;; -compile;pr91497.f90;;-Wall;; +compile;pr91497.f90;;-Wall;;aarch64-.+-.+ loongarch64-.+-.+ ppc64le-.+-.+ compile;pr91497_2.f90;;-Wall;; 
compile;pr91564.f90;xfail;;; compile;pr91565.f90;xfail;;; @@ -2852,7 +2852,7 @@ compile;predict-3.f90;;-fno-tree-fre -fno-tree-ccp -Og;; compile;present_1.f90;xfail;;; compile;print_1.f90;xfail;;; compile;print_2.f90;xfail;;; -compile;print_fmt_2.f90;xfail;;; +compile;print_fmt_2.f90;;;; compile;print_fmt_3.f;;;; compile;print_fmt_4.f;xfail;;; compile;print_fmt_5.f90;xfail;;; @@ -4701,7 +4701,7 @@ run;entry_12.f90;;;; run;entry_13.f90;;;; run;entry_14.f90;;;; run;entry_16.f90;;;; -run;entry_23.f;;;; +run;entry_23.f;;;;aarch64-.+-.+ loongarch64-.+-.+ ppc64le-.+-.+ run;entry_26.f90;;-fno-f2c;; run;entry_27.f90;;-ff2c;; run;entry_3.f90;;;; @@ -4806,7 +4806,7 @@ run;findloc_3.f90;;;; run;findloc_4.f90;;;; run;findloc_5.f90;;;; run;findloc_6.f90;;;; -run;findloc_8.f90;;;; +run;findloc_8.f90;;;;aarch64-.+-.+ loongarch64-.+-.+ ppc64le-.+-.+ run;float_1.f90;;;; run;flush_1.f90;;;; run;fmt_bz_bn.f;;;; @@ -5248,7 +5248,7 @@ run;maxloc_bounds_6.f90;xfail;-fbounds-check;; run;maxloc_bounds_7.f90;xfail;-fbounds-check;; run;maxloc_bounds_8.f90;xfail;-fbounds-check;; run;maxloc_string_1.f90;;;; -run;maxlocval_1.f90;;;; +run;maxlocval_1.f90;;;;aarch64-.+-.+ run;maxlocval_2.f90;;;; run;maxlocval_3.f90;;;; run;maxlocval_4.f90;;;; diff --git a/Fortran/gfortran/utils/update-test-config.py b/Fortran/gfortran/utils/update-test-config.py index 2c190bdc1..f42424ce7 100755 --- a/Fortran/gfortran/utils/update-test-config.py +++ b/Fortran/gfortran/utils/update-test-config.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 # # This script parses the DejaGNU annotations from the files in the gfortran test -# suite and updates the static test configuration files. This must -# be run whenever the tests are updated with new tests from upstream -# gfortran. There are currently several limitations in the way the annotations -# are parsed and how they are dealt with in the static test -# configuration. These are described in inline comments. 
The format of the -# static test configuration files is also documented inline. +# suite and updates the static test configuration files. This must be run +# whenever the tests are updated with new tests from upstream gfortran. There +# are currently several limitations in the way the annotations are parsed and +# how they are dealt with in the static test configuration. These are described +# in inline comments. The format of the static test configuration files is also +# documented inline. # # This script modifies the test configuration files in place. If this is not # desirable, @@ -20,7 +20,7 @@ import os import re import shutil -import typing +import yaml # Class representing a single test. The fields of the test should be those that # are eventually serialized into the test configuration. The configuration will @@ -108,7 +108,7 @@ def __str__(self): re_btxt = re.compile('[{][ ]*(.+?)[ ]*[}]') re_fortran = re.compile('^.+[.][Ff].*$') -re_assemble = re.compile(f'{pfx}dg-(lto-)?do[ ]*assemble{sfx}') +re_assemble = re.compile(f'{pfx}dg-(lto-)?do[ ]*assemble[ ]*{tgt}{sfx}') re_preprocess = re.compile(f'{pfx}dg-do[ ]*preprocess{sfx}') re_compile = re.compile(f'{pfx}dg-do[ ]*compile[ ]*{tgt}{sfx}') re_link = re.compile(f'{pfx}dg-(lto-)?do[ ]*link[ ]*{tgt}{sfx}') @@ -129,7 +129,7 @@ def __str__(self): # Maps from known platforms to triples that LLVM will understand. # FIXME: The ia32 target probably does not always correspond to i386. Does it -# means that it will be enabled on other non-X86 platforms? +# mean that it will be enabled on other non-X86 platforms? platforms = {'ia32': 'i386-*-*'} # Get the n-th level ancestor of the given file. The 1st level ancestor is @@ -145,7 +145,6 @@ def get_ancestor(f: str, n: int) -> str: def get_encoding(filepath: str) -> str | None: with open(filepath, 'rb') as f: return chardet.detect(f.read())['encoding'] - return None # Get the lines in the file. 
def get_lines(filepath: str) -> list[str]: @@ -226,17 +225,22 @@ def error(fmt: str, *args) -> None: # The target is usually a regular expression. But the regex syntax used by # DejaGNU is not exactly the same as that supported by cmake. This translates # the DejaGNU regex to a cmake-compatible regex. +# +# WARNING: This function is not intended to be a faithful translation of all +# DejaGNU regexes to equivalent CMake regexes. The target specifications used in +# the gfortran test suite happen to use a subset of the regex language, so we +# can get away with doing quick and easy replacements. def convert_target_regex(t: str) -> str: - # XXX: This translation is not strictly correct. # In DejaGNU, the ? character matches a single character unless it follows - # an atom. In the target specifications in the gfortran test suite, this is - # only used as a single character match. + # an atom. In the target specifications in the gfortran test suite, however, + # it is only used as a single character match, so just replace it with the + # cmake equivalent. t = t.replace('?', '.') - # XXX: This translation is not strictly correct. - # in DejaGNU, the * character can also be a wildcard match for zero or more + # In DejaGNU, the * character can also be a wildcard match for zero or more # characters unless it follows an atom. In the target specifications in the - # gfortran test suite, it is only used as a wildcard. + # gfortran test suite, however, it is only used as a wildcard match, so just + # replace it with the cmake equivalent. t = t.replace('*', '.+') return t @@ -369,8 +373,8 @@ def collect_tests(d: str) -> list[Test]: ' \n'.join([os.path.basename(f) for f in remove]) ) - # Find all the files that are dependencies of some file that is the - # main file in a test. 
+ # Find all the files that are dependencies of the main file of a multi-file + # test dependents = set([]) for filename in files: for l in get_lines(filename): @@ -511,12 +515,12 @@ def parse_tests(filename: str) -> list[Test]: lno + 1 ) - kind = elems[0] - sources = elems[1].split(' ') - xfail = True if elems[2] == 'xfail' else False - options = elems[3].split(' ') - enabled_on = elems[4].split(' ') - disabled_on = elems[5].split(' ') + kind: str = elems[0] + sources: list[str] = elems[1].split(' ') + xfail: bool = True if elems[2] == 'xfail' else False + options: list[str] = elems[3].split(' ') + enabled_on: list[str] = elems[4].split(' ') + disabled_on: list[str] = elems[5].split(' ') tests.append( Test(kind, sources, options, enabled_on, disabled_on, xfail) @@ -524,6 +528,124 @@ def parse_tests(filename: str) -> list[Test]: return tests +# Parse the override file. The file is guaranteed to exist. +def parse_override_file(filename: str) -> dict: + def type_error(attr: str, key: str, typ: str) -> None: + error( + 'Value of attribute "{}" in key "{}" must be of type "{}"', + attr, + key, + typ + ) + + yml = {} + with open(filename, "r") as f: + yml = yaml.safe_load(f) + + # Check that the keys and the values are as we expect. We could have used a + # schema for this, and probably should, but this should be sufficient for + # now. + for main, attrs in yml.items(): + # The keys must be strings. They must also be a valid main file for a + # test, so it would be nice to check for that too, but it is probably + # not worth the extra hassle. + if not isinstance(main, str): + error('Key "{}" in override file must be a string', main) + + if not isinstance(attrs, dict): + error('Key "{}" must be mapped to a dictionary', main) + + for attr, val in attrs.items(): + if not isinstance(attr, str): + error('Attribute "{}" in key "{}" must be a string', attr, main) + + # We could, in principle, allow 'disabled_on' and 'enabled_on' to be + # strings. 
For now, force them to be lists even if they contain only + # a single element. Empty lists are allowed, even if they are + # somewhat useless. + if attr == 'disabled_on': + if not isinstance(val, list): + type_error(attr, main, 'array') + elif attr == 'enabled_on': + if not isinstance(val, list): + type_error(attr, main, 'array') + elif attr == 'xfail': + if not isinstance(val, bool): + type_error(attr, main, 'boolean') + else: + error('Unknown attribute "{}" in key "{}"', attr, main) + + # We allow the target specifications in the `enabled_on` and `disabled_on` + # lists to use * as a wildcard match. This is to keep it consistent with + # the DejaGNU specifications in the tests. But that syntax is not + # compatible with CMake regexes, so they need to be converted before use. + for _, attrs in yml.items(): + for k in ['enabled_on', 'disabled_on']: + if k in attrs: + attrs[k] = [convert_target_regex(r) for r in attrs[k]] + + return yml + +# Override the disabled_on property of the test. +def override_disabled_on(disabled_on: list[str], t: Test) -> None: + message('Overriding "disabled_on" in {}', t.sources[0]) + + # Some tests could be explicitly enabled on certain platforms. When + # disabling such tests, the corresponding entry should be removed from the + # enabled_on list. The match must be exact. Regex matches are not, and + # likely will never be, supported. + for s in disabled_on: + if s in t.enabled_on and len(t.enabled_on) == 1: + error( + ('"{}" is the sole remaining entry in the enabled_on list of '\ + 'test "{}". This will result in the test being enabled on '\ + 'all platforms except "{}". This is a known issue and is '\ + 'currently not supported. You may need to add the test to '\ + 'DisabledFiles.cmake to disable the test altogether'), + s, + t.sources[0], + s + ) + elif s in t.enabled_on: + t.enabled_on.remove(s) + t.disabled_on.extend(disabled_on) + +# Override the enabled_on property of the test. 
+def override_enabled_on(enabled_on: list[str], t: Test) -> None: + message('Overriding "enabled_on" in {}', t.sources[0]) + + # A test will typically run on all platforms except those on which it has + # been explicitly disabled. This option is almost always going to be used + # to override the platform on which it has been disabled. If so, platform + # specification (potentially a regex) should be removed from the disable_on + # list. The platforms will be added to the enabled_on list. + # + # The string must exactly match what is in the disable_on list. Regex + # matches are not, and likely will never be, supported. + for s in enabled_on: + if s in t.disabled_on: + t.disabled_on.remove(s) + else: + t.enabled_on.append(s) + +# Override the xfail property of the test. +def override_xfail(xfail: bool, t: Test) -> None: + message('Overriding "xfail" in {}', t.sources[0]) + t.xfail = xfail + +# Override the properties of the test based on the attributes from the override +# file. +def override_test(attrs: dict, t: Test) -> None: + for attr, val in attrs.items(): + if attr == 'disabled_on': + override_disabled_on(val, t) + elif attr == 'enabled_on': + override_enabled_on(val, t) + elif attr == 'xfail': + override_xfail(val, t) + else: + error('Unknown attribute "{}" in key "{}"', attr, main) + # Setup the argument parser and return it. def get_argument_parser(): ap = argparse.ArgumentParser( @@ -562,17 +684,29 @@ def main() -> int: 'run': 0 } for d in dirs: - printf('{}', d) + message('In {}', d) tests = collect_tests(d) if not tests: continue + # Process an override file if one exists. It is probably not beneficial + # to force every subdirectory to have an override file since it is + # unlikely that a large number of tests will need to be overridden. 
+ override = os.path.join(d, 'override.yaml') + if os.path.exists(override): + message('Found override file: {}', override) + yml = parse_override_file(override) + for t in tests: + main: str = t.sources[0] + if main in yml: + override_test(yml[main], t) + existing = [] config_file = os.path.join(d, 'tests.cmake') if os.path.exists(config_file): - message('Backing up test configuration') existing = parse_tests(config_file) if args.backup: + message('Backing up test configuration') shutil.move(config_file, config_file + '.bak') else: message('Test configuration not found') diff --git a/SingleSource/Benchmarks/CoyoteBench/huffbench.c b/SingleSource/Benchmarks/CoyoteBench/huffbench.c index 1ae6d9adf..495c07de8 100644 --- a/SingleSource/Benchmarks/CoyoteBench/huffbench.c +++ b/SingleSource/Benchmarks/CoyoteBench/huffbench.c @@ -111,25 +111,23 @@ static void heap_adjust(size_t * freq, size_t * heap, int n, int k) // queues and heaps for more explanation. int j; - --heap; - - int v = heap[k]; + int v = heap[k-1]; while (k <= (n / 2)) { j = k + k; - if ((j < n) && (freq[heap[j]] > freq[heap[j+1]])) + if ((j < n) && (freq[heap[j-1]] > freq[heap[j]])) ++j; - if (freq[v] < freq[heap[j]]) + if (freq[v] < freq[heap[j-1]]) break; - heap[k] = heap[j]; + heap[k-1] = heap[j-1]; k = j; } - heap[k] = v; + heap[k-1] = v; } // Huffman compression/decompression function diff --git a/SingleSource/Regression/C/gcc-c-torture/execute/loop-15.c b/SingleSource/Regression/C/gcc-c-torture/execute/loop-15.c index 8cb5125a2..45d02c9bd 100644 --- a/SingleSource/Regression/C/gcc-c-torture/execute/loop-15.c +++ b/SingleSource/Regression/C/gcc-c-torture/execute/loop-15.c @@ -3,10 +3,11 @@ void foo (unsigned long *start, unsigned long *end) { - unsigned long *temp = end - 1; - - while (end > start) - *end-- = *temp--; + while (end > start) + { + *end = *(end - 1); + --end; + } } int diff --git a/test-suite-externals/.gitkeep b/test-suite-externals/.gitkeep new file mode 100644 index 
000000000..e69de29bb diff --git a/tools/test/CMakeLists.txt b/tools/test/CMakeLists.txt index e7755154b..a724da14f 100644 --- a/tools/test/CMakeLists.txt +++ b/tools/test/CMakeLists.txt @@ -1,22 +1,27 @@ # Copy these files to the build directory so that the tests can be run even # without the source directory. -configure_file(test_not.sh test_not.sh - @ONLY) +configure_file(test_not.py test_not.py + COPYONLY) -add_executable(ret1 ret1.c) -llvm_test_run(EXECUTABLE "%b/not" "%b/test/ret1") +llvm_test_executable_no_test(ret1 ret1.c) +add_dependencies(ret1 not) +llvm_test_run(EXECUTABLE "$<TARGET_FILE:not>" "$<TARGET_FILE:ret1>") llvm_add_test_for_target(ret1) -add_executable(ret0 ret0.c) -llvm_test_run(EXECUTABLE "%b/not" "%b/not" "%b/test/ret0") +llvm_test_executable_no_test(ret0 ret0.c) +add_dependencies(ret0 not) +llvm_test_run(EXECUTABLE "$<TARGET_FILE:not>" "$<TARGET_FILE:not>" "$<TARGET_FILE:ret0>") llvm_add_test_for_target(ret0) # Check that expected crashes are handled correctly. -add_executable(abrt abort.c) -llvm_test_run(EXECUTABLE "%b/not" "--crash" "%b/test/abrt") +llvm_test_executable_no_test(abrt abort.c) +add_dependencies(abrt not) +llvm_test_run(EXECUTABLE "$<TARGET_FILE:not>" "--crash" "$<TARGET_FILE:abrt>") llvm_add_test_for_target(abrt) # Check that not passes environment variables to the called executable.
-add_executable(check_env check_env.c) -llvm_test_run(EXECUTABLE "/bin/bash" "%b/test/test_not.sh %b") -llvm_add_test(test_not.test test_not.sh) +find_package(Python COMPONENTS Interpreter) +llvm_test_executable_no_test(check_env check_env.c) +add_dependencies(check_env not) +llvm_test_run(EXECUTABLE ${Python_EXECUTABLE} "%b/test/test_not.py" "$<TARGET_FILE:not>" "$<TARGET_FILE:check_env>") +llvm_add_test_For_target(check_env) diff --git a/tools/test/test_not.py b/tools/test/test_not.py new file mode 100644 index 000000000..8eafa5368 --- /dev/null +++ b/tools/test/test_not.py @@ -0,0 +1,7 @@ +import os +import subprocess +import sys + +os.environ["SET_IN_PARENT"] = "something" +out = subprocess.run([sys.argv[1], sys.argv[2]]) +sys.exit(out.returncode) diff --git a/tools/test/test_not.sh b/tools/test/test_not.sh deleted file mode 100644 index 04bdd953b..000000000 --- a/tools/test/test_not.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env sh -# -# USAGE: test_not.sh ${bindir} -# -# where bindir is ${CMAKE_BINARY_DIR}/tools - -export SET_IN_PARENT="something" -$1/not $1/test/check_env