<?xml version="1.0"?>
<!DOCTYPE flagsdescription SYSTEM
       "http://www.spec.org/dtd/cpuflags2.dtd">

<!--  This file defines flags for use with The Intel oneAPI DPC++/C++ and The Intel C++ Compiler Classic Compilers  -->
<flagsdescription>
<!-- Identifying metadata: the name declared for this flags file and its human-readable title. -->
<filename>Intel_compiler_flags</filename>
<title>Intel Compiler, Optimization and Other flags for use by SPEChpc</title>
<!--  Style  -->
<!-- CSS carried verbatim (inside CDATA) for the rendered flags page. -->
<style>
<![CDATA[
body {
  margin: 1em;
  border: 0;
  padding: 0;
  background-repeat: no-repeat;
  background-attachment: fixed;
  background-position: 100% 0;
  color: black;
  font-family: "Times Roman", times, serif;
}
div.flagDesc {
  clear: both;
  color: black;
  background-color: #d6e7f7;
  border: 1px solid blue; /* was "#blue", which is not a valid colour value */
  margin: 0 auto;
  width: 90%;
}
ul.flagTOC { list-style-type: none; margin: 0; padding: 0; }
ul.flagTOC > li { border: 1px solid #d6e7f7; background: #d6e7f7; }
ul.flagTOC > li > a:link { color: blue; }
ul.SubMenu li { border: 1px solid #d6e7f7; /* rgb(211, 211, 211); */ }
ul.SubMenu { border: 1px solid blue; background-color: #d6e7f7; }
]]>
</style>
<!--  Header  -->
<!-- Banner markup (HTML inside CDATA) naming the compilers and operating system this file covers. -->
<header>
<![CDATA[ <div id='banner'> <h2><b>Compilers: Intel Fortran/C/C++</b></h2> <h2><b>Operating systems: Linux</b></h2> </div> ]]>
</header>
<!--  Compilers  -->
<!-- OpenMPI wrapper drivers; each entry matches its own command name literally. -->
<flag name="mpifort"
      class="compiler"
      regexp="mpifort">
<![CDATA[ <p>The OpenMPI Fortran driver configured for use with the Intel Fortran compiler.</p> ]]>
<example>mpifort</example>
</flag>
<flag name="mpicxx"
      class="compiler"
      regexp="mpicxx">
<![CDATA[ <p>The OpenMPI C++ driver configured for use with the Intel C++ compiler.</p> ]]>
<example>mpicxx</example>
</flag>
<flag name="mpicc"
      class="compiler"
      regexp="mpicc">
<![CDATA[ <p>The OpenMPI C driver configured for use with the Intel C compiler.</p> ]]>
<example>mpicc</example>
</flag>
<!-- Intel MPI wrappers redirected to the oneAPI compilers: each regexp matches the
     classic wrapper name followed, anywhere later, by the -fc= / -cxx= / -cc= selector. -->
<flag name="mpiifx"
      class="compiler"
      regexp="mpiifort.* -fc=ifx">
<![CDATA[ <p>The Intel MPI Fortran driver configured for use with the Intel oneAPI Fortran compiler.</p> ]]>
</flag>
<flag name="mpiicpx"
      class="compiler"
      regexp="mpiicpc.* -cxx=icpx">
<![CDATA[ <p>The Intel MPI C++ driver configured for use with the Intel oneAPI C++ compiler.</p> ]]>
</flag>
<flag name="mpiicx"
      class="compiler"
      regexp="mpiicc.* -cc=icx">
<![CDATA[ <p>The Intel MPI C driver configured for use with the Intel oneAPI C compiler.</p> ]]>
</flag>
<!-- Stand-alone oneAPI compiler commands, matched by their bare names. -->
<flag name="ifx"
      class="compiler"
      regexp="ifx">
<![CDATA[ <p>The Intel oneAPI Fortran compiler.</p> ]]>
</flag>
<flag name="icpx"
      class="compiler"
      regexp="icpx">
<![CDATA[ <p>The Intel oneAPI C++ compiler.</p> ]]>
</flag>
<flag name="icx"
      class="compiler"
      regexp="icx">
<![CDATA[ <p>The Intel oneAPI C compiler.</p> ]]>
</flag>
<!-- Intel MPI wrappers used with the Classic compilers. They are listed after the
     oneAPI-redirected variants, whose longer patterns begin with the same names. -->
<flag name="mpiifort"
      class="compiler"
      regexp="mpiifort">
<![CDATA[ <p>The Intel MPI Fortran driver configured for use with the Intel Classic Fortran compiler.</p> ]]>
<example>mpiifort</example>
</flag>
<flag name="mpiicpc"
      class="compiler"
      regexp="mpiicpc">
<![CDATA[ <p>The Intel MPI C++ driver configured for use with the Intel Classic C++ compiler.</p> ]]>
<example>mpiicpc</example>
</flag>
<flag name="mpiicc"
      class="compiler"
      regexp="mpiicc">
<![CDATA[ <p>The Intel MPI C driver configured for use with the Intel Classic C compiler.</p> ]]>
<example>mpiicc</example>
</flag>
<!--  Portability, Other Flags.  -->
<!-- Link-line request for the standard C++ library; the plus signs are escaped in the regexp. -->
<flag name="lstdc"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-lstdc\+\+">
<![CDATA[ <p>USE std C++ libs on Linker </p> ]]>
</flag>
<!-- Portability macro for LP64 systems. The description replaces a run of
     exclamation marks that had been left in as a placeholder. -->
<flag name="DSPEC_LP64"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-DSPEC_LP64\b">
<![CDATA[ <p>Indicates that the benchmark is being compiled for the LP64 data model: integers are 32 bits wide, while longs and pointers are 64 bits wide.</p> ]]>
</flag>
<!-- Forces the value of the _OPENMP macro. The previous description had been copied
     from the SPEC_NO_VAR_ARRAY_REDUCE entry and did not describe this option.
     NOTE(review): the reason the benchmark needs this particular value is not visible here. -->
<flag name="D_OPENMP201411"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-D_OPENMP=201411\b">
<![CDATA[ <p> Defines the preprocessor macro _OPENMP with the value 201411, so that source code testing _OPENMP sees this OpenMP version date. </p> ]]>
</flag>
<!-- Benchmark preprocessor defines; each regexp ends at a word boundary so longer macro names are not matched. -->
<flag name="DSPEC_NO_VAR_ARRAY_REDUCE"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-DSPEC_NO_VAR_ARRAY_REDUCE\b">
<![CDATA[ <p> Disable use of reduction with variable array reduction variable (OpenMP 4.5, OpenACC 2.7) even if compiler reports support. </p> ]]>
</flag>
<flag name="DUSE_MPI"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-DUSE_MPI\b">
<![CDATA[ <p> Mandatory flag enabling MPI by default. </p> ]]>
</flag>
<!-- C99 language selection. The compilers list now names the same nine MPI drivers as
     the neighbouring portability flags; it previously listed only the OpenMPI wrappers,
     so the option went undescribed when used with the Intel MPI drivers. -->
<flag name="c99"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-std=c99\b">
<![CDATA[ <p>Use C99 language features.</p> ]]>
</flag>
<!-- C++ language-level selectors: the -std= spelling for all listed drivers, and the
     double-dash c++17 / c++14 spellings, which are restricted to mpicxx. -->
<flag name="stdcxx14"
      class="portability"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-std=c\+\+14\b">
<![CDATA[ <p>Use C++ 14 language features.</p> ]]>
<example>-std=c++14</example>
</flag>
<flag name="cxx17"
      class="portability"
      compilers="mpicxx"
      regexp="--c\+\+17\b">
<![CDATA[ <p>Use C++ 17 language features.</p> ]]>
<example>--c++17</example>
</flag>
<flag name="cxx14"
      class="portability"
      compilers="mpicxx"
      regexp="--c\+\+14\b">
<![CDATA[ <p>Use C++ 14 language features.</p> ]]>
<example>--c++14</example>
</flag>
<!-- Fixed-form source width option. The description replaces the uninformative
     "FPORTABILITY flag"; attributes follow the name/class/regexp order used elsewhere. -->
<flag name="port_80" class="portability" regexp="-80"> Treat fixed-form Fortran source lines as extending to column 80 (alternate spelling of -extend-source 80). </flag>
<!-- Link-related portability options. The Wl entry matches only the no-relax linker pass-through. -->
<flag name="port_noformain"
      class="portability"
      regexp="-nofor-main"> No Fortran main method exists, use C equivalent instead. </flag>
<flag name="shared_intel"
      class="portability"
      regexp="-shared-intel"> link Intel provided libraries dynamically </flag>
<flag name="Wl"
      class="portability"
      regexp="-Wl,--no-relax"> Pass the comma separated arguments to the linker </flag>
<!-- Memory-model selection; the lookahead requires whitespace or end of string after the keyword. -->
<flag name="f-mcmodel"
      class="portability"
      regexp="-mcmodel=(small|medium|large)(?=\s|$)">
<![CDATA[ <p> use a specific memory model to generate code and store data<br /> small - Restricts code and data to the first 2GB of address space (DEFAULT)<br /> medium - Restricts code to the first 2GB; it places no memory restriction on data<br /> large - Places no memory restriction on code or data</p> ]]>
</flag>
<!--  Fortran Optimization Flags  -->
<!-- 32-byte array alignment. The description previously named array64byte (copied from
     the 64-byte entry) and misspelled "realign". -->
<flag name="align32"
      class="optimization"
      regexp="-align array32byte">
<example>-align array32byte specifies that analyzed arrays should be aligned to 32byte boundaries</example>
<![CDATA[ <p>The align toggle changes how data elements are aligned. Variables and arrays are analyzed and memory layout can be altered. Specifying array32byte will look for opportunities to transform and realign arrays to 32byte boundaries.</p> ]]>
</flag>
<!-- 64-byte array alignment; corrects the misspelling "reailgn" in the description. -->
<flag name="align64"
      class="optimization"
      regexp="-align array64byte">
<example>-align array64byte specifies that analyzed arrays should be aligned to 64byte boundaries</example>
<![CDATA[ <p>The align toggle changes how data elements are aligned. Variables and arrays are analyzed and memory layout can be altered. Specifying array64byte will look for opportunities to transform and realign arrays to 64byte boundaries.</p> ]]>
</flag>
<!-- Matches the "no" form of standard-realloc-lhs, followed by whitespace or end of string. -->
<flag name="f_2003_std_realloc"
      class="optimization"
      regexp="-nostandard-realloc-lhs(?=\s|$)">
<example>Determines whether the compiler uses the current Fortran Standard rules or the old Fortran 2003 rules when interpreting assignment statements.</example>
<![CDATA[ <p>Option standard-realloc-lhs (the default), tells the compiler that when the left-hand side of an assignment is an allocatable object, it should be reallocated to the shape of the right-hand side of the assignment before the assignment occurs. This is the current Fortran Standard definition. This feature may cause extra overhead at run time. This option has the same effect as option assume realloc_lhs.</p> <p>If you specify nostandard-realloc-lhs, the compiler uses the old Fortran 2003 rules when interpreting assignment statements. The left-hand side is assumed to be allocated with the correct shape to hold the right-hand side. If it is not, incorrect behavior will occur. This option has the same effect as option assume norealloc_lhs.</p> ]]>
</flag>
<!-- Stack allocation of local variables. The regexp carries no trailing anchor,
     so it matches any text beginning with "-auto". -->
<flag name="auto"
      class="optimization"
      regexp="-auto">
<example>Causes all local, non-SAVEd variables to be allocated to the runtime stack.</example>
<![CDATA[ <p>This option places local variables (scalars and arrays of all types), except those declared as SAVE, on the runtime stack. It is as if the variables were declared with the AUTOMATIC attribute. It does not affect variables that have the SAVE attribute or ALLOCATABLE attribute, or variables that appear in an EQUIVALENCE statement or in a common block. This option may provide a performance gain for your program, but if your program depends on variables having the same value as the last time the routine was invoked, your program may not function properly. If you want to cause variables to be placed in static memory, specify option [Q]save. If you want only scalar variables of certain intrinsic types to be placed on the runtime stack, specify option auto-scalar.</p> ]]>
</flag>
<!--  Optimization Flags  -->
<!-- zmm register usage level. The placeholder "keyword" is now written with &lt; / &gt;
     entities: inside CDATA the raw angle brackets reached the HTML output as an unknown
     tag and the word disappeared from the rendered page. -->
<flag name="f-qopt-zmm-usage"
      class="optimization"
      regexp="-qopt-zmm-usage=(low|high)(?=\s|$)">
<![CDATA[ <p> -qopt-zmm-usage=&lt;keyword&gt;<br /> Specifies the level of zmm registers usage. You can specify one of the following:<br /> low - Tells the compiler that the compiled program is unlikely to benefit from zmm registers usage. It specifies that the compiler should avoid using zmm registers unless it can prove the gain from their usage.<br /> high - Tells the compiler to generate zmm code without restrictions</p> ]]>
</flag>
<!-- Preferred vector width; only the value 512 is matched. -->
<flag name="mprefer-vector-width-512"
      class="optimization"
      regexp="-mprefer-vector-width=512">
<![CDATA[ <p> Specifies preferred 512b vector width for auto-vectorization. Defaults to 'none' which allows target specific decisions.</p> ]]>
</flag>
<!-- Multi-file interprocedural optimization; corrects the misspelling "propogation". -->
<flag name="f-ipo"
      class="optimization"
      regexp="-ipo\b">
<![CDATA[ <p>Multi-file ip optimizations that includes:<br /> - inline function expansion<br /> - interprocedural constant propagation<br /> - dead code elimination<br /> - propagation of function characteristics<br /> - passing arguments in registers<br /> - loop-invariant code motion</p> ]]>
</flag>
<!-- Reduced-precision floating-point division; the plain text line ahead of the CDATA is part of the description. -->
<flag name="f-no-prec-div"
      class="optimization"
      regexp="-no-prec-div">
(disable/enable[default] -[no-]prec-div)
<![CDATA[ <p>-prec-div improves precision of floating-point divides. It has a slight impact on speed. -no-prec-div disables this option and enables optimizations that give slightly less precise results than full IEEE division.</p> <p>When you specify -no-prec-div along with some optimizations, such as -xN and -xB (Linux) or /QxN and /QxB (Windows), the compiler may change floating-point division computations into multiplication by the reciprocal of the denominator. For example, A/B is computed as A * (1/B) to improve the speed of the computation.</p> <p>However, sometimes the value produced by this transformation is not as accurate as full IEEE division. When it is important to have fully precise IEEE division, do not use -no-prec-div which will enable the default -prec-div and the result is more accurate, with some loss of performance.</p> ]]>
</flag>
<!-- Math-library precision; the second group optionally takes a trailing ":funclist" made of lowercase letters, commas and colons. -->
<flag name="f-fimf-precision"
      class="optimization"
      regexp="-fimf-precision=(high|medium|low)($|[a-z\,\:]+|)">
<![CDATA[ <p> -fimf-precision=value[:funclist]<br /> defines the accuracy (precision) for math library functions<br /> value - defined as one of the following values<br /> high - equivalent to max-error = 0.6<br /> medium - equivalent to max-error = 4 (DEFAULT)<br /> low - equivalent to accuracy-bits = 11 (single precision); accuracy-bits = 26 (double precision)<br /> funclist - optional comma separated list of one or more math library functions to which the attribute should be applied</p> ]]>
</flag>
<!-- Accuracy setting for sqrt; only the value 14 is matched. Adds the missing space after the comma in the description. -->
<flag name="f-fimf-accuracy-bits-sqrt"
      class="optimization"
      regexp="-fimf-accuracy-bits-sqrt=14\b"> Define the relative error, measured by the number of correct bits, for math library function results </flag>
<!-- Three single-token optimization switches, each ending at a word boundary. -->
<flag name="f-qopt-dynamic-align"
      class="optimization"
      regexp="-qopt-dynamic-align\b"> Enable peeling to optimize alignment for vectorization. </flag>
<flag name="f-fvec-peel-loops"
      class="optimization"
      regexp="-fvec-peel-loops\b"> Enables peel loop vectorization. </flag>
<flag name="f-ansi-alias"
      class="optimization"
      regexp="-ansi-alias\b"> Enable/disable(DEFAULT) use of ANSI aliasing rules in optimizations; user asserts that the program adheres to these rules. </flag>
<!-- Internal compiler switch. The regexp now takes exactly one keyword after -switch;
     the former greedy ".*" consumed every flag that followed it on the command line.
     The description replaces a bare "TODO".
     NOTE(review): the effect of each individual keyword is not documented here; confirm
     against the compiler documentation for the keywords actually used. -->
<flag name="f-switch"
      class="optimization"
      regexp="-switch\s+\S+"> Sets the internal compiler switch named by the keyword that follows the option. </flag>
<!-- OpenMP declare-target default mapping (firstprivate value only) and the clang pass-through prefix. -->
<flag name="f-fopenmp-declare-target-scalar-defaultmap"
      class="optimization"
      regexp="-fopenmp-declare-target-scalar-defaultmap=firstprivate\b"> Assume that a scalar declare target variable with implicit data-mapping referenced in a 'target' construct has the same value in the host and device environment </flag>
<flag name="f-Xclang"
      class="optimization"
      regexp="-Xclang\b"> Pass argument to clang -cc1 </flag>
<!-- Backend pass-through; the regexp expects the argument wrapped in single quotes. -->
<flag name="f-Xopenmp-target-backend"
      class="optimization"
      regexp="-Xopenmp-target-backend '.*'">
<![CDATA[ <p> Pass arg to the OpenMP based target backend.<br /> -device &lt;arch&gt; - set target device.<br /> -revision_id &lt;revision_id&gt; - Target stepping. Can be decimal or hexadecimal value.<br /> -cl-fast-relaxed-math - Sets the optimization options -cl-finite-math-only and -cl-unsafe-math-optimizations, which enable optimizations for floating-point arithmetic that may violate the IEEE 754 standard and the OpenCL numerical compliance requirements.<br /> The device and revision_id options are only needed for AOT mode.</p> ]]>
</flag>
<!-- OpenMP offload target selection. The alternatives are ordered longest-first: with
     "spir64" tried before "spir64_x86_64" the match stopped after "spir64" and left
     "_x86_64" behind as unrecognised text. Also corrects "Proessor" in the description. -->
<flag name="f-fopenmp-targets"
      class="optimization"
      regexp="-fopenmp-targets=(spir64_x86_64|spir64_gen|spir64)(=$|[a-z\,\:\=\-&quot;]+|)">
<![CDATA[ <p> Enables offloading to a specified GPU target if OpenMP features have been enabled. <br /> Is a target triple device name. The following triplets are supported. <br /> spir64 - Tells the compiler to enable offloading to SPIR64-based devices. <br /> spir64_x86_64 - Tells the compiler to enable offloading to Intel CPUs. <br /> spir64_gen - Tells the compiler to enable offloading to Intel Processor Graphics.</p> ]]>
</flag>
<!-- Loop optimizer switch for OpenMP device compilation. -->
<flag name="f-fopenmp-target-loopopt"
      class="optimization"
      regexp="-fopenmp-target-loopopt\b"> Enables the loop optimizer and auto-vectorization for OpenMP offloading device compilation when option O2 or higher is set or specified. </flag>
<!-- Register allocation mode; the regexp accepts only the pvc device with one of four modes, plus an optional lowercase tail. -->
<flag name="f-ftarget-register-alloc-mode"
      class="optimization"
      regexp="-ftarget-register-alloc-mode=pvc:(default|small|large|auto)($|[a-z\,\:]+|)">
<![CDATA[ <p> -ftarget-register-alloc-mode=device-name:reg-mode[, device-name:reg-mode][,...]<br /> device-name - Is the device name. Currently, you can only specify the following: pvc Indicates a Ponte Vecchio (PVC) device.<br /> reg-mode - Is the register allocation mode. It can be any of the following: <br /> 
default - Tells the target backend to not impose any specification when choosing a register allocation mode. <br /> small - Tells the target backend to select small register allocation mode (for PVC, this means to use the 128 register file). <br /> large - Tells the target backend to select large register allocation mode (for PVC, this means to use the 256 register file). <br /> auto - Tells the target backend to use internal heuristics to select a register allocation mode based on kernel analysis. </p> ]]>
</flag>
<!-- Loop-counter width for collapsed OpenMP loop nests. -->
<flag name="f-fopenmp-optimistic-collapse"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-fopenmp-optimistic-collapse\b">
<![CDATA[ <p>When using the collapse clause on a loop nest the default behavior is to automatically extend the representation of the loop counter to 64 bits for the cases where the sizes of the collapsed loops are not known at compile time. To prevent this conservative choice and use at most 32 bits, compile your program with the -fopenmp-optimistic-collapse.</p> ]]>
</flag>
<!-- AVX2 code generation target. Repairs words that had lost characters in the
     description ("procesr", "Co processors", "familie"). -->
<flag name="CORE-AVX2"
      class="optimization"
      regexp="-march=core-avx2(?=\s|$)">
<![CDATA[ <p>May generate Intel® AVX2, AVX, SSE4.2, SSE4.1, SSSE3, SSE3, SSE2 and SSE instructions for Intel® processors. Optimizes for 4th, 5th and 6th generation Intel® Core processors and the Intel® Xeon® Processor E3 v3, E5 v3, E7 v3, E3 v4, E5 v4 and E7 v4 families. Available in compiler versions 13 and later.</p> ]]>
</flag>
<!-- AVX-512 code generation target. Repairs instruction-set names that had lost
     characters in the description ("AVX-12", "Detectio", "AVX-51", "AVX2,VX"). -->
<flag name="xCORE-AVX512"
      class="optimization"
      regexp="-xCORE-AVX512(?=\s|$)">
<![CDATA[ <p>Code is optimized for Intel(R) processors with support for AVX-512 instructions. May generate Intel® AVX-512 Foundation instructions, Intel® AVX-512 Conflict Detection instructions, Intel® AVX-512 Doubleword and Quadword instructions, Intel® AVX-512 Byte and Word instructions, Intel® AVX-512 Vector Length extensions, Intel® AVX2, AVX, SSE4.2, SSE4.1, SSSE3, SSE3, SSE2 and SSE instructions for Intel® processors. Optimizes for a future Intel® processor. Available in compiler version 15 update 1 and later. </p> ]]>
</flag>
<!-- OpenMP enablement: three compiler spellings plus the benchmark macro. The first
     two entries also list the stand-alone icx / icpx / ifx compilers. -->
<flag name="fopenmp"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx, icx, icpx, ifx"
      regexp="-fopenmp\b">
<![CDATA[ <p>Parse OpenMP pragmas and generate parallel code. Advanced users who prefer to use OpenMP* as it is implemented by the LLVM community can get most of that functionality by using -fopenmp.</p> ]]>
</flag>
<flag name="fiopenmp"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx, icx, icpx, ifx"
      regexp="-fiopenmp\b">
<![CDATA[ <p>Enable the compiler to generate multi-threaded code based on the OpenMP* directives. Similar behavior was granted by -qopenmp in previous versions.</p> ]]>
</flag>
<flag name="qopenmp"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-qopenmp\b">
<![CDATA[ <p>Enable OpenMP compilation for hpc2021.</p> ]]>
</flag>
<flag name="DSPEC_OPENMP"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-DSPEC_OPENMP\b">
<![CDATA[ <p>Enable OPENMP programming model for hpc2021.</p> ]]>
</flag>
<!-- Accelerator-aware MPI macro; corrects the misspelling "conjuction" in the description. -->
<flag name="DSPEC_ACCEL_AWARE_MPI"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-DSPEC_ACCEL_AWARE_MPI\b">
<![CDATA[ <p>Definition of this macro indicates that the MPI implementation supports accelerator device-to-device transfers. Used in conjunction when using OpenACC or OpenMP w/ target offload.</p> ]]>
</flag>
<!-- Benchmark macro SPEC_COLLAPSE. The description replaces a run of "A" characters
     that had been left in as a placeholder.
     NOTE(review): presumably selects source variants that use the OpenMP collapse
     clause; confirm against the benchmark sources and extend the description. -->
<flag name="DSPEC_COLLAPSE"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-DSPEC_COLLAPSE\b">
<![CDATA[ <p>Defines the SPEC_COLLAPSE preprocessor macro for the hpc2021 benchmark sources.</p> ]]>
</flag>
<!-- Aggressive optimization level. The regexp now matches exactly -Ofast; the former
     pattern "-Ofast?s?e?" also accepted strings such as -Ofas and -Ofastse, which are
     not the option being described. -->
<flag name="Ofast"
      class="optimization"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx, icx, icpx, ifx"
      regexp="-Ofast\b">
<![CDATA[ <p>Enable -O3 -no-prec-div -fp-model fast=2 optimizations.</p> ]]>
<example>-Ofast</example>
</flag>
<!-- Optimization level 3; must be followed by whitespace or end of string. -->
<flag name="f-O3"
      class="optimization"
      regexp="-O3(?=\s|$)">
<![CDATA[ Optimize for maximum speed and enable more aggressive optimizations that may not improve performance on some programs. ]]>
</flag>
<!-- Gather/scatter optimization. The previous description spoke of square-root
     optimizations, which belongs to a different option. -->
<flag name="f-qopt-multiple-gather-scatter-by-shuffles"
      class="optimization"
      regexp="-qopt-multiple-gather-scatter-by-shuffles(?=\s|$)"> Enables the optimization for multiple adjacent gather/scatter type vector memory references. </flag>
<!-- Streaming-store policy. The separator between option and keyword may now be "=" as
     well as a space, so the documented -qopt-streaming-stores=keyword form is matched too. -->
<flag name="f-qopt-streaming-stores"
      class="optimization"
      regexp="-qopt-streaming-stores[= ](always|auto|never)">
<![CDATA[ <p>Specifies whether streaming stores are generated:</p> <p>always - enables generation of streaming stores under the assumption that the application is memory bound</p> <p>auto - compiler decides when streaming stores are used (DEFAULT)</p> <p>never - disables generation of streaming stores</p> ]]>
</flag>
<!-- Clang-style optimization switches, each ending at a word boundary. -->
<flag name="flto"
      class="optimization"
      regexp="-flto\b">
<![CDATA[ <p>Enable LTO (Link Time Optimization) in 'full' mode.</p> ]]>
</flag>
<flag name="ffast-math"
      class="optimization"
      regexp="-ffast-math\b">
<![CDATA[ <p>Allow aggressive, lossy floating-point optimizations.</p> ]]>
</flag>
<flag name="funroll-loops"
      class="optimization"
      regexp="-funroll-loops\b">
<![CDATA[ <p>Turn on loop unroller.</p> ]]>
</flag>
<!-- Catch-all for warning options. The regexp now stops at the first whitespace; the
     former "-W.*" ran to the end of the flag string and absorbed any options that
     followed the warning flag. -->
<flag name="W"
      class="other"
      compilers="mpicc, mpicxx, mpifort, mpiicc, mpiicpc, mpiifort, mpiicx, mpiicpx, mpiifx"
      regexp="-W\S*">
<![CDATA[ <p>Enable the specified warning.</p> ]]>
<example>-Wno-incompatible-function-pointer-types</example>
</flag>
</flagsdescription>
