<?xml version="1.0"?>
<!DOCTYPE flagsdescription SYSTEM "http://www.spec.org/dtd/cpuflags1.dtd">
<flagsdescription>

<!-- filename to begin with "macosx-iccifort-v10.1-flags-file" -->
<filename>macosx-iccifort-v10.1-flags-file-20080408</filename>

<title>SPEC CPU2006 Flags Disclosure for the Intel Compilers (v10.1) on Mac OSX</title>

<header>
<![CDATA[
<p>Last updated: 8-Apr-2008</p>
<p>This flags disclosure file describes the compiler flags associated with
the following Intel compilers: </p>
<ul>
  <li>Intel C++ Compiler for Mac OSX v10.1</li>
  <li>Intel Fortran Compiler for Mac OSX v10.1</li>
</ul>
]]>
</header>

<!--
********************************************************************************
* Platform Settings
********************************************************************************
-->
<platform_settings>
<![CDATA[
<p><b>Platform settings</b></p>
<p>The system under test is deemed reasonably quiet by turning off the following
from the System Preferences panel: </p>
<ul> 
<li>Automatic Software Updates (turned ON by default)</li>
<li>Screen Savers (turned ON by default)</li>
<li>Unused wireless and Bluetooth connectivity (turned ON by default)</li>
<li>Network time synchronization (turned ON by default)</li>
</ul>
<p><b>OMP_NUM_THREADS</b></p>
<p>
	Sets the maximum number of threads to use for OpenMP* parallel regions if no
other value is specified in the application.  This environment variable applies to both
-openmp and -parallel (Linux and Mac OS X).  
<br>Example syntax on a Mac OS X system with 8 cores:
<br>export OMP_NUM_THREADS=8
</p>


]]>
</platform_settings>

<!--
********************************************************************************
* Compilers
********************************************************************************
-->

<flag name="intel_icc" class="compiler" regexp="(?:/\S+/)?icc\b">
  <example>icc</example>
  <![CDATA[
    <p>icc invokes the Intel C++ compiler. It is invoked as:</p>
    <p>icc [ options ] file1 [ file2 ... ]</p>
    <p>where, </p>
    <ul>
      <li>options: represent zero or more compile options</li> 
      <li>fileN: is  a  C/C++  source (.C .c .cc .cp .cpp .cxx .c++ .i), 
          assembly (.s), object (.o), static library (.a), or other linkable
          file.</li>
    </ul>
    <p>Invoking the compiler using icc compiles .c and .i files as C. 
       Using icc only links in C++ libraries if C++ source is provided
       on the command line.
    </p>
  ]]>
</flag>

<flag name="intel_icc_64bit" class="compiler" regexp="\/opt\/.*\/cce\/.*icc\b">

    <![CDATA[
      <p>invokes the Intel C compiler for Intel 64 applications </p>
    ]]>

</flag>

<flag name="intel_icpc_64bit" class="compiler" regexp="\/opt\/.*\/cce\/.*icpc\b">

    <![CDATA[
      <p>invokes the Intel C++ compiler for Intel 64 applications </p>
    ]]>

</flag>

<flag name="intel_icc_32bit" class="compiler" regexp="\/opt\/.*\/cc\/.*icc\b">

    <![CDATA[
      <p>invokes the Intel C compiler for Intel 32 applications </p>
    ]]>

</flag>

<flag name="intel_icpc_32bit" class="compiler" regexp="\/opt\/.*\/cc\/.*icpc\b">

    <![CDATA[
      <p>invokes the Intel C++ compiler for Intel 32 applications </p>
    ]]>

</flag>

<flag name="intel_ifort_32bit" class="compiler" regexp="\/opt\/.*\/fc\/.*ifort\b">

    <![CDATA[
      <p>invokes the Intel Fortran compiler for Intel 32 applications </p>
    ]]>

</flag>

<flag name="intel_ifort_64bit" class="compiler" regexp="\/opt\/.*\/fce\/.*ifort\b">

    <![CDATA[
      <p>invokes the Intel Fortran compiler for Intel 64 applications </p>
    ]]>

</flag>

<flag name="intel_icpc" class="compiler" regexp="(?:/\S+/)?icpc\b">
  <example>icpc</example>
  <![CDATA[
    <p>The icpc command uses the same compiler options as 
       the icc command. Invoking the compiler using icpc compiles .c, and
       .i  files as C++. Using icpc always links in C++ libraries.
    </p>
  ]]>
</flag>

<flag name="intel_ifort" class="compiler" regexp="(?:/\S+/)?ifort\b">
  <example>ifort</example>
  <![CDATA[
    <p>ifort invokes the Intel Fortran compiler. It is invoked as:</p>
    <p>ifort [ options ] file1 [ file2 ... ]</p>
    <p>where, </p>
    <ul>
      <li>options: represent zero or more compile options</li>
      <li>fileN: is a Fortran source file, assembly file, object
          file, object library, or other linkable file. </li>
    </ul>
  ]]>
</flag>


<flag name="intel_c99" class="compiler" regexp="(?:/\S+/)?-c99\b">
   <![CDATA[
      Invoke the Intel C++ compiler in C99 mode for Mac OSX.
   ]]>
</flag>


<flag name="intel64_c_compiler_include_path" class="compiler" regexp="(?:/\S+/)?-I\/opt\/intel\/cce\/.*\/include">
         	 Compiler option to set the path for include files.
         	 Used in some integer peak benchmarks which were built using the Intel 64-bit C++ compiler.
</flag>

<flag name="intel64_c_compiler_library_path" class="compiler" regexp="(?:/\S+/)?-L\/opt\/intel\/cce\/.*\/lib">
                 Compiler option to set the path for library files.
         	 Used in some integer peak benchmarks which were built using the Intel 64-bit C++ compiler.
</flag>

<flag name="ia32_c_compiler_include_path" class="compiler" regexp="(?:/\S+/)?-I\/opt\/intel\/cc\/.*\/include">
         	 Compiler option to set the path for include files.
         	 Used in some peak benchmarks which were built using the Intel 32-bit C++ compiler.
</flag>

<flag name="ia32_c_compiler_library_path" class="compiler" regexp="(?:/\S+/)?-L\/opt\/intel\/cc\/.*\/lib">
                 Compiler option to set the path for library files.
         	 Used in some integer peak benchmarks which were built using the Intel 32-bit C++ compiler.
</flag>

<flag name="ia32_f_compiler_include_path" class="compiler" regexp="(?:/\S+/)?-I\/opt\/intel\/fc\/.*\/include">
         	 Compiler option to set the path for include files.
         	 Used in some peak benchmarks which were built using the Intel 32-bit Fortran compiler.
</flag>

<flag name="ia32_f_compiler_library_path" class="compiler" regexp="(?:/\S+/)?-L\/opt\/intel\/fc\/.*\/lib">
                 Compiler option to set the path for library files.
         	 Used in some integer peak benchmarks which were built using the Intel 32-bit Fortran compiler.
</flag>

<flag name="intel64_f_compiler_include_path" class="compiler" regexp="(?:/\S+/)?-I\/opt\/intel\/fce\/.*\/include">
         	 Compiler option to set the path for include files.
         	 Used in some peak benchmarks which were built using the Intel 64-bit Fortran compiler.
</flag>

<flag name="intel64_f_compiler_library_path" class="compiler" regexp="(?:/\S+/)?-L\/opt\/intel\/fce\/.*\/lib">
                 Compiler option to set the path for library files.
         	 Used in some integer peak benchmarks which were built using the Intel 64-bit Fortran compiler.
</flag>


<!--
********************************************************************************
* Portability
********************************************************************************
-->
<flag name="no_for_main" class="portability" regexp="(?:/\S+/)?-nofor_main\b">
   <![CDATA[
      <p>For mixed-language benchmarks, tell the compiler that the main 
      program is not written in Fortran </p>
   ]]>
</flag>

<!--
********************************************************************************
* Optimizations
********************************************************************************
-->
<flag name="f-O1" class="optimization" regexp="-O1\b">
      
   <![CDATA[
      <p>Enables optimizations for speed and disables some optimizations that 
         increase code size and affect speed. To limit code size, this option: </p>
      <ul>
         <li>Enables global optimization; this includes data-flow analysis, 
           code motion, strength reduction and test replacement, split-lifetime
           analysis, and instruction scheduling. 
         <li>Disables intrinsic recognition and intrinsics inlining. 
         <li>Disables loop unrolling. 
      </ul> 

      <p>The O1 option may improve performance for applications with very large 
      code size, many branches, and execution time not dominated by code within loops. </p> 
         
      <p>On IA-32 Mac OSX platforms, -O1 sets the following:</p> 
      <ul>
         <li>-unroll0, 
	 <li>-fno-builtin, 
	 <li>-mno-ieee-fp, 
	 <li>-fomit-frame-pointer (same as -fp), 
	 <li>-ffunction-sections 
      </ul>
   ]]>

   <include flag="f-unrolln"/>
   <include flag="f-no-builtin"/>
   <include flag="f-mno-ieee-fp"/>
   <include flag="f-fomit-frame-pointer"/>
   <include flag="f-ffunction-sections"/>
   
</flag>

<flag name="f-O2" class="optimization" regexp="-O2\b">

   <![CDATA[
      <p>Enables optimizations for speed. This is the generally recommended 
      optimization level. This option also enables: </p>
      <ul>
      <li>Inlining of intrinsics
      <li>Intra-file interprocedural optimizations, which include:
        <ul>
        <li>inlining
        <li>constant propagation
        <li>forward substitution
        <li>routine attribute propagation
        <li>variable address-taken analysis
        <li>dead static function elimination
        <li>removal of unreferenced variables
        </ul>
      <li>The following capabilities for performance gain: 
        <ul>
        <li>constant propagation
        <li>copy propagation
        <li>dead-code elimination
        <li>global register allocation
        <li>global instruction scheduling and control speculation
        <li>loop unrolling
        <li>optimized code selection
        <li>partial redundancy elimination
        <li>strength reduction/induction variable simplification
        <li>variable renaming
        <li>exception handling optimizations
        <li>tail recursions
        <li>peephole optimizations
        <li>structure assignment lowering and optimizations
        <li>dead store elimination
	</ul>
      </ul>
   ]]> 

</flag>

<flag name="f-O3" class="optimization" regexp="-O3\b">

   <![CDATA[
      <p>Enables O2 optimizations plus more aggressive optimizations, 
         such as prefetching, scalar replacement, and loop and memory 
         access transformations. </p>
      <p>Enables optimizations for maximum speed, such as: </p>
      <ul>
        <li>Loop unrolling, including instruction scheduling
        <li>Code replication to eliminate branches
        <li>Padding the size of certain power-of-two arrays to allow 
	  more efficient cache use
      </ul>

      <p>On IA-32 and Intel EM64T processors, when O3 is used with options 
         -ax or -x (Linux/Mac OSX), the compiler performs more aggressive 
	 data dependency analysis than for O2, which may result in 
	 longer compilation times. </p>
      <p>
         The O3 optimizations may not cause higher performance unless loop and 
         memory access transformations take place. The optimizations may slow 
         down code in some cases compared to O2 optimizations.
      </p>
      <p>
         The O3 option is recommended for applications that have loops that heavily 
         use floating-point calculations and process large data sets. 
         </p>
   ]]> 
 
</flag>

<flag name="f-ip" class="optimization" regexp="-ip\b">
   <![CDATA[
     <p>This option enables additional interprocedural optimizations for single 
      file compilation. These optimizations are a subset of full intra-file 
      interprocedural optimizations. One of these optimizations enables the 
      compiler to perform inline function expansion for calls to functions 
      defined within the current source file.</p>
   ]]> 
</flag>

<flag name="f-ipo" class="optimization" regexp="-ipo\b">
   <![CDATA[
      <p>This option enables multi-file interprocedural optimizations that include:</p>
      <ul>
       <li>inline function expansion
       <li>interprocedural constant propagation
       <li>dead code elimination
       <li>propagation of function characteristics
       <li>passing arguments in registers
       <li>loop-invariant code motion
      </ul>

      <p>When you specify this option, the compiler  performs
         inline  function  expansion  for  calls to functions
	 defined in separate files.
       </p>
   ]]> 
</flag>

<flag name="f-fast" class="optimization" regexp="-fast\b">

   <![CDATA[
      <p>The -fast option enhances execution speed across the entire program 
      by including the following options that can improve run-time performance:</p>

      <ul>
      <li>-xT (optimizations for Intel Core 2 Duo processor family)
      <li>-O3 (maximum speed and high-level optimizations)
      <li>-ipo (enables interprocedural optimizations across files)
      <li>-no-prec-div (disable -prec-div), where -prec-div 
          improves precision of FP divides (some speed impact)
      <li>-mdynamic-no-pic, where -mdynamic-no-pic indicates that code is not relocatable
      </ul>
   
      <p>Options set by -fast cannot be overridden; list options separately to
      change behavior. The options set by -fast may change from release to release.</p>
   ]]> 

   <include flag="f-xT"/>
   <include flag="f-O3"/>
   <include flag="f-ipo"/>
   <include flag="f-no-prec-div"/>
   <include flag="f-mdynamic-no-pic"/>   
</flag>

<flag name="f-xT" class="optimization" regexp="-xT\b">
<![CDATA[
      <p>The -xT option tells the compiler to generate optimized code for the Intel Core 2 Duo processor family. It
      can generate SSSE3, SSE3, SSE2, and SSE instructions for the Intel processors. </p>  
]]>
</flag>

<flag name="f-m32" class="optimization" regexp="-m32\b">
<![CDATA[
      <p>Tells the compiler to generate code for IA-32 architecture. If this flag is not specified, the compiler 
      generates code based on whether 32-bit or the 64-bit compiler is in the search path. </p>  
]]>
</flag>

<flag name="f-m64" class="optimization" regexp="-m64\b">
<![CDATA[
      <p>Tells the compiler to generate code for EM64T architecture. If this flag is not specified, the compiler 
      generates code based on whether 32-bit or the 64-bit compiler is in the search path. </p>  
]]>
</flag>

<!-- Entry for the auto-parallelizer switch; matches the option spelled "parallel". -->
<flag name="f-parallel" class="optimization" regexp="-parallel\b">
<![CDATA[
      <p>Tells the auto-parallelizer to generate multithreaded code for loops that can be safely executed in parallel. 
      To use this option, you must also use option O2 or O3. </p>  
]]>
</flag>

<!-- Entry for the run-time control option of the auto-parallelizer. -->
<flag name="f-par-runtime-control" class="optimization" regexp="-par-runtime-control\b">
<![CDATA[
      <p>Enables the compiler to generate runtime control code for effective automatic parallelization. </p>  
]]>
</flag>

<flag name="f-lstmalloc" class="optimization" regexp="-lstmalloc\b">
<![CDATA[
      <p>Tells the compiler to link in the optimized malloc implementation that resides under /usr/lib. </p>  
]]>
</flag>

<flag name="f-Link32BitCXXLibraries" class="optimization" regexp="-L/opt/intel/cc/\d*\.\d*\.\d{3}/lib\b">
<![CDATA[
      <p>Links the 32-bit Intel's C++ compiler libraries.  </p>  
]]>
</flag>

<flag name="f-Link64BitCXXLibraries" class="optimization" regexp="-L/opt/intel/cce/\d*\.\d*\.\d{3}/lib\b">
<![CDATA[
      <p>Links the 64-bit Intel's C++ compiler libraries.  </p>  
]]>
</flag>

<flag name="f-Link32BitFortranLibraries" class="optimization" regexp="-L/opt/intel/fc/\d*\.\d*\.\d{3}/lib\b">
<![CDATA[
      <p>Links the 32-bit Intel's Fortran compiler libraries.  </p>  
]]>
</flag>

<flag name="f-Link64BitFortranLibraries" class="optimization" regexp="-L/opt/intel/fce/\d*\.\d*\.\d{3}/lib\b">
<![CDATA[
      <p>Links the 64-bit Intel's Fortran compiler libraries.  </p>  
]]>
</flag>


<flag name="f-mdynamic-no-pic" regexp="-mdynamic-no-pic\b" class="optimization">
<![CDATA[
<p>Code is not relocatable, but external references are relocatable.</p>
]]>
</flag>

<!-- Entry for the negative form of prec-div. The text explains what prec-div
     guarantees and what is traded away when it is switched off. -->
<flag name="f-no-prec-div" class="optimization" regexp="-no-prec-div\b">
   <![CDATA[
      <p>This option disables -prec-div, the option that improves the precision of 
      floating-point divides at a slight cost in speed.</p>

      <p>With some optimizations, such as -xN and -xB (Linux) or /QxN and /QxB (Windows), 
      the compiler may change floating-point division computations into multiplication 
      by the reciprocal of the denominator. For example, A/B is computed as 
      A * (1/B) to improve the speed of the computation. </p>

      <p>However, sometimes the value produced by this transformation is 
      not as accurate as full IEEE division. When it is important to have fully 
      precise IEEE division, use -prec-div to disable the floating-point 
      division-to-multiplication optimization. The result is more accurate, with some 
      loss of performance.</p>

      <p>If you specify -no-prec-div (Linux and Mac OSX), it enables 
      optimizations that give slightly less precise results than full IEEE 
      division. The default is -prec-div.</p>
   ]]> 

</flag>

<flag name="f-prof_gen" class="optimization" regexp="-prof_gen\b">

   <![CDATA[
      <p>Instrument program for profiling for the first phase of
      two-phase profile guided optimization. This instrumentation gathers information
      about a program's execution paths and data values but does not gather
      information from hardware performance counters. The profile instrumentation
      also gathers data for optimizations which are unique to profile-feedback
      optimization.</p>
   ]]> 

</flag>

<flag name="f-prof_use" class="optimization" regexp="-prof_use\b">
   <![CDATA[
      <p>Instructs the compiler to produce a profile-optimized 
      executable and merges available dynamic information (.dyn) 
      files into a pgopti.dpi file. If you perform multiple 
      executions of the instrumented program, -prof_use merges 
      the dynamic information files again and overwrites the 
      previous pgopti.dpi file.</p>

      <p>Without any other options, the current directory is 
      searched for .dyn files</p>
   ]]> 
</flag>

<flag name="f-i-static" class="other" regexp="\-i\-static\b">
   <![CDATA[
     <p>Generates static binaries. Libraries are statically linked in to the executable. Default behavior on
     Mac OS X is to produce dynamically linked binaries. This flag has been deprecated in the 10.x compiler; 
     use -static-intel instead. </p>
   ]]> 
</flag>

<flag name="f-static-intel" class="optimization" regexp="\-static\-intel\b">
   <![CDATA[
     <p>This option causes the Intel-provided libraries to be linked in statically. It is the opposite of -shared-intel. Note that
      when this option is provided, libguide is also linked in statically.</p>
   ]]> 
</flag>

<flag name="f-unrolln" class="optimization" regexp="-unroll\d+\b">
   <![CDATA[
     <p>Tells the compiler the maximum number of times (n) to unroll loops. </p>
   ]]> 
</flag>

<!-- Matches the Mac OS X spelling of the option (fno-builtin). This entry is
     also referenced by name from the include list of f-O1. -->
<flag name="f-no-builtin" class="optimization" regexp="-fno-builtin\b">
   <![CDATA[
     <p>Disables inline expansion of all intrinsic functions. </p>
   ]]> 
</flag>

<!-- Matches the Mac OS X spelling of the option (mno-ieee-fp). This entry is
     also referenced by name from the include list of f-O1. -->
<flag name="f-mno-ieee-fp" class="optimization" regexp="-mno-ieee-fp\b">
   <![CDATA[
      <p>Disables conformance to the ANSI C and IEEE 754 standards for 
      floating-point arithmetic.</p>
   ]]> 
</flag>

<!-- Matches the Mac OS X spelling of the option (fomit-frame-pointer). This
     entry is also referenced by name from the include list of f-O1. -->
<flag name="f-fomit-frame-pointer" class="optimization" regexp="-fomit-frame-pointer\b">
   <![CDATA[
     <p>Allows use of EBP as a general-purpose register in optimizations. </p>
   ]]> 
</flag>

<!-- Matches the Mac OS X spelling of the option (ffunction-sections). This
     entry is also referenced by name from the include list of f-O1. -->
<flag name="f-ffunction-sections" class="optimization" regexp="-ffunction-sections\b">
   <![CDATA[
      <p>Places each function in its own COMDAT section.</p>
   ]]> 
</flag>

<!--
  Generic entry for options handed to the linker.
  NOTE(review): the regexp is the literal text "F-Wl", so it can only match a
  token containing that exact string; it does not match a plain "-Wl," option.
  Possibly a leftover of an F- naming convention; confirm the intended
  pattern before changing it, because the stack_addr_ld and stack_size_ld
  entries also match options that start with -Wl.
-->
<flag name="linker_flags" regexp="F-Wl" class="other">
   <![CDATA[
      <p>Pass options o1, o2, etc. to the linker for processing. </p>
   ]]>    
</flag>

<flag name="stack_addr_ld" regexp="-Wl,-stack_addr[,](0x)?([A-Za-z0-9]+)\b" class="other">
   <![CDATA[
  <p>
  Specifies the initial address of the stack pointer value,  where
  value  is a hexadecimal number rounded to the segment alignment.
  The default segment alignment is the target pagesize (currently,
  1000  hexadecimal for the PowerPC and for i386).  If -stack_size
  is specified and -stack_addr is not,  a  default  stack  address
  specific  for the architecture being linked will be used and its
  value printed as a warning  message.   This  creates  a  segment
  named  __UNIXSTACK.  Note that the initial stack address will be
  either at the high address of the segment or the low address  of
  the segment depending on which direction the stack grows for the
  architecture being linked.
  </p>
   ]]>    
</flag>

<flag name="stack_size_ld" regexp="-Wl,-stack_size[,](0x)?([A-Za-z0-9]+)\b" class="other">
   <![CDATA[
  <p>
  Specifies the size of the stack segment value, where value is  a
  hexadecimal  number  rounded  to  the  segment  alignment.   The
  default segment alignment is  the  target  pagesize  (currently,
  1000  hexadecimal for the PowerPC and for i386).  If -stack_addr
  is specified and -stack_size is not, a default stack  size  specific  
  for  the  architecture  being linked will be used and its
  value printed as a warning  message.   This  creates  a  segment
  named __UNIXSTACK .
  </p>
 ]]>    
</flag>

</flagsdescription>

