<?xml version="1.0"?>
<!DOCTYPE flagsdescription
    SYSTEM "http://www.spec.org/dtd/cpuflags1.dtd"
>

<!-- The lines above are NOT optional.  If you're adept at reading DTDs,
     the one that this file conforms to is at the URL listed above.  
     
     But most humans writing a flags file will want to have it automatically 
     checked using a validating parser such as RXP (available at
     http://www.ltg.ed.ac.uk/~richard/rxp.html), or use one of the on-line
     parsers:
       http://www.stg.brown.edu/service/xmlvalid/
       http://www.cogsci.ed.ac.uk/~richard/xml-check.html
     
     The parser used by the CPU tools is _not_ a validating parser, so it
     may be possible to sneak things by it that would not pass the checkers
     above.  However, if the checkers above say that your file is clean, it's
     clean.

     Flag files submitted to SPEC _will_ be checked by a validating parser.
     Invalid or not-well-formed flag files will be rejected.

     This file is
       Copyright (C) 2006 Standard Performance Evaluation Corporation
       All Rights Reserved
     
     This file may be freely modified and redistributed, provided that the
     copyright notice above and this notice remain unaltered.

     $Id: flags-simple.xml 4007 2006-03-17 11:34:42Z cloyce $

     **********************************************************************
     **********************************************************************
     Unless otherwise explicitly noted, all references to "section n.nn"
     refer to flag_description.html, available at

     http://www.spec.org/cpu2006/docs/flag_description.html
     **********************************************************************
     **********************************************************************

	NOTES:

		Earlier versions of this document contain items that are not currently
		in use with the CPU2006 or MPI2007 benchmark suites. Most of these
		items have been omitted here, but may need to be added later.

		Likely this document will be separated into platform vs. environment
		vs. compiler vs. flag information.

		This file is based on CPU2006_flags.20070614.xml.
		
-->
<flagsdescription>

<!-- Short identifier for this flags file.
     NOTE(review): presumably used by the SPEC tools when naming the generated
     flags dump; confirm against flag_description.html. -->
<filename>IBM-XL</filename>

<!-- Human-readable title of this flags description. -->
<title>IBM XL Compiler Flags and Common Unix Commands and Environment Settings</title>

<!-- CSS rules supplied by this file; wrapped in CDATA so the braces and any
     markup characters reach the tools as literal text.
     NOTE(review): where the tools insert this stylesheet is not shown here. -->
<style>
<![CDATA[
body { background: white; }
]]>
</style>

<!-- =====================================================================
  The <header> section is also entirely optional.  If it is provided, and
  no class is specified, then it will be inserted verbatim at the top
  of the flags dump.

  If a class is specified, that text will be inserted verbatim before flags
  of that class.  
  
  As the contents should be HTML, it will save lots of time to just enclose
  the whole thing in a CDATA section.  Section 2.3.1 again.
     ===================================================================-->
<!-- File-wide header (no class attribute is given).  The HTML payload lists
     the IBM XL C/C++ and Fortran compiler versions and a last-updated date. -->
<header>
<![CDATA[
<p>Compilers: IBM XL C/C++ Enterprise Edition Version 8.0</p>
<p>Compilers: IBM XL Fortran Enterprise Edition Version 10.1</p>
<p>Compilers: IBM XL C/C++ Enterprise Edition Version 9.0</p>
<p>Compilers: IBM XL Fortran Enterprise Edition Version 11.1</p>
<p>Compilers: IBM XL C/C++ Enterprise Edition Version 10.1</p>
<p>Compilers: IBM XL Fortran Enterprise Edition Version 12.1</p>
<p>Last updated: 04-Aug-2008</p> ]]>
</header>

<!-- =====================================================================
  Information about the meaning of boot-time settings, BIOS options,
  kernel tuning, and so forth can go in the 'platform_settings' section.

  They'll be appended to the end of both the flags dump and per-result flag report.

  As the contents should be HTML, it will save lots of time to just enclose
  the whole thing in a CDATA section.  Section 2.3.1 again.
     ===================================================================-->
<!-- Describes the fdpr command line shown in the list item below, followed by
     the fdpr usage text.  The payload is HTML: the usage text is kept inside
     the list item (a list may only contain li children), and literal
     '<', '>' and '&' are written as entities.  The CDATA wrapper only hides
     that HTML from the XML parser. -->
<platform_settings>
<![CDATA[
<ul>
<li> fdpr -q -O4 -A 32 -bldcg -shci 90 -sdp 9
<pre>
       The fdpr command (Feedback Directed Program Restructuring) is a performance-tuning utility that may help
       improve the execution time and the real memory utilization of user-level application programs. The fdpr program
       optimizes the executable image of a program by collecting information on the behavior of the program while the
       program is used for some typical workload, and then creating a new version of the program that is optimized for
       that workload. The new program generated by fdpr typically runs faster and uses less real memory.

Usage:
fdpr [options] -p program [-x invocation]
where -p specifies the input program, in a form of executable, shared object
or archive file
-x specifies how to invoke the program
[options] can be one or more of the following:

  Action Options:

  -123  Specifies which actions/phases to run, where:
        -1  generates instrumented program for profile gathering
        -2  runs the instrumented program and updates profile data (requires -x &lt;invocation&gt;)
        -3  generates optimized program
        Default is set to run all three phases (-123)

  -a/--action [action]  Specifies customized actions
  where [action] can be one of the following:
        anl          analyze program
        instr        generate instrumented program for profile gathering (same as -1)
        opt          generate optimized program (same as -3)
        check_sign   check fdpr signature in the input program


  Analysis Options:

  -esa, --extra-safe-analysis
                         Do not attempt to analyze unconventional CSects
                         containing hand-written assembly code (when used, must
                         be specified at both instrumentation and optimization
                         phases)
  -aawc/-noaawc, --analyze-assembly-written-csects/--noanalyze-assembly-written-csects
                         Analyze/Do not analyze objects written in assembly (when
                         used/not used, must be specified at both
                         instrumentation and optimization phases). The default
                         is set to analyze assembly written modules
  -iinf, --ignore-info   Ignore .info sections produced with the -qfdpr option
                         during compile time
  -fca, --funcsect-analysis
                         Apply special analysis for an input executable that was
                         compiled with the -qfuncsect compiler option
  -ff &lt;string&gt;, --file-format &lt;string&gt;
                         Input file format: can be LM (load module) or PO
                         (program object)


  Instrumentation Options:

  -ei, --embedded-instrumentation
                         Perform embedded instrumentation. Profile will be
                         collected into global variables
  -infp, --ignore-not-found-procedures
                         Ignore not found procedures
  -fd &lt;Fdesc&gt;, --file-descriptor &lt;Fdesc&gt;
                         Set file descriptor number to be used when opening the
                         profile file that is mapped to the shared memory area
                         during profiling. The default of &lt;Fdesc&gt; is set to the
                         maximum-allowed open files
  -M &lt;addr&gt;, --profile-map &lt;addr&gt;
                         Set shared memory segment address for profiling.
                         Alternate shared memory addresses are needed when the
                         instrumented program application creates a conflict
                         with the shared-memory addresses preserved for the
                         profiling. Typical alternative values are 0x40000000,
                         0x50000000, ... up to 0xC0000000. Default is set to
                         0x3000000
  -ri/-nori, --register-instrumentation/--noregister-instrumentation
                         Instrument/Do not instrument the input program file to
                         collect profile information about indirect branches via
                         registers (applicable only with the -a instr option).
                         The default is set to collect the profile information
  -sfp/-nosfp, --save-floating-point-registers/--nosave-floating-point-registers
                         Save/Do not save floating point registers in
                         instrumented code (the default is set to save floating
                         point registers)
  -ipnvr, --instrumentation-preserve-non-volatile-registers
                         Preserve non volatile registers while calling stubs
  -iplr/-noiplr, --instrumentation-preserve-link-register/--noinstrumentation-preserve-link-register
                         Preserve/Do not preserve link register while calling
                         stubs
  -ipcr/-noipcr, --instrumentation-preserve-condition-register/--noinstrumentation-preserve-condition-register
                         Preserve/Do not preserve Condition Register while
                         calling stubs
  -ipctr/-noipctr, --instrumentation-preserve-count-register/--noinstrumentation-preserve-count-register
                         Preserve/Do not preserve Count Register while calling
                         stubs
  -ipxer/-noipxer, --instrumentation-preserve-fixed-point-exception-register/--noinstrumentation-preserve-fixed-point-exception-register
                         Preserve/Do not preserve Fixed-Point Exception Register
                         while calling stubs
  -ipspr/-noipspr, --instrumentation-preserve-special-registers/--noinstrumentation-preserve-special-registers
                         Preserve/Do not preserve special purpose registers while
                         calling stubs
  -ipvr/-noipvr, --instrumentation-preserve-volatile-registers/--noinstrumentation-preserve-volatile-registers
                         Preserve/Do not preserve volatile registers while
                         calling stubs. -noipvr implies -noipnvr and -nosfp
  -ipe/-noipe, --instrumentation-preserve-environment/--noinstrumentation-preserve-environment
                         Do not preserve registers that are not overwritten while
                         calling stubs. -noipe implies -noipvr -noipspr
  -spescr &lt;0-127&gt;, --spe-scratch-register &lt;0-127&gt;
                         Specify a global SPE scratch register, decreasing
                         instrumentation overhead, in order to minimize
                         possibility of local store overflow


  Profile Files Options:

  -af &lt;prof_file&gt;, --ascii-profile-file &lt;prof_file&gt;
                         Set the name of an ASCII profile file containing profile
                         information given by three different XML entry options:
                         &lt;Simple .. &gt;, &lt;Cond .. &gt; and &lt;Reg .. &gt; for profiling
                         data on regular, conditional or branch via registers
                         instructions accordingly
  -aop, --accept-old-profile
                         Accept old profile file collected on previous versions
                         of the input program file (requires the -f flag)
  -f &lt;prof_file&gt;, --profile-file &lt;prof_file&gt;
                         Set the profile file name. The profile file is created
                         during the instrumentation phase when issued with -a
                         instr option. The file is read by fdpr when issued with
                         the -a opt or -a anl options. Note, the profile file is
                         updated automatically when running the instrumented
                         program
  -spefdir &lt;directory&gt;, --spe-profile-directory &lt;directory&gt;
                         Set the directory where SPE profiles are located in
                         integrated mode (see -cell). Default is where &lt;program&gt;
                         is located


  Optimization Options:

  -A &lt;num_of_bytes&gt;, --align-code &lt;num_of_bytes&gt;
                         Align program code according to given &lt;num_of_bytes&gt;
  -bldcg, --build-dcg    Build a DCG (data connectivity graph) for enhanced data
                         reordering (applicable only with the -RD flag)
  -btcar, --branch-table-csect-anchor-removal
                         Eliminate load instructions related to the usage of
                         branch tables in the code
  -cRD, --conservativeRD
                         Perform conservative static data reordering by packing
                         all frequently referenced static variables together
  -cbtd, --convert-bss-to-data
                         Convert bss section into a data section (useful for more
                         aggressive tocload and RD optimizations)
  -dce, --dead-code-elimination
                         Eliminate instructions related to unused local variables
                         within frequently executed functions (useful mainly
                         after applying function inlining optimization)
  -dp, --data-prefetch   Insert dcbt instructions to improve data-cache
                         performance
  -dpht &lt;threshold&gt;, --data-placement-hotness-threshold &lt;threshold&gt;
                         Set data placement algorithm hotness threshold between
                         (0,1), where 0 will reorder the static variables in
                         large groups based on the control flow, and 1 reorders
                         the variables in very small groups based on their
                         access frequency. (applicable only with the -RD flag)
  -dpnf &lt;factor&gt;, --data-placement-normalization-factor &lt;factor&gt;
                         Set data placement algorithm normalization factor
                         between (0,1), where 0 causes static variables to be
                         reordered regardless of their size, and 1 locates only
                         small sized variables first. (applicable only with the
                         -RD flag)
  -ece, --epilog-code-eliminate
                         Reduce code size by grouping common instructions in
                         functions' epilogs, into a single unified code
  -fc, --function-cloning
                         Enable only function cloning phase during function
                         inlining optimizations (applicable only with function
                         inlining flags: -i, -si, -ihf, -isf, -shci)
  -hr, --hco-reschedule  Relocate instructions from frequently executed code to
                         rarely executed code areas, when possible
  -hrf &lt;factor&gt;, --hco-resched-factor &lt;factor&gt;
                         Set the aggressiveness of the -hr optimization option
                         according to a factor value between (0,1), where 0 is
                         the least aggressive factor (applicable only with the
                         -hr option)
  -i, --inline           Same as --selective-inline with --inline-small-funcs 12
  -ihf &lt;pct&gt;, --inline-hot-functions &lt;pct&gt;
                         Inline all function call sites to functions that have a
                         frequency count greater than the given &lt;pct&gt; frequency
                         percentage
  -isf &lt;size&gt;, --inline-small-funcs &lt;size&gt;
                         Inline all functions that are smaller or equal to the
                         given &lt;size&gt; in bytes
  -kr, --killed-registers
                         Eliminate stores and restores of registers that are
                         killed (overwritten) after frequently executed function
                         calls
  -lap, --load-address-propagation
                         Eliminate load instructions of variables' addresses by
                         re-using pre-loaded addresses of adjacent variables
  -las, --load-after-store
                         Add NOP instructions to place each load instruction
                         further apart following a store instruction that
                         reference the same memory address
  -lro, --link-register-optimization
                         Eliminate saves and restores of the link register in
                         frequently-executed functions
  -lu &lt;aggressiveness_factor&gt;, --loop-unroll &lt;aggressiveness_factor&gt;
                         Unroll short loops containing of one to several basic
                         blocks according to an aggressiveness factor between
                         (1,9), where 1 is the least aggressive unrolling option
                         for very hot and short loops
  -lun &lt;unrolling_number&gt;, --loop-unrolling-number &lt;unrolling_number&gt;
                         Set the number of unrolled iterations in each unrolled
                         loop. The allowed range is between (2,50). Default is
                         set to 2. (applicable only with the -lu flag)
  -O                     Switch on basic optimizations only. Same as -RC -nop -bp
                         -bf
  -O2                    Switch on less aggressive optimization flags. Same as -O
                         -hr -pto -isf 8 -tlo -kr
  -O3                    Switch on aggressive optimization flags. Same as -O2 -RD
                         -isf 12 -si -dp -lro -las -vro -btcar -lu 9 -rt 0
  -O4                    Switch on aggressive optimization flags together with
                         aggressive function inlining. Same as -O3 -sidf 50 -ihf
                         20
  -pc, --preserve-csects
                         Preserve CSects' boundaries in reordered code
  -pca, --propagate-constant-area
                         Relocate the constant variables area to the top of the
                         code section when possible
  -pfb, --preserve-first-bb
                         Preserve original location of the entry point basic
                         block in program
  -pp, --preserve-functions
                         Preserve functions' boundaries in reordered code
  -pr/-nopr, --ptrgl-r11/--noptrgl-r11
                         Perform/Do not perform removal of R11 load instruction
                         in _ptrgl csect (the default is to perform the
                         optimization)
  -pto, --ptrgl-optimization
                         Perform optimization of indirect call instructions via
                         registers by replacing them with conditional direct
                         jumps
  -ptosl &lt;limit_size&gt;, --ptrgl-optimization-size-limit &lt;limit_size&gt;
                         Set the limit of the number of conditional statements
                         generated by -pto optimization. Allowed values are
                         between 1..100. Default value set to 3. (applicable
                         only with the -pto flag)
  -ptoht &lt;heatness_threshold&gt;, --ptrgl-optimization-heatness-threshold &lt;heatness_threshold&gt;
                         Set the frequency threshold for indirect calls that are
                         to be optimized by -pto optimization. Allowed range
                         between 0..1. Default is set to 0.8. (applicable only
                         with -pto flag)
  -rcaf &lt;aggressiveness_factor&gt;, --reorder-code-aggressivenes-factor &lt;aggressiveness_factor&gt;
                         Set the aggressiveness of code reordering optimization.
                         Allowed values are 1 and 2, where 1 is less aggressive.
                         Default is set to 1. (applicable only with the -RC
                         flag)
  -rcctf &lt;termination_factor&gt;, --reorder-code-chain-termination-factor &lt;termination_factor&gt;
                         Set the threshold fraction which determines when to
                         terminate each chain of basic blocks during code
                         reordering. Allowed input range is between 0.0 to 1.0
                         where 0.0 generates long chains and 1.0 creates single
                         basic block chains. Default is set to 0.05. (applicable
                         only with the -RC flag)
  -rccrf &lt;reversal_factor&gt;, --reorder-code-condition-reversal-factor &lt;reversal_factor&gt;
                         Set the threshold fraction which determines when to
                         enable condition reversal for each conditional branch
                         during code reordering. Allowed input range is between
                         0.0 to 1.0 when 0.0 tries to preserve original
                         condition direction and 1.0 ignores it. Default is set
                         to 0.8 (applicable only with the -RC flag)
  -RD, --reorder-data    Perform static data reordering
  -rmte, --remove-multiple-toc-entries
                         Remove multiple TOC entries pointing to the same
                         location in the input program file
  -rt &lt;removal_factor&gt;, --reduce-toc &lt;removal_factor&gt;
                         Perform removal of TOC entries according to a removal
                         factor between (0,1), where 0 removes non-accessed TOC
                         entries only, and 1 removes all possible TOC entries
  -sdp &lt;aggressiveness_factor&gt;, --stride-data-prefetch &lt;aggressiveness_factor&gt;
                         Perform data prefetching within frequently executed
                         loops based on stride analysis, according to an
                         aggressiveness factor between (1,9), where 1 is least
                         aggressive
  -sdpla &lt;iterations_number&gt;, --stride-data-prefetch-look-ahead &lt;iterations_number&gt;
                         Set the number of iterations for which data is
                         prefetched into the cache ahead of time. Default value
                         is set to 4 iterations. (applicable only with the -sdp
                         flag)
  -sdpms &lt;stride_min_size&gt;, --stride-data-prefetch-min-size &lt;stride_min_size&gt;
                         Set the minimal stride size in bytes, for which data
                         will be considered as a candidate for prefetching.
                         Default value is set to 128 bytes. (applicable only
                         with the -sdp flag)
  -shci &lt;pct&gt;, --selective-hot-code-inline &lt;pct&gt;
                         Perform selective inlining of functions in order to
                         decrease the total execution counts
  -si, --selective-inline
                         Perform selective inlining of dominant hot function
                         calls
  -sll &lt;Lib1:Prof1,...,LibN:ProfN&gt;, --static-link-libraries &lt;Lib1:Prof1,...,LibN:ProfN&gt;
                         Statically link hot code from specified dynamically
                         linked libraries to the input program. The parameter
                         consists of comma-separated list of libraries and their
                         profiles. IMPORTANT: licensing rights of specified
                         libraries should be observed when applying this copying
                         optimization
  -sllht &lt;hotness_threshold&gt;, --static-link-libraries-hotness-threshold &lt;hotness_threshold&gt;
                         Set hotness threshold for the --static-link-libraries
                         optimization. The allowed input range is between 0
                         (least aggressive) to 1, or -1, which does not require
                         profile and selects all code that might be called by
                         the input program from the given libraries. Default is
                         0.5
  -sidf &lt;percentage_factor&gt;, --selective-inline-dominant-factor &lt;percentage_factor&gt;
                         Set a dominant factor percentage for selective inline
                         optimization. The allowed range is between (0,100).
                         Default is set to 80 (applicable only with the -si and
                         -pbsi flags)
  -siht &lt;frequency_factor&gt;, --selective-inline-hotness-threshold &lt;frequency_factor&gt;
                         Set a hotness threshold factor percentage for selective
                         inline optimization to inline all dominant function
                         calls that have a frequency count greater than the
                         given frequency percentage. Default is set to 100
                         (applicable only with the -si -pbsi flags)
  -so, --stack-optimization
                         Reduce the stack frame size of functions which are
                         called with a small number of arguments
  -stf, --stack-flattening
                         Merge the stack frames of inlined functions with the
                         frames of the calling functions
  -tb, --preserve-traceback-tables
                         Force the restructuring of traceback tables in reordered
                         code. If -tb option is omitted, traceback tables are
                         automatically included only for C++ applications which
                         use the Try &amp; Catch mechanism
  -rtb, --remove-traceback-tables
                         Remove traceback tables in reordered code
  -tlo, --tocload-optimization
                         Replace each load instruction that references the TOC
                         with a corresponding add-immediate instruction via the
                         TOC anchor register, when possible
  -ucde, --unreachable-code-data-elimination
                         Remove unreachable code and non-accessed static data
  -vro, --volatile-registers-optimization
                         Eliminate stores and restores of non-volatile registers
                         in frequently executed functions by using available
                         volatile registers


  Output Options:
  -d, --disassemble-text
                         Print the disassembled text section of the output
                         program into &lt;output_file&gt;.dis_text file
  -dap, --dump-ascii-profile
                         Dump profile information in ASCII format into
                         &lt;program&gt;.aprof (requires the -f flag)
  -db, --disassemble-bss
                         Print the disassembled bss section of the output program
                         into &lt;output_file&gt;.dis_bss file
  -dd, --disassemble-data
                         Print the disassembled data section of the output
                         program into &lt;output_file&gt;.dis_data file
  -diap, --dump-initial-ascii-profile
                         Dump initial profile information in ASCII format into
                         &lt;program&gt;.aprof.init (requires the -f flag)
  -dim, --dump-instruction-mix
                         Dump instruction mix statistics based on gathered
                         profile information
  -dm, --dump-mapper     Print a map of basic blocks and static variables with
                         their respective new -&gt; old addresses into a
                         &lt;program&gt;.mapper file
  -o &lt;output_file&gt;, --output-file &lt;output_file&gt;
                         Set the name of the output file. The default
                         instrumented file is &lt;program&gt;.instr. The default
                         optimized file is &lt;program&gt;.fdpr
  -pif, --print-inlined-funcs
                         Print the list of inlined functions along with their
                         corresponding calling functions, in ASCII format into a
                         &lt;program&gt;.inlined file (requires the -si or -i or -isf
                         flags)
  -ppcf, --print-prof-counts-file
                         Print the profiling counters in ASCII format into a
                         &lt;program&gt;.counts file (requires the -f flag)
  -simo, --single-input-multiple-outputs
                         Optimize in parallel into multiple outputs as specified
                         by option sets read from stdin
  -sf, --strip-file      Strip the optimized output file
  -spe, --speculative-profile-enhancement
                         Complements given partial profile information of basic
                         blocks' frequencies, i.e., transforms basic block
                         profile to a complete edge profile
  -spedir &lt;directory&gt;, --spe-directory &lt;directory&gt;
                         Set the directory into which SPE executables will be
                         extracted and from which they will be encapsulated
  -enc, --encapsulate    Encapsulate SPE executables present in the PPE input
                         (see --spe-directory)


  General Options:
  -gro, --generate-relinkable-output
                         Generate relinkable output
  -h, --help             Print online usage help
  -m &lt;machine-model&gt;, --machine &lt;machine-model&gt;
                         Generate code for the specified machine model. Target
                         machine can be one of the following models: power2,
                         power3, ppc405, ppc440, power4, ppc970, power5, power6,
                         spe, spe_edp. Default is set to no machine
  -q, --quiet            Set quiet output mode, suppressing informational
                         messages
  -st &lt;stat_file&gt;, --statistics &lt;stat_file&gt;
                         Output statistics information to &lt;stat_file&gt;. If
                         &lt;stat_file&gt; is '-', output goes to standard output. See
                         --verbose for the default
  -V, --version          Print version
  -v &lt;level&gt;, --verbose &lt;level&gt;
                         Set verbose output mode level. When set, various
                         statistics about the target optimized program are
                         printed into file &lt;program&gt;.stat. Allowed level range
                         is between (0,3). Default is set to 0
  -cell, --cell-supervisor
                         Integrated PPE/SPE processing. Perform SPE extraction,
                         processing, and encapsulation automatically prior to
                         PPE processing
  -armember              For archive files - list of archive members to be
                         optimized, if -armember is not specified, all members
                         will be optimized
</pre>
</li>
</ul>
]]>
</platform_settings>



<!--
  -
  -	Compiler declarations.
  -
  -
  -->

<!-- C compiler entry.  The regexp accepts "xlc" or "xlc_r", optionally
     preceded by a directory prefix ending in "/", and requires a word
     boundary after the name. -->
<flag
      name="xlc"
      class="compiler"
      regexp="(\S*\/)?xlc(_r)?\b">
<example>
xlc,
xlc_r
</example>
<![CDATA[
<p>
Invoke the IBM XL C compiler. 32-bit binaries are produced by default.
</p>
]]>
</flag>


<!-- C++ compiler entry.  The regexp accepts "xlC" or "xlC_r" (capital C),
     optionally preceded by a directory prefix ending in "/", and requires a
     word boundary after the name. -->
<flag
      name="xlC"
      class="compiler"
      regexp="(\S*\/)?xlC(_r)?\b">
<example>
xlC,
xlC_r
</example>
<![CDATA[
<p>
Invoke the IBM XL C++ compiler. 32-bit binaries are produced by default.
</p>
]]>
</flag>


<!-- Fortran compiler entry.  The regexp accepts "xlf95" or "xlf95_r",
     optionally preceded by a directory prefix ending in "/", and requires a
     word boundary after the name. -->
<flag
      name="xlf95"
      class="compiler"
      regexp="(\S*\/)?xlf95(_r)?\b">
<example>
xlf95,
xlf95_r
</example>
<![CDATA[
<p>
Invoke the IBM XL Fortran compiler. 32-bit binaries are produced by default.
</p>
]]>
</flag>


<!--
  -
  -	Aggregated optimization flags.
  -
  -->

<!-- Flag -O5: aggregate optimization level.  The include elements expand it
     to the F-O4 entry plus -qipa=level=2.  In the HTML payload the list sits
     after the paragraph rather than inside it, because a p element may not
     contain a ul. -->
<flag name="F-O5"
      class="optimization"
      regexp="-O5\b">
<example>
-O5
</example>
<![CDATA[
<p>
Perform optimizations for maximum performance. This includes maximum
interprocedural analysis on all of the objects presented on the "link" 
step. This level of optimization will increase the compiler's memory
usage and compile time requirements. -O5 Provides all of the functionality
of the -O4 option, but also provides the functionality of the
-qipa=level=2 option.
</p>

<p>
-O5 is equivalent to the following flags
</p>
<ul>
  <li> <tt>-O4</tt> </li>
  <li> <tt>-qipa=level=2</tt> </li>
</ul>
]]>
<include flag="F-O4" />
<include flag="F-qipa:level" flagtext="-qipa=level=2" />
</flag>


<!-- Aggregate level -O4. The include children expand it to F-O3 plus
     -qipa=level=1, -qarch=auto and -qtune=auto, mirroring the list in the
     description. -->
<flag
  name="F-O4"
  class="optimization"
  regexp="-O4\b">
<example>-O4</example>
<![CDATA[
<p>
Perform optimizations for maximum performance. This includes
interprocedural analysis on all of the objects presented on the "link" 
step.
</p>

<p>
-O4 is equivalent to the following flags
<ul>
  <li> <tt>-O3</tt> </li>
  <li> <tt>-qipa=level=1</tt> </li>
  <li> <tt>-qarch=auto</tt> </li>
  <li> <tt>-qtune=auto</tt> </li>
</ul>
</p>
]]>
<include flag="F-O3" />
<include flag="F-qipa:level" flagtext="-qipa=level=1" />
<include flag="F-qarch" flagtext="-qarch=auto" />
<include flag="F-qtune" flagtext="-qtune=auto" />
</flag>


<!-- Aggregate level -O3. The include children expand it to F-O2 plus the
     flag text -qhot=level=0 (handled by F-qhot). -->
<flag
  name="F-O3"
  class="optimization"
  regexp="-O3\b">
<example>-O3</example>
<![CDATA[
<p>
-O3 Performs additional optimizations that are memory intensive, compile-time
intensive, and may change the semantics of the program slightly, unless
-qstrict is specified. We recommend these optimizations when the desire for
run-time speed improvements outweighs the concern for limiting compile-time
resources. The optimizations provided include:
<ul>
  <li> In-depth memory access analysis </li>
  <li> Better loop scheduling </li>
  <li> High-order loop analysis and transformations (-qhot=level=0) </li>
  <li> Inlining of small procedures within a compilation unit by default </li>
  <li> Eliminating implicit compile-time memory usage limits </li>
  <li> Widening, which merges adjacent load/stores and other operations </li>
  <li> Pointer aliasing improvements to enhance other optimizations </li>
</ul>
</p>

<p>
-O3 is equivalent to the following flags
<ul>
  <li> <tt>-O2</tt> </li>
  <li> <tt>-qhot=level=0</tt> </li>
</ul>
</p>
]]>
<include flag="F-O2" />
<include flag="F-qhot" flagtext="-qhot=level=0" />
</flag>


<!-- Base optimization level -O2. It is the end of the include chain
     F-O5, F-O4, F-O3, F-O2. It deliberately has no include of F-O: the
     F-O entry already includes F-O2 as its alias target, so pointing back
     at F-O from here would make the two entries include each other in a
     cycle. -->
<flag name="F-O2"
      class="optimization"
      regexp="-O2\b">
<example>-O2</example>
<![CDATA[
<p>
-O2 Performs a set of optimizations that are intended to offer improved
performance without an unreasonable increase in time or storage that is
required for compilation including:
<ul>
  <li> Eliminates redundant code </li>
  <li> Basic loop optimization </li>
  <li> Can structure code to take advantage of -qarch and -qtune settings </li>
</ul>
</p>
]]>
</flag>


<!-- Bare -O with no digit. The trailing \b keeps it from matching the
     prefix of -O2 through -O5; the include child maps it to F-O2. -->
<flag
  name="F-O"
  class="optimization"
  regexp="-O\b">
<example>-O</example>
<![CDATA[
<p>
-O enables the level of optimization that represents the best tradeoff
between compilation speed and run-time performance.
If you need a specific level of optimization, specify the appropriate
numeric value. 
Currently, -O is equivalent to -O2.
</p>
]]>
<include flag="F-O2" />
</flag>



<!--
  -
  -	Optimization flags: individual methods.
  -
  -->

<!-- Target instruction set selection. The regexp requires a suboption
     (-qarch=VALUE), so the examples show complete flags that it actually
     matches. Each list item carries its own description inside the li
     element so the rendered HTML list is well formed. -->
<flag name="F-qarch"
      class="optimization"
      regexp="-qarch=(\S+)\b">
<example>
-qarch=auto,
-qarch=pwr6
</example>
<![CDATA[
<p>
Produces object code containing instructions that will run on the
specified processors. "auto" selects the processor the compile
is being done on. "pwr5x" is the POWER5+ processor.
</p>

<p>Supported values for this flag are</p>
<ul>
  <li><tt>auto</tt>: Use the processor on which the program is compiled.</li>
  <li><tt>pwr6e</tt>: The POWER6 processor in "Enhanced" mode based systems.</li>
  <li><tt>pwr6</tt>: The POWER6 processor based systems.</li>
  <li><tt>pwr5x</tt>: The POWER5+ processor based systems.</li>
  <li><tt>pwr5</tt>: The POWER5 processor based systems.</li>
  <li><tt>pwr4</tt>: The POWER4 processor based systems.</li>
  <li><tt>ppc970</tt>: The PPC970 processor based systems.</li>
</ul>
]]>
</flag>


<!-- Tuning target selection. The regexp requires a suboption
     (-qtune=VALUE), so the examples show complete flags that it actually
     matches. The value list sits outside the paragraph and each item
     carries its own description inside the li element. -->
<flag name="F-qtune"
      class="optimization"
      regexp="-qtune=(\S+)\b">
<example>
-qtune=auto,
-qtune=pwr6
</example>
<![CDATA[
<p>
Specifies the system architecture for which the executable program
is optimized.  This includes instruction scheduling and cache setting.
</p>

<p>The supported values for <tt>suboption</tt> are:</p>
<ul>
  <li><tt>auto</tt>: Use the processor on which the program is compiled.</li>
  <li><tt>pwr6e</tt>: The POWER6 processor in "Enhanced" mode based systems.</li>
  <li><tt>pwr6</tt>: The POWER6 processor based systems.</li>
  <li><tt>pwr5x</tt>: The POWER5+ processor based systems.</li>
  <li><tt>pwr5</tt>: The POWER5 processor based systems.</li>
  <li><tt>pwr4</tt>: The POWER4 processor based systems.</li>
  <li><tt>ppc970</tt>: The PPC970 processor based systems.</li>
</ul>
]]>
</flag>


<!-- -qinlglue: one literal switch with a plain-text description. -->
<flag
  name="F-qinlglue"
  class="optimization"
  regexp="-qinlglue\b">
This option inlines glue code that optimizes external
function calls when compiling.
</flag>


<!-- High-order loop transformations. The regexp accepts bare -qhot or one
     of the suboptions arraypad, simd, vector, level=0 and level=1. The
     suboption list sits outside the paragraph and each item carries its
     own description inside the li element so the HTML list is well
     formed. -->
<flag name="F-qhot"
      class="optimization"
      regexp="-qhot(=arraypad|=simd|=vector|=level=[01])?\b">
<example>
-qhot,
-qhot=level=1,
-qhot=simd
</example>
<![CDATA[
<p>
Performs high-order transformations on loops during optimization.
The supported values for <tt>suboption</tt> are:
</p>
<ul>
   <li><tt>arraypad</tt>: The compiler will pad any arrays where it infers that there may be a benefit.</li>
   <li><tt>level=0</tt>: The compiler performs a limited set of high-order loop transformations.</li>
   <li><tt>level=1</tt>: The compiler performs its full set of high-order loop transformations.</li>
   <li><tt>simd</tt>: Replaces certain instruction sequences with vector instructions.</li>
   <li><tt>vector</tt>: Replaces certain instruction sequences with calls to the MASS library.</li>
</ul>
<p>
Specifying -qhot without suboptions implies -qhot=nosimd, -qhot=noarraypad, -qhot=vector and
-qhot=level=1. The -qhot option is also implied by -O4, and -O5.
</p>
]]>
</flag>


<!-- Interprocedural analysis level. The regexp requires a digit 0, 1 or 2
     after -qipa=level=, so the examples show complete flags that it
     actually matches. F-O4 and F-O5 reach this entry through their
     include children. -->
<flag name="F-qipa:level"
      class="optimization"
      regexp="-qipa=level=[012]\b">
<example>
-qipa=level=1,
-qipa=level=2
</example>
<![CDATA[
<p>
Enhances optimization by doing detailed analysis across procedures
(interprocedural analysis or IPA). 
The <tt>level</tt> determines the amount of interprocedural analysis
and optimization that is performed.
</p>

<p>
  <tt>level=0</tt> Does only minimal interprocedural analysis and optimization
</p>

<p>
  <tt>level=1</tt> turns on inlining, limited alias analysis, and limited
  call-site tailoring
</p>

<p>
  <tt>level=2</tt> turns on full interprocedural data flow and alias analysis
</p>
]]>
</flag>


<!-- -qnoipa: one literal switch; no example element is given. -->
<flag
  name="F-qnoipa"
  class="optimization"
  regexp="-qnoipa\b">
<![CDATA[
<p>
Suppresses interprocedural analysis (IPA), which is enabled by default
at optimization levels -O4 and -O5.
</p>
]]>
</flag>


<!-- -qpdf1: first pass of a profile directed feedback build; companion of
     F-qpdf2. Plain-text description. -->
<flag
  name="F-qpdf1"
  class="optimization"
  regexp="-qpdf1\b">
The option used in the first pass of a profile directed feedback compile
that causes pdf information to be generated.
The profile directed feedback optimization gathers data on both execution
path and data values. It does not use hardware counters, nor gather any
data other than path and data values for PDF specific optimizations.
</flag>


<!-- -qpdf2: second pass of a profile directed feedback build; companion of
     F-qpdf1. Plain-text description. -->
<flag
  name="F-qpdf2"
  class="optimization"
  regexp="-qpdf2\b">
The option used in the second pass of a profile directed feedback compile
that causes PDF information to be utilized during optimization.
</flag>


<!-- -qfdpr: one literal switch with a plain-text description. -->
<flag
  name="F-qfdpr"
  class="optimization"
  regexp="-qfdpr\b">
The compiler generates additional symbol information for use by the AIX "fdpr"
binary optimization tool.
</flag>


<!-- Fortran 90 versus Fortran 95 behaviour selection. The regexp accepts
     exactly one of the six documented suboptions; there is no empty
     alternative, so a bare "-qxlf90=" prefix of an unknown suboption is
     not consumed. The placeholder "suboption" is written with &lt; and
     &gt; because the CDATA body is emitted as HTML and a literal angle
     bracket pair would be read as a tag and dropped from the page. -->
<flag name="F-qxlf90"
      class="optimization"
      regexp="-qxlf90=(signedzero|nosignedzero|autodealloc|noautodealloc|oldpad|nooldpad)\b">
<example>
-qxlf90=nosignedzero
</example>
<![CDATA[
<pre>
         -qxlf90=&lt;suboption&gt;
                Determines whether the compiler provides the
                Fortran 90 or the Fortran 95 level of support for
                certain aspects of the language. &lt;suboption&gt; can be
                one of the following:

                signedzero | nosignedzero
                     Determines how the SIGN(A,B) function handles
                     signed real 0.0. In addition, determines
                     whether negative internal values will be
                     prefixed with a minus when formatted output
                     would produce a negative sign zero.
                autodealloc | noautodealloc
                     Determines whether the compiler deallocates
                     allocatable arrays that are declared locally
                     without either the SAVE or the STATIC
                     attribute and have a status of currently
                     allocated when the subprogram terminates.
                oldpad | nooldpad
                     When the PAD=specifier is present in the
                     INQUIRE statement, specifying -qxlf90=nooldpad
                     returns UNDEFINED when there is no connection,
                     or when the connection is for unformatted I/O.
                     This behavior conforms with the Fortran 95
                     standard and above. Specifying -qxlf90=oldpad
                     preserves the Fortran 90 behavior.

                Default:
                     o signedzero, autodealloc and nooldpad for the
                     xlf95, xlf95_r, xlf95_r7 and f95 invocation
                     commands.
                     o nosignedzero, noautodealloc and oldpad for
                     all other invocation commands.
</pre>
]]>
</flag>



<!--
  -
  -	Optimization flags: memory allocation.
  -
  -->

<!-- -q64: one literal switch selecting the 64 bit ABI. -->
<flag
  name="F-q64"
  class="optimization"
  regexp="-q64\b">
<example>-q64</example>
Generates 64 bit ABI binaries. The default is to generate 32 bit ABI binaries.
</flag>


<!-- -qlargepage: one literal switch with a plain-text description. -->
<flag
  name="F-qlargepage"
  class="optimization"
  regexp="-qlargepage\b">
Indicates that a program, designed to execute in a 
large page memory environment, can take advantage 
of large 16 MB pages provided on POWER4 and higher 
based systems.
</flag>


<!-- -qalloca: one literal switch with a plain-text description. -->
<flag
  name="F-qalloca"
  class="optimization"
  regexp="-qalloca\b">
Indicates that the compiler understands how to do alloca().
</flag>


<!-- -qsmallstack=dynlenonheap: only this one suboption is matched. -->
<flag
  name="F-qsmallstack:dynlenonheap"
  class="optimization"
  regexp="-qsmallstack=dynlenonheap\b">
Causes the Fortran compiler to allocate dynamic arrays on the heap instead
of the stack
</flag>


<!-- -qsave: one literal switch with a plain-text description. -->
<flag
  name="F-qsave"
  class="optimization"
  regexp="-qsave\b">
Specifies that all local variables be treated as STATIC.
</flag>



<!--
  -
  -	Optimization flags: vector calculations.
  -
  -->

<!-- Vector (VMX) code generation switch. The optional "no" group in the
     regexp means this one entry describes both -qenablevmx and
     -qnoenablevmx, so the description covers both forms and the examples
     are comma separated like the other multi-example entries. -->
<flag name="F-qenablevmx"
      class="optimization"
      regexp="-q(no)?enablevmx\b">
<example>
-qenablevmx,
-qnoenablevmx
</example>
-qenablevmx enables the generation of vector instructions for processors
that support them. -qnoenablevmx disables the generation of vector
instructions.
</flag>


<!-- -qvecnvol: one literal switch with a plain-text description. -->
<flag
  name="F-qvecnvol"
  class="optimization"
  regexp="-qvecnvol\b">
Specifies whether to use volatile or non-volatile vector
registers. Volatile vector registers are registers whose
value is not preserved across function calls so the 
compiler will not depend on values in them across function
calls.
</flag>



<!--
  -
  -	Optimization flags: support libraries.
  -
  -->

<!-- -lmass: link-time library switch for MASS. -->
<flag
  name="F-lmass"
  class="optimization"
  regexp="-lmass\b">
Link the mathematical acceleration subsystem libraries (MASS),
which contain libraries of tuned mathematical intrinsic
functions.
</flag>


<!-- -lessl: link-time library switch for ESSL; see also F-qessl. -->
<flag
  name="F-lessl"
  class="optimization"
  regexp="-lessl\b">
Link the Engineering and Scientific Subroutine Library (ESSL).
</flag>


<!-- -qessl: compile-time companion of the -lessl link switch, per the
     description below. -->
<flag
  name="F-qessl"
  class="optimization"
  regexp="-qessl\b">
Specifies that, if either -lessl or -lesslsmp are also
specified, then Engineering and Scientific Subroutine Library
(ESSL) routines should be used in place of some Fortran 90
intrinsic procedures when there is a safe opportunity to do so.
</flag>


<!--
  -
  -	Mixed: Semantic compliance issues.
  -
  -->

<!-- -qrtti=all: only the "all" suboption is matched. -->
<flag
  name="F-qrtti:all"
  class="optimization"
  regexp="-qrtti=all\b">
Cause the C++ compiler to generate Run Time Type Identification code
</flag>


<!-- -qchars=signed: only the "signed" suboption is matched; classed as a
     portability flag. -->
<flag
  name="F-qchars:signed"
  class="portability"
  regexp="-qchars=signed\b">
Causes the compiler to treat "char" variables as signed instead of the
default of unsigned.
</flag>


<!--
  -
  -	Portability flags: syntactic compliance.
  -
  -->

<!-- -qfixed: Fortran source-form switch; classed as a portability flag. -->
<flag
  name="F-qfixed"
  class="portability"
  regexp="-qfixed\b">
Indicates that the input fortran source program is in fixed form.
</flag>


<!-- -qextname: external-name mangling switch; classed as a portability
     flag. -->
<flag
  name="F-qextname"
  class="portability"
  regexp="-qextname\b">
Adds an underscore to global entities to match the C compiler ABI
</flag>


<!-- -qcpluscmt: allows "//" line comments in C source; classed as a
     portability flag. -->
<flag
  name="F-qcpluscmt"
  class="portability"
  regexp="-qcpluscmt\b">
<example>-qcpluscmt</example>
<![CDATA[
<p>
 Permits the usage of "//" to introduce a comment
 that lasts until the end of the current source
 line, as in C++.
</p>
]]>
</flag>


<!--
  -
  -	Other flags: optimizations and non-compliant code.
  -
  -->

<!-- Aliasing assumptions. Only the two relaxing suboptions, noansi and
     nostd, are matched by the regexp. The description documents each
     option pair with its leading hyphen, written the way the flag appears
     on the command line. -->
<flag name="F-qalias"
      class="optimization"
      regexp="-qalias=(noansi|nostd)\b">
<example>
-qalias=noansi,
-qalias=nostd
</example>
<![CDATA[
<pre>
 -qalias=ansi | noansi
   If ansi is specified, type-based aliasing is
   used during optimization, which restricts the
   lvalues that can be safely used to access a
   data object. The default is ansi for the xlc,
   xlC, and c89 commands. This option has no
   effect unless you also specify the -O option.

 -qalias=std | nostd
   Indicates whether the compilation units contain
   any non-standard aliasing (see Compiler Reference
   for more information). If so, specify nostd.
</pre>
]]>
</flag>


<!-- Aggregate alignment rules. The regexp accepts any non-blank suboption
     after -qalign=. The description is preformatted text, so every line
     of the opening sentence uses the same indent and renders as one
     aligned paragraph. -->
<flag name="F-qalign"
      class="optimization"
      regexp="-qalign=(\S+)\b">
<example>
-qalign=natural
</example>
<![CDATA[
<pre>
                Specifies what aggregate alignment rules the
                compiler uses for file compilation, where the
                alignment options are:

                bit_packed
                     The compiler uses the bit_packed alignment
                     rules.
                full
                     The compiler uses the RISC System/6000
                     alignment rules. This is the same as power.
                mac68k
                     The compiler uses the Macintosh alignment
                     rules.  This suboption is valid only for 32-
                     bit compilations.
                natural
                     The compiler maps structure members to their
                     natural boundaries.
                packed
                     The compiler uses the packed alignment rules.
                power
                     The compiler uses the RISC System/6000
                     alignment rules.
                twobyte
                     The compiler uses the Macintosh alignment
                     rules.  This suboption is valid only for 32-
                     bit compilations.  The mac68k option is the
                     same as twobyte.

                The default is -qalign=full.
</pre>
]]>
</flag>


<!-- -qsmp=auto: marked parallel="yes"; only the "auto" suboption is
     matched here (F-qsmp:omp covers "omp"). -->
<flag
  name="F-qsmp:auto"
  class="optimization"
  parallel="yes"
  regexp="-qsmp=auto\b">
Causes the compiler to automatically generate parallel code using
OMP controls when possible.
</flag>


<!-- -qsmp=omp: marked parallel="yes"; only the "omp" suboption is matched
     here (F-qsmp:auto covers "auto"). -->
<flag
  name="F-qsmp:omp"
  class="optimization"
  parallel="yes"
  regexp="-qsmp=omp\b">
Tell the compiler that OMP controls are used to identify parallel code.
</flag>


<!-- Strict-semantics switch. The optional "no" group in the regexp lets
     this one entry cover both -qstrict and -qnostrict. -->
<flag
  name="F-qstrict"
  class="optimization"
  regexp="-q(no)?strict\b">
<example>-qstrict, -qnostrict</example>
<![CDATA[
<pre>
                Turns off aggressive optimizations which have the
                potential to alter the semantics of your program.
                -qstrict sets -qfloat=nofltint:norsqrt. -qnostrict
                sets -qfloat=rsqrt. This option is only valid with
                -O2 or higher optimization levels.

                Default:
                     o -qnostrict at -O3 or higher.
                     o -qstrict otherwise.

</pre>
]]>
</flag>


<!-- -qlanglvl=extc99: only the "extc99" language level is matched by this
     entry. The description states what that level selects. -->
<flag name="F-qlanglvl:extc99"
      class="compiler"
      regexp="-qlanglvl=extc99\b">
Selects the extended C99 language level: the compiler accepts source
that conforms to the ISO C99 standard together with implementation-specific
language extensions.
</flag>


<!--
  -
  -	Other flags: compiler resource consumption.
  -
  -->

<!-- -qipa=noobject: class "other"; the description says it only reduces
     compilation time and does not change the final binary. -->
<flag
  name="F-qipa:noobject"
  class="other"
  regexp="-qipa=noobject\b">
<example>-qipa=noobject</example>
<![CDATA[
<p>
 Specifies whether to include standard object code in the object files.
 The <tt>noobject</tt> suboption can substantially reduce overall
 compilation time, by not generating object code during the first IPA phase.
 This option does not affect the code in the final binary created.
</p>
]]>
</flag>


<!-- -qipa=threads with an optional "=N" thread count (the (=\d+)? group);
     class "other". -->
<flag
  name="F-qipa:threads"
  class="other"
  regexp="-qipa=threads(=\d+)?\b">
<example>-qipa=threads</example>
<![CDATA[
<p>
 The <tt>threads</tt> suboption allows the IPA optimizer to run portions
 of the optimization process in parallel threads, which can speed up the
 compilation process on multi-processor systems. All the available
 threads, or the number specified by N, may be used. N must be a positive
 integer. Specifying <tt>nothreads</tt> does not run any parallel threads;
 this is equivalent to running one serial thread.
 This option does not affect the code in the final binary created.
</p>
]]>
</flag>


<!-- -qspillsize=N: the regexp requires one or more digits after the equals
     sign; class "other". -->
<flag
  name="F-qspillsize"
  class="other"
  regexp="-qspillsize=\d+\b">
<example>-qspillsize=512, -qspillsize=32648</example>
<![CDATA[
<p>
Specifies the size of the compiler's internal program storage areas, in bytes.
</p>
]]>
</flag>



<!--
  -
  -	Other flags: error & warning messages.
  -
  -->

<!-- -qdebug=except: only the "except" suboption is matched; class
     "other". -->
<flag
  name="F-qdebug:except"
  class="other"
  regexp="-qdebug=except\b">
Causes the compiler to output a traceback if it abends.
</flag>


<!-- Splitter for colon-separated -qsuppress lists. The regexp captures the
     first message id as $1 (no colon or blank allowed) and everything after
     the first colon as $2. Each capture is passed on as its own
     -qsuppress= text through the include children, remainder first.
     Display is disabled for this entry itself; the single-id form is
     described by the F-qsuppress: entry. -->
<flag
  name="F-qsuppress"
  class="other"
  regexp="-qsuppress=([^:\s]+):(\S+)">
<example>-qsuppress=msg1:msg2</example>
<include text="-qsuppress=$2" />
<include text="-qsuppress=$1" />
<display enable="0" />
</flag>


<!-- Single-id form of -qsuppress: the captured id may not contain a colon
     or blank. The colon-separated list form is matched by the F-qsuppress
     entry. -->
<flag
  name="F-qsuppress:"
  class="other"
  regexp="-qsuppress=([^:\s]+)\b">
<example>-qsuppress=1500-036, -qsuppress=cmpmsg</example>
Suppresses the message with the message number specified.
</flag>


<!-- -w: one literal switch; class "other". -->
<flag
  name="F-w"
  class="other"
  regexp="-w\b">
Suppresses informational, language-level, and warning messages. This option sets
-qflag=e:e.
</flag>


<!--
  -
  - Other flags: instrumentation & debugging.
  -
  -->

</flagsdescription>
