<?xml version="1.0"?>
<!DOCTYPE flagsdescription
   SYSTEM "http://www.spec.org/dtd/cpuflags1.dtd"
>

<flagsdescription>
<!--
   <filename>HP-200607413-PathScale24_PGI61-flags</filename>
-->
   <title>SPEC CPU2006 Flag Descriptions for QLogic PathScale Compiler Suite and PGI Server Complete</title>

   <!-- CSS carried verbatim inside CDATA; it sets a white page background. -->
   <style>
      <![CDATA[
         body { background: white; }
      ]]>
   </style>
	
<!-- HEADERs -->

   <!-- Unclassified header: HTML (inside CDATA) carrying the compiler names and
        copyright notices, followed by a horizontal rule. -->
   <header>
      <![CDATA[
         <div style="font-weight: bold; font-size: larger; margin-left: 2em"> <h3 style="text-indent: -2em">Compilers:</h3>
         <p>QLogic PathScale Compiler Suite; Copyright &copy; 2006 QLogic Corporation. All rights reserved.</p>
         <p>PGI Server Complete; Portions Copyright &copy; 2006 STMicroelectronics, N.V., Portions Copyright &copy; 2006 The Portland Group, Inc.</p> 
         </div>
         
         <hr />

      ]]>
   </header>

   <!-- Header for the "compiler" flag class.
        NOTE(review): the text looks like a template placeholder; confirm whether real introductory text is intended. -->
   <header class="compiler">
    <![CDATA[
       <p>HEADER for COMPILER</p>
    ]]>
   </header>
	
   <!-- Header for the "portability" flag class.
        NOTE(review): the text looks like a template placeholder; confirm whether real introductory text is intended. -->
   <header
      class="portability">
      <![CDATA[
         <p>HEADER for PORTABILITY</p>
      ]]>
   </header>

   <!-- Header for the "optimization" flag class.
        NOTE(review): the text looks like a template placeholder; confirm whether real introductory text is intended. -->
   <header
      class="optimization">
      <![CDATA[
         <p>HEADER for OPTIMIZATION</p>
      ]]>
   </header>
	
    <!-- Header for the "other" flag class.
         NOTE(review): the text looks like a template placeholder; confirm whether real introductory text is intended. -->
    <header
       class="other">
      <![CDATA[
         <p>HEADER for OTHER</p>
      ]]>
    </header>
	
<!-- /HEADERs -->

<!-- OPTIMIZATION -->

  	<!-- PathScale Optimization flags -->

	   <!-- Basic optimization level.
	        NOTE(review): the regexp accepts only -O0 through -O3, while the text below also
	        describes the "s" level and a bare -O; confirm whether those forms need matching. -->
	   <flag name="F-O_n"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-O[0-3]\b">
	      <example>-O3</example>
	      <![CDATA[
	         <p>Specify the basic level of optimization desired.<br />
	         The options can be one of the following:</p>
	
	            <p style="text-indent: -25px; margin-left: 25px">
	               0&nbsp;&nbsp;&nbsp; Turn off all optimizations.</p>
	
	            <p style="text-indent: -25px; margin-left: 25px">
	               1&nbsp;&nbsp;&nbsp; Turn on local optimizations that
	               can be done quickly.</p>
	
	            <p style="text-indent: -25px; margin-left: 25px">
	               2&nbsp;&nbsp;&nbsp; Turn on extensive optimization.
	               This is the default.<br />
	               The optimizations at this level are generally conservative,
	               in the sense that they are virtually always beneficial,
	               provide improvements commensurate to the compile time
	               spent to achieve them, and avoid changes which affect
	               such things as floating point accuracy.</p>
	
	            <p style="text-indent: -25px; margin-left: 25px">
	               3&nbsp;&nbsp;&nbsp; Turn on aggressive optimization.<br />
	               The optimizations at this level are distinguished from -O2
	               by their aggressiveness, generally seeking highest-quality
	               generated code even if it requires extensive compile time.
	               They may include optimizations that are generally beneficial
	               but may hurt performance.<br />
	               This includes but is not limited to turning on the
	               Loop Nest Optimizer, -LNO:opt=1, and setting
	               -OPT:ro=1:IEEE_arith=2:Olimit=9000:reorg_common=ON.</p>
	
	            <p style="text-indent: -25px; margin-left: 25px">
	               s&nbsp;&nbsp;&nbsp; Specify that code size is to be given
	               priority in tradeoffs with execution time.</p>
	
	         <p>If no value is specified, 2 is assumed.</p>
	      ]]>
	   </flag>
	
	   <!-- Umbrella option: the include elements below list the flags that the text
	        states -Ofast is equivalent to. -->
	   <flag name="F-Ofast"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-Ofast\b">
	      <example>-Ofast</example>
	      <![CDATA[
	         <p>Equivalent to -O3 -ipa -OPT:Ofast -fno-math-errno -ffast-math.<br />
	         Use optimizations selected to maximize performance.
	         Although the optimizations are generally safe, they may affect
	         floating point accuracy due to rearrangement of computations.</p>
	         <p>NOTE: -Ofast enables -ipa (inter-procedural analysis),
	         which places limitations on how libraries and .o files are built.</p>
	      ]]>
	      <include flag="F-O_n"/>
	      <include flag="F-ipa"/>
	      <include flag="F-OPT:Ofast"/>
	      <include flag="F-fno-math-errno"/>
	      <include flag="F-ffast-math"/>
	      <display enable="1"/>
	   </flag>
	
	   <!-- Instrumentation step of feedback-directed optimization.
	        NOTE(review): the regexp matches only the literal prefix "fbdata", although the
	        text describes an arbitrary path; confirm this is intentional. -->
	   <flag name="F-fb_create"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-fb_create fbdata">
	      <example>-fb_create fbdata</example>
	      <![CDATA[
	         <p>-fb_create &lt;path&gt;<br />
	         Used to specify that an instrumented executable program is to be
	         generated. Such an executable is suitable for producing feedback
	         data files with the specified prefix for use in feedback-directed
	         optimization (FDO).
	         The commonly used prefix is "fbdata".<br />
	         This is OFF by default.</p>
	         <p>During the training run, the instrumented executable produces information regarding execution paths and data values, but
	         does not generate information by using hardware performance counters. </p>
	      ]]>
	   </flag>
	
	   <!-- Optimization step of feedback-directed optimization; consumes the data files
	        produced by a build made with -fb_create.
	        NOTE(review): the regexp matches only the literal prefix "fbdata"; confirm this is intentional. -->
	   <flag name="F-fb_opt" class="optimization"
	         regexp="-fb_opt fbdata" compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-fb_opt fbdata</example>
	      <![CDATA[
	         <p>-fb_opt &lt;prefix for feedback data files&gt;<br />
	         Used to specify feedback-directed optimization (FDO) by extracting
	         feedback data from files with the specified prefix, which were
	         previously generated using -fb_create.
	         The commonly used prefix is "fbdata".
	         The same optimization flags should be used
	         for both the -fb_create and -fb_opt compile steps.
	         Feedback data files created from executables compiled
	         with different optimization flags may give checksum errors.<br />
	         FDO is OFF by default.</p>
	         <p>During the -fb_opt compilation phase, information regarding execution paths and data values is
	         used to improve the information available to the optimizer.  FDO enables some optimizations which
	         are only performed when the feedback data file is available.  The safety of optimizations performed under FDO is
	         consistent with the level of safety implied by the other optimization flags (outside of -fb_create and
	         -fb_opt) specified on the compile and link lines.</p>
	      ]]>
	   </flag>
	
	   <!-- Selects the 32-bit ABI. -->
	   <flag name="F-m32"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-m32\b">
	      <example>-m32</example>
	      <![CDATA[
	         <p>Compile for 32-bit ABI, also known as x86 or IA32.</p>
	      ]]>
	   </flag>
	   
	   <!-- Target-platform selection. The trailing \b stops a listed name (e.g. athlon64)
	        from matching as a prefix of a longer one (e.g. athlon64fx).
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-march" class="optimization"
	         regexp="-march=(opteron|athlon64|athlon64fx|em64t|pentium4|xeon|anyx86|auto)\b">
	      <example>-march=opteron</example>
	      <![CDATA[
	         <p>Compiler will optimize code for selected platform.  auto means to optimize
	         for the platform on which the compiler is running, as
	         determined by reading /proc/cpuinfo.  anyx86 means a generic 32-bit x86
	         processor without SSE2 support.</p>
	      ]]>
	   </flag>
	
	   <!-- One entry covers both -fexceptions and -fno-exceptions via the optional "no-" group. -->
	   <flag name="F-fexceptions"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-f(no-|)exceptions">
	      <example>-fno-exceptions</example>
	      <![CDATA[
	         <p>(For C++ only) -fexceptions enables exception handling.
	         This is the default.
	         -fno-exceptions disables exception handling.</p>
	      ]]>
	   </flag>
	
	   <!-- One entry covers both -ffast-math and -fno-fast-math via the optional "no-" group. -->
	   <flag name="F-ffast-math" class="optimization"
	         regexp="-f(no-|)fast-math" compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-ffast-math</example>
	      <![CDATA[
	         <p>-ffast-math improves FP speed by relaxing ANSI &amp; IEEE rules.
	         -fno-fast-math tells the compiler to conform to ANSI and IEEE
	         math rules at the expense of speed. -ffast-math implies
	         -OPT:IEEE_arithmetic=2 -fno-math-errno.   -fno-fast-math
	         implies -OPT:IEEE_arithmetic=1 -fmath-errno.</p>
	      ]]>
	   </flag>
	
	   <!-- No regexp attribute is given for this entry, unlike its neighbours.
	        NOTE(review): presumably the matching pattern is derived from the flag name; confirm. -->
	   <flag name="F-fno-math-errno"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-fno-math-errno</example>
	      <![CDATA[
	         <p>Do not set ERRNO after calling math functions that are executed
	         with a single instruction, e.g. sqrt. A program that relies on IEEE
	         exceptions for math error handling may want to use this flag for speed
	         while maintaining IEEE arithmetic compatibility. This is implied by
	         -Ofast. The default is -fmath-errno.</p>
	      ]]>
	   </flag>
	
	   <!-- Inter-procedural analysis switch; also referenced by the F-Ofast include list. -->
	   <flag name="F-ipa"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-ipa">
	      <example>-ipa</example>
	      <![CDATA[
	         <p>Invoke inter-procedural analysis (IPA). Specifying this option is
	         identical to specifying -IPA or -IPA:.
	         Default settings for the individual IPA suboptions are used.</p>
	      ]]>
	   </flag>
	
	   <!-- Splitter for the flag groups      -->
	   <!-- -CG:, -IPA:, -LNO:, -OPT:, -WOPT: -->
	
	   <!-- Splits a colon-joined group option such as -OPT:a=1:b=2:c=3 into
	        -OPT:a=1 plus -OPT:b=2:c=3; the remainder is re-emitted and split again.
	        The tail is captured with \S+ rather than .+ so the match cannot run across
	        whitespace into unrelated flags that follow on the same line.
	        Display is disabled because this entry is only a rewriting helper. -->
	   <flag name="F-splitting:all" class="optimization"
	         regexp="-(CG|IPA|LNO|OPT|WOPT):([^:\s]+):(\S+)\b" compilers="Fpathcc,FpathCC,Fpathf95">
	      <include text="-$1:$2" />
	      <include text="-$1:$3" />
	      <display enable="0" />
	   </flag>
	
	   <!-- Sub-flags of the -CG: group -->
	
	   <!-- First entry of the -CG: group, so it also carries the group introduction. -->
	   <flag name="F-CG:cflow" class="optimization"
	         regexp="-CG:cflow=(on|off|0|1)" compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-CG:cflow</example>
	      <![CDATA[
	         <p>The Code Generation option group -CG: controls the optimizations
	         and transformations of the instruction-level code generator.</p>
	
	         <p>-CG:cflow : OFF disables control flow optimization in the code
	         generation. Default is ON.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for instruction-level global code motion. -->
	   <flag name="F-CG:gcm"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-CG:gcm=(on|off|0|1)">
	      <example>-CG:gcm</example>
	      <![CDATA[
	         <p>-CG:gcm : Specifying OFF disables the instruction-level
	         global code motion optimization phase. The default is ON.</p>
	      ]]>
	   </flag>
	
	   <!-- Numeric threshold for folding a memory load into an arithmetic operand. -->
	   <flag name="F-CG:load_exe"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-CG:load_exe=\d+">
	      <example>-CG:load_exe</example>
	      <![CDATA[
	         <p>-CG:load_exe=N : Specify the threshold for subsuming a memory load
	         operation into the operand of an arithmetic instruction.
	         The value of 0 turns off this subsumption optimization.
	         If N is 1, this subsumption is performed only when the result of
	         the load has only one use.
	         This subsumption is not performed if the number of times the result
	         of the load is used exceeds the value N, a non-negative integer.<br />
	         If the ABI is 64-bit and the language is Fortran, the default for N
	         is 2, otherwise the default is 1.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch selecting forward instruction scheduling within basic blocks. -->
	   <flag name="F-CG:local_fwd_sched"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-CG:local_fwd_sched=(on|off|0|1)">
	      <example>-CG:local_fwd_sched</example>
	      <![CDATA[
	         <p>-CG:local_fwd_sched : Change the instruction scheduling algorithm
	         to work forward instead of backward for the instructions
	         in each basic block.
	         The default is OFF for 64-bit ABI, and ON for 32-bit ABI.</p>
	      ]]>
	   </flag>
	
	   <!-- Size threshold (in KB) above which stores become non-temporal stores. -->
	   <flag name="F-CG:movnti"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-CG:movnti=\d+">
	      <example>-CG:movnti</example>
	      <![CDATA[
	         <p>-CG:movnti=N : Convert ordinary stores to non-temporal stores 
	         when writing memory blocks  of  size  larger than  N  KB.  When  N 
	         is set to 0, this transformation is avoided. 
	         The default value is 120 (KB).</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for prefetch generation in the code generator.
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-CG:prefetch" class="optimization"
	         regexp="-CG:prefetch=(on|off|0|1)">
	      <example>-CG:prefetch</example>
	      <![CDATA[
	         <p>-CG:prefetch : Specifying OFF suppresses any generation of prefetch
	         instructions in the code generator.  The default is ON.</p>
	      ]]>
	   </flag>

	
	   <!-- Sub-flags of the -IPA: group -->
	
	   <!-- First entry of the -IPA: group, so it also carries the group introduction. -->
	   <flag name="F-IPA:callee_limit"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-IPA:callee_limit=\d+">
	      <example>-IPA:callee_limit</example>
	      <![CDATA[
	         <p>The inter-procedural analyzer option group -IPA: controls
	         application of inter-procedural analysis and optimization.</p>
	
	         <p>-IPA:callee_limit=N : Functions whose size exceeds this limit
	         will never be automatically inlined by the compiler.
	         The default is 500.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for array linearization during inlining. -->
	   <flag name="F-IPA:linear"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-IPA:linear=(on|off|0|1)">
	      <example>-IPA:linear</example>
	      <![CDATA[
	         <p>-IPA:linear : Controls conversion of a multi-dimensional array
	         to a single dimensional (linear) array that covers the same block
	         of memory. When inlining Fortran subroutines, IPA tries to map
	         formal array parameters to the shape of the actual parameter.
	         In the case that it cannot map the parameter, it linearizes
	         the array reference. By default, IPA will not inline such callsites
	         because they may cause performance problems.
	         The default is OFF.</p>
	      ]]>
	   </flag>
	
	   <!-- Size limit at which inlining into a subprogram stops. -->
	   <flag name="F-IPA:plimit"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-IPA:plimit=\d+">
	      <example>-IPA:plimit</example>
	      <![CDATA[
	         <p>-IPA:plimit=N : This option stops inlining into a specific
	         subprogram once it reaches size N in the intermediate representation.
	         Default is 2500.</p>
	      ]]>
	   </flag>
	
	   <!-- Program-unit reordering level; the regexp accepts the values 0, 1 and 2. -->
	   <flag name="F-IPA:pu_reorder"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-IPA:pu_reorder=[0-2]">
	      <example>-IPA:pu_reorder</example>
	      <![CDATA[
	         <p>-IPA:pu_reorder=N : Control re-ordering the layout of program units
	         based on their invocation patterns in feedback compilation to minimize
	         instruction cache misses.
	         This option is ignored unless under feedback compilation.</p>
	
	         <p style="text-indent: -25px; margin-left: 25px">
	            0&nbsp;&nbsp;&nbsp; Disable procedure reordering.
	            This is the default for non-C++ programs.</p>
	
	         <p style="text-indent: -25px; margin-left: 25px">
	            1&nbsp;&nbsp;&nbsp; Reorder based on the frequency
	            in which different procedures are invoked.
	            This is the default for C++ programs.</p>
	
	         <p style="text-indent: -25px; margin-left: 25px">
	            2&nbsp;&nbsp;&nbsp; Reorder based on caller-callee
	            relationship.</p>
	      ]]>
	   </flag>
	
	   <!-- Percentage cap on code growth caused by inlining. -->
	   <flag name="F-IPA:space"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-IPA:space=\d+">
	      <example>-IPA:space</example>
	      <![CDATA[
	         <p>-IPA:space=N : Inline until a program expansion of N % is reached.
	         For example, -IPA:space=20 limits code expansion due to inlining
	         to approximately 20 %. Default is no limit.</p>
	      ]]>
	   </flag>
	
	   <!-- Sub-flags of the -LNO group -->
	
	   <!-- First entry of the -LNO: group, so it also carries the group introduction. -->
	   <flag name="F-LNO:blocking"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:blocking=(on|off|0|1)\b">
	      <example>-LNO:blocking</example>
	      <![CDATA[
	         <p>Specify options and transformations performed on loop nests
	         by the Loop Nest Optimizer (LNO). The -LNO options are enabled only
	         if -O3 is also specified on the pathf95 command line.</p>
	
	         <p>-LNO:blocking : Enable or disable the cache blocking transformation.
	         The default is ON.</p>
	      ]]>
	   </flag>
	
	   <!-- Trip-count limit for full unrolling; the regexp also accepts the short form "fu". -->
	   <flag name="F-LNO:full_unroll"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:(full_unroll|fu)=\d+\b">
	      <example>-LNO:full_unroll</example>
	      <![CDATA[
	         <p>-LNO:full_unroll,fu=N : Fully unroll loops with trip_count &lt;= N
	         inside LNO. N can be any integer between 0 and 100.
	         The default value for N is 5. Setting this flag to 0 disables
	         full unrolling of small trip count loops inside LNO.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for full unrolling of loops that are neither nested nor nesting.
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-LNO:full_unroll_outer"
	         class="optimization"
	         regexp="-LNO:full_unroll_outer=(on|off|0|1)\b">
	      <example>-LNO:full_unroll_outer</example>
	      <![CDATA[
	         <p>-LNO:full_unroll_outer=(on|off|0|1) : Control  the  full unrolling of loops with 
	         known trip count that do not contain a loop
	        and are not contained in a loop. The conditions implied by both the  full_unroll  and
	        the  full_unroll_size options must be satisfied for the loop to be fully unrolled. The
	        default is OFF.</p>
	      ]]>
	   </flag>
	
	   <!-- Unrolled-size limit for full unrolling.
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-LNO:full_unroll_size"
	         class="optimization"
	         regexp="-LNO:full_unroll_size=\d+\b">
	      <example>-LNO:full_unroll_size</example>
	      <![CDATA[
	         <p>-LNO:full_unroll_size=N : Fully  unroll  loops  with  unrolled  loop  
	         size &lt;= N inside LNO. N can be any integer
	        between 0 and 10000. The conditions implied by the full_unroll  option  must  also  be
	        satisfied for the loop to be fully unrolled. The default value for N is 2000.</p>
	      ]]>
	   </flag>

	   <!-- Loop fusion level; the regexp accepts the values 0, 1 and 2. -->
	   <flag name="F-LNO:fusion"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:fusion=[0-2]\b">
	      <example>-LNO:fusion</example>
	      <![CDATA[
	         <p>-LNO:fusion=N : Perform loop fusion. N can be one of the following:<br />
	         0 = Loop fusion is off<br />
	         1 = Perform conservative loop fusion<br />
	         2 = Perform aggressive loop fusion<br />
	         The default is 1.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch that makes LNO disregard feedback annotations on loops. -->
	   <flag name="F-LNO:ignore_feedback"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:ignore_feedback=(on|off|0|1)\b">
	      <example>-LNO:ignore_feedback</example>
	      <![CDATA[
        	 <p>-LNO:ignore_feedback=(on|off|0|1) : If the flag is ON then feedback information
	         from the loop annotations will be ignored in LNO transformations.
	         The default is OFF.</p>
	      ]]>
	   </flag>

	   <!-- Boolean switch for the loop interchange transformation.
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-LNO:interchange" class="optimization"
	         regexp="-LNO:interchange=(on|off|0|1)\b">
	      <example>-LNO:interchange</example>
	      <![CDATA[
	         <p>-LNO:interchange=(on|off|0|1) : Specifying OFF disables the loop interchange
	         transformation in the loop nest optimizer. Default is ON.
	         </p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for hoisting loop-invariant expressions. -->
	   <flag name="F-LNO:minvariant"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:minvariant=(on|off|0|1)\b">
	      <example>-LNO:minvariant</example>
	      <![CDATA[
	         <p>Enable or disable moving loop-invariant expressions out 
	         of loops. The default is ON.</p>
	      ]]>
	   </flag>
	
	   <!-- Upper bound on the product of outer-loop unrolling factors in a loop nest. -->
	   <flag name="F-LNO:ou_prod_max"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:ou_prod_max=\d+\b">
	      <example>-LNO:ou_prod_max</example>
	      <![CDATA[
	         <p>-LNO:ou_prod_max=N : This option indicates that the product
	         of unrolling of the various outer loops in a given loop nest
	         is not to exceed N, where N is a positive integer.
	         The default is 16.</p>
	      ]]>
	   </flag>
	
	   <!-- Prefetch distance, expressed in cache lines. -->
	   <flag name="F-LNO:prefetch_ahead"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:prefetch_ahead=\d+\b">
	      <example>-LNO:prefetch_ahead</example>
	      <![CDATA[
	         <p>-LNO:prefetch_ahead=N : Prefetch N cache line(s) ahead.
	         The default is 2.</p>
	      ]]>
	   </flag>
	
	   <!-- Prefetch level; the regexp accepts the values 0 through 3. -->
	   <flag name="F-LNO:prefetch"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:prefetch=[0-3]\b">
	      <example>-LNO:prefetch</example>
	      <![CDATA[
	         <p>-LNO:prefetch=(0|1|2|3) : This option specifies
	         the level of prefetching.</p>
	
	         <p>0 = Prefetch disabled.</p>
	
	         <p>1 = Prefetch is done only for arrays that are always referenced
	         in each iteration of a loop.</p>
	
	         <p>2 = Prefetch is done without the above restriction.
	         This is the default.</p>
	
	         <p> 3 = Most aggressive.</p>
	      ]]>
	   </flag>
	
	<!-- Boolean switch for array scalarization. The value set is on|off|0|1, the same
	     set every other boolean sub-option in this file accepts. -->
	<flag name="F-LNO:sclrze" class="optimization"
	      regexp="-LNO:sclrze=(on|off|0|1)\b" compilers="Fpathcc,FpathCC,Fpathf95">
	   <example>-LNO:sclrze</example>
	   <![CDATA[
	      <p>Turn ON or OFF the optimization that replaces an array by a 
	      scalar variable. The default is ON.</p>
	   ]]>
	</flag>
	
	
	
	   <!-- Inner-loop vectorization level; the regexp accepts the values 0, 1 and 2. -->
	   <flag name="F-LNO:simd"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-LNO:simd=[0-2]\b">
	      <example>-LNO:simd</example>
	      <![CDATA[
	         <p>-LNO:simd=(0|1|2) : This option enables or disables
	         inner loop vectorization.</p>
	
	         <p>0 = Turn off the vectorizer.</p>
	
	         <p>1 = (Default) Vectorize only if the compiler can determine that
	         there is no undesirable performance impact due to sub-optimal
	         alignment. Vectorize only if vectorization does not introduce
	         accuracy problems with floating-point operations.</p>
	
	         <p>2 = Vectorize without any constraints (most aggressive).</p>
	      ]]>
	   </flag>
	
	   <!-- Individual subflags of the -OPT: class -->
	
	   <!-- First entry of the -OPT: group, so it also carries the group introduction.
	        The regexp accepts the three alias model names described below. -->
	   <flag name="F-OPT:alias"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:alias=(typed|restrict|disjoint)\b">
	      <example>-OPT:alias</example>
	      <![CDATA[
	         <p>The -OPT: option group controls miscellaneous optimizations.
	         These options override defaults based on the main
	         optimization level.</p>
	
	         <p>-OPT:alias=&lt;name&gt;<br />
	         Specify the pointer aliasing model
	         to be used. By specifying one or more of the following for &lt;name&gt;,
	         the compiler is able to make assumptions throughout the compilation:</p>
	
	         <p style="text-indent: -25px; margin-left: 25px">
	         typed<br />
	            Assume that the code adheres to the ANSI/ISO C standard
	            which states that two pointers of different types cannot point
	            to the same location in  memory.
	            This is ON by default when -OPT:Ofast is specified.</p>
	
	         <p style="text-indent: -25px; margin-left: 25px">
	         restrict<br />
	            Specify that distinct pointers are assumed to point to distinct,
	            non-overlapping objects. This is OFF by default.</p>
	
	         <p style="text-indent: -25px; margin-left: 25px">
	         disjoint<br />
	            Specify that any two pointer expressions are assumed to point
	            to distinct, non-overlapping objects. This is OFF by default.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for rewriting division as multiplication by a reciprocal.
	        NOTE(review): the regexp lists lowercase on/off, while the F-OPT:Ofast text
	        writes div_split=ON; confirm how letter case is handled when matching. -->
	   <flag name="F-OPT:div_split"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:div_split=(on|off|0|1)\b">
	      <example>-OPT:div_split</example>
	      <![CDATA[
	         <p>-OPT:div_split=(ON|OFF)<br />
	         Enable or disable changing x/y into x*(recip(y)). This is OFF by
	         default, but enabled by -OPT:Ofast or -OPT:IEEE_arithmetic=3.
	         This transformation generates fairly accurate code.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for fast complex absolute value and division algorithms. -->
	   <flag name="F-OPT:fast_complex"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:fast_complex=(on|off|0|1)\b">
	      <example>-OPT:fast_complex</example>
	      <![CDATA[
	         <p>-OPT:fast_complex<br />
	         Setting fast_complex=ON enables fast
	         calculations for values declared to be of the type complex.
	         When this is set to ON, complex absolute value (norm) and complex
	         division use fast algorithms that overflow for an operand
	         (the divisor, in the case of division) that has an absolute value
	         that is larger than the square root of the largest representable
	         floating-point number.
	         This would also apply to an underflow for a value that is smaller
	         than the square root of the smallest representable floating point
	         number.<br />
	         OFF is the default.<br />
	         fast_complex=ON is enabled if -OPT:roundoff=3 is in effect.</p>
	      ]]>
	   </flag>
	
	   <!-- IEEE 754 conformance level; the regexp accepts three spellings of the option
	        name and the values 1 through 3. -->
	   <flag name="F-OPT:IEEE_arith" class="optimization"
	         regexp="-OPT:(IEEE_arithmetic|IEEE_arith|IEEE_a)=[1-3]\b" compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-OPT:IEEE_arithmetic</example>
	      <![CDATA[
	         <p>-OPT:IEEE_arithmetic,IEEE_arith,IEEE_a=(1|2|3)<br />
	         Specify the level of conformance to IEEE 754 floating point
	         roundoff/overflow behavior.
	         The options can be one of the following:</p>
	
	         <p>1 Adhere to IEEE accuracy. This is the default when optimization
	         levels -O0, -O1 and -O2 are in effect.</p>
	
	         <p>2 May produce inexact result not conforming to IEEE 754.
	         This is the default when -O3 is in effect.</p>
	
	         <p>3 All mathematically valid transformations are allowed.</p>
	      ]]>
	   </flag>
	
	   <!-- Umbrella sub-option: the include elements below reference the four settings
	        that the text says it turns on. -->
	   <flag name="F-OPT:Ofast"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:Ofast\b">
	      <example>-OPT:Ofast</example>
	      <![CDATA[
	         <p>-OPT:Ofast<br />
	         Use optimizations selected to maximize performance.
	         Although the optimizations are generally safe, they may affect
	         floating point accuracy due to rearrangement of computations.
	         This effectively turns on the following optimizations:
	         -OPT:ro=2:Olimit=0:div_split=ON:alias=typed.</p>
	      ]]>
	      <include flag="F-OPT:ro" />
	      <include flag="F-OPT:Olimit" />
	      <include flag="F-OPT:div_split" />
	      <include flag="F-OPT:alias" />
	   </flag>
	
	   <!-- Program-unit size limit above which optimization is disabled. -->
	   <flag name="F-OPT:Olimit"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:Olimit=(\d+)">
	      <example>-OPT:Olimit</example>
	      <![CDATA[
	         <p>-OPT:Olimit=N<br />
	         Disable optimization when size of program unit is > N. When N is 0,
	         program unit size is ignored and optimization process will not be
	         disabled due to compile time limit.
	         The default is 0 when -OPT:Ofast is specified,
	         9000 when -O3 is specified; otherwise the default is 6000.</p>
	      ]]>
	   </flag>
	
	   <!-- Round-off tolerance level; the regexp accepts both "roundoff" and "ro" with
	        the values 0 through 3. -->
	   <flag name="F-OPT:ro"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:(roundoff|ro)=[0-3]">
	      <example>-OPT:ro</example>
	      <![CDATA[
	         <p>-OPT:roundoff,ro=(0|1|2|3)<br />
	         Specify the level of acceptable departure from source language
	         floating-point, round-off, and overflow semantics.
	         The options can be one of the following:</p>
	
	         <p>0 = Inhibit optimizations that might affect the floating-point
	         behavior. This is the default when optimization levels -O0, -O1,
	         and -O2 are in effect.</p>
	
	         <p>1 = Allow simple transformations that might cause limited
	         round-off or overflow differences. Compounding such transformations
	         could have more extensive effects.
	         This is the default when -O3 is in effect.</p>
	
	         <p>2 = Allow more extensive transformations, such as the
	         reordering of reduction loops.
	         This is the default level when -OPT:Ofast is specified.</p>
	
	         <p>3 = Enable any mathematically valid transformation.</p>
	      ]]>
	   </flag>
	
	   <!-- Reciprocal square root instruction usage; the regexp accepts 0, 1 and 2. -->
	   <flag name="F-OPT:rsqrt"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-OPT:rsqrt=[0-2]\b">
	      <example>-OPT:rsqrt</example>
	      <![CDATA[
	         <p>-OPT:rsqrt=(0|1|2)<br />
	         This option specifies if the RSQRT machine instruction should be used
	         to calculate reciprocal square root. RSQRT is faster but potentially
	         less accurate than the regular square root operation.<br />
	         0 means not to use RSQRT.<br />
	         1 means to use RSQRT followed by instructions to refine the result.<br />
	         2 means to use RSQRT by itself.<br />
	         Default is 1 when -OPT:roundoff=2 or greater, else the default is 0.</p>
	      ]]>
	   </flag>
	   
	   <!-- Instruction-count ceiling for an unrolled inner loop.
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-OPT:unroll_size"
	         class="optimization"
	         regexp="-OPT:unroll_size=(\d+)">
	      <example>-OPT:unroll_size</example>
	      <![CDATA[
	         <p>-OPT:unroll_size=N<br />
	         Set the ceiling of maximum number of instructions for  an
	         unrolled  inner loop. If N=0, the ceiling is disregarded.
	         The default is 40.</p>
	      ]]>
	   </flag>

	   <!-- Maximum inner-loop unroll factor; the regexp also accepts "unroll_times".
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-OPT:unroll_times_max"
	         class="optimization"
	         regexp="-OPT:(unroll_times_max|unroll_times)=(\d+)">
	      <example>-OPT:unroll_times_max</example>
	      <![CDATA[
	         <p>-OPT:unroll_times_max=N<br />
	         Unroll  inner loops by a maximum of N.  The default is 4.</p>
	      ]]>
	   </flag>
	
	   <!-- Individual subflags of the -WOPT: class -->
	
	   <!-- First entry of the -WOPT: group, so it also carries the group introduction.
	        The value is described as non-negative because 0 is a documented setting. -->
	   <flag name="F-WOPT:aggstr" class="optimization"
	         regexp="-WOPT:aggstr=\d+" compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-WOPT:aggstr</example>
	      <![CDATA[
	         <p>The -WOPT: option group specifies options that affect the global optimizer.
	         The options are enabled at -O2 or above.</p>
	
	         <p>-WOPT:aggstr=N<br />
	         This controls the aggressiveness of the strength reduction optimization
	         performed by the scalar optimizer, in which induction expressions
	         within a loop are replaced by temporaries that are incremented
	         together with the loop variable. When strength reduction is overdone,
	         the additional temporaries increase register pressure, resulting in
	         excessive register spills that decrease performance.
	         The value specified must be a non-negative integer value, which specifies
	         the maximum number of induction expressions that will be strength-reduced
	         across an index variable increment.
	         When set at 0, strength reduction is only performed for non-trivial
	         induction expressions. The default is 11.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch that preserves memory operands to help load subsumption. -->
	   <flag name="F-WOPT:mem_opnds"
	         class="optimization"
	         compilers="Fpathcc,FpathCC,Fpathf95"
	         regexp="-WOPT:mem_opnds=(on|off|0|1)\b">
	      <example>-WOPT:mem_opnds</example>
	      <![CDATA[
	         <p>-WOPT:mem_opnds=(ON|OFF)<br />
	         Makes  the scalar optimizer preserve any memory operands of arithmetic
	         operations so as to help bring about subsumption of memory loads into
	         the operands of arithmetic operations. Load subsumption is the combining
	         of an arithmetic instruction and a memory load into one instruction.
	         Default is OFF.</p>
	      ]]>
	   </flag>
	
	   <!-- Boolean switch for narrowing 64-bit address computation to 32-bit arithmetic.
	        NOTE(review): unlike most neighbouring flags, no compilers attribute is given;
	        confirm whether that is intended. -->
	   <flag name="F-WOPT:retype_expr"
	         class="optimization"
	         regexp="-WOPT:retype_expr=(on|off|0|1)\b">
	      <example>-WOPT:retype_expr</example>
	      <![CDATA[
	         <p>-WOPT:retype_expr=(ON|OFF)<br />
	        Enables the optimization in the compiler that converts 64-bit address 
	        computation to use 32-bit arithmetic as much as possible. 
	        Default is OFF.</p>
	      ]]>
	   </flag>
	   
	   <!-- End of description of flag groups -->
	
	   <!-- End of description of optimization flags -->

	<!-- /PathScale Optimization flags -->
 
	<!-- PGI Optimization flags -->

		<!-- Restricted to pgcc: selects C99 language features. -->
		<flag name="c9x" class="optimization" compilers="pgcc" regexp="-c9x\b">
		<![CDATA[
		 <p>Use C99 language features.</p>
		]]>
		<example>-c9x</example>
		</flag>
		
		<!-- Optimization Flags -->
		
		<!-- Umbrella flag: expands (via the includes below) to O2, unroll c:1, smart, lre and noframe. -->
		<flag name="fast" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-fast\b">
		<example>-fast</example>
		<![CDATA[
		 <p>Chooses generally optimal flags for the target platform.</p>
		 ]]>
		<include flag="O2" />
		<include flag="Munroll_c_m" flagtext="-Munroll=c:1" />
		<include flag="Msmart" />
		<include flag="Mlre" />
		<include flag="Mnoframe" />
		</flag>
		
		<!-- Umbrella flag: everything in "fast" plus SSE vectorization, cache alignment,
		     flush-to-zero and scalar SSE (see the includes below). -->
		<flag name="fastsse" 
		 class="optimization" 
		 compilers="pgcc, pgCC, pgf90" 
		 regexp="-fastsse\b">
		<![CDATA[
		 <p>Chooses generally optimal flags for a processor that supports SSE capability.
		</p>
		 ]]>
		<example>-fastsse</example>
		<include flag="fast" />
		<include flag="Mvect_sse" />
		<include flag="Mcache_align" />
		<include flag="Mflushz" />
		<include flag="Mscalarsse" />
		</flag>
		
		<!-- pgCC only: switches that turn off C++ exception handling and RTTI support. -->
		<flag name="no-exceptions" class="optimization" compilers="pgCC" regexp="--no_exceptions\b">
		<example>--no_exceptions</example>
		<![CDATA[
		 <p>Disable C++ exception handling support.</p>
		 ]]>
		</flag>

		<flag name="no-rtti" class="optimization" compilers="pgCC" regexp="--no_rtti\b">
		<example>--no_rtti</example>
		<![CDATA[
		 <p>Disable C++ run time type information support.</p>
		 ]]>
		</flag>
		
		<!-- Stand-alone PGI code-generation switches: data alignment, flush-to-zero,
		     stack-frame setup on/off, and relaxed floating-point precision. -->
		<flag name="Mcache_align" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mcache_align\b">
		<example>-Mcache_align</example>
		<![CDATA[
		 <p>Align "unconstrained" data objects of size greater than or equal to 16
		bytes on cache-line boundaries.  An "unconstrained" object is a variable or
		array that is not a member of an aggregate structure or common block, is not
		allocatable, and is not an automatic array.  On by default on 64-bit Linux systems.</p>
		 ]]> 
		</flag>

		<flag name="Mflushz" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mflushz\b">
		<example>-Mflushz</example>
		<![CDATA[
		 <p>Set SSE to flush-to-zero mode; if a floating-point underflow occurs, the value is set to zero.</p>
		]]>
		</flag>

		<flag name="Mframe" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mframe\b">
		<example>-Mframe</example>
		<![CDATA[
		 <p>Generate code to set up a stack frame.</p>
		]]>
		</flag>

		<flag name="Mnoframe" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnoframe\b">
		<example>-Mnoframe</example>
		<![CDATA[
		 <p>Eliminates operations that set up a true stack frame pointer for every function.  With this option enabled, you
		cannot perform a traceback on the generated code and you cannot access local variables.</p>
		]]>
		</flag>

		<flag name="Mfprelaxed" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mfprelaxed\b">
		<example>-Mfprelaxed</example>
		<![CDATA[
		 <p>Instructs the compiler to use relaxed precision in the calculation of some intrinsic functions.  Can result in 
		improved performance at the expense of numerical accuracy.</p>
		 ]]>
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Mprefetch sub-option list into
		     its first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Mprefetch_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mprefetch=([^,\s]+),(\S+)\b">
		<display enable="0" />
		<include text="-Mprefetch=$1" />
		<include text="-Mprefetch=$2" />
		</flag>
		
		<!-- Prefetch distance sub-option. Accepts the short form "d:" and the long form
		     "distance:"; the suffix is an optional non-capturing group so that it can
		     appear at most once. Implies the base Mprefetch flag. -->
		<flag name="Mprefetch_d_m" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mprefetch=d(?:istance)?:(\d+)\b">
		<![CDATA[
		 <p>Set the fetch-ahead distance for prefetch instructions to <b>m</b> cache lines.</p>
		]]>
		<example>-Mprefetch=d:m</example>
		<include flag="Mprefetch" />
		</flag>
		
		<!-- Prefetch count sub-option; implies the base Mprefetch flag. -->
		<flag name="Mprefetch_n_p" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mprefetch=n:(\d+)\b">
		<example>-Mprefetch=n:p</example>
		<![CDATA[
		 <p>Set maximum number of prefetch instructions to generate for a given loop to <b>p</b>.</p>
		]]>
		<include flag="Mprefetch" />
		</flag>
		
		<!-- Selects the prefetchnta instruction; implies the base Mprefetch flag.
		     The example uses the "=nta" spelling that the regexp actually matches. -->
		<flag name="Mprefetch_nta" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mprefetch=nta\b">
		<![CDATA[
		 <p>Use the <i>prefetchnta</i> instruction.</p>
		]]>
		<example>-Mprefetch=nta</example>
		<include flag="Mprefetch" />
		</flag>
		
		<!-- Remaining prefetch flags: instruction-selection sub-options (each implies
		     Mprefetch), then the base enable flag and its negation. The base flag comes
		     after the sub-options that share its prefix. -->
		<flag name="Mprefetch_plain" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mprefetch=plain\b">
		<example>-Mprefetch=plain</example>
		<![CDATA[
		 <p>Use the <i>prefetch</i> instruction.</p>
		]]>
		<include flag="Mprefetch" />
		</flag>

		<flag name="Mprefetch_t0" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mprefetch=t0\b">
		<example>-Mprefetch=t0</example>
		<![CDATA[
		 <p>Use the <i>prefetcht0</i> instruction.</p>
		]]>
		<include flag="Mprefetch" />
		</flag>

		<flag name="Mprefetch_w" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mprefetch=w\b">
		<example>-Mprefetch=w</example>
		<![CDATA[
		 <p>Use the AMD-specific <i>prefetchw</i> instruction.</p>
		]]>
		<include flag="Mprefetch" />
		</flag>

		<flag name="Mprefetch" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mprefetch\b">
		<example>-Mprefetch</example>
		<![CDATA[
		 <p>Enable generation of prefetch instructions on processors where they are supported.</p>
		]]>
		</flag>

		<flag name="Mnoprefetch" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnoprefetch\b">
		<example>-Mnoprefetch</example>
		<![CDATA[
		 <p>Disable generation of prefetch instructions.</p>
		]]>
		</flag>
		
		<!-- Scalar floating-point code selection (SSE vs. x87) and sign-extension behaviour. -->
		<flag name="Mscalarsse" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mscalarsse\b">
		<example>-Mscalarsse</example>
		<![CDATA[
		 <p>Use SSE/SSE2 instructions to perform scalar floating-point arithmetic on targets where these
		 instructions are supported.</p>
		]]>
		</flag>

		<flag name="Mnoscalarsse" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnoscalarsse\b">
		<example>-Mnoscalarsse</example>
		<![CDATA[
		 <p>Do not use SSE/SSE2 instructions to perform scalar floating-point arithmetic; use x87 operations instead.</p>
		]]>
		</flag>

		<flag name="Msignextend" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Msignextend\b">
		<example>-Msignextend</example>
		<![CDATA[
		 <p>Instructs the compiler to extend the sign bit that is set as a result of an object's conversion from one
		 data type to an object of a larger signed data type.</p>
		]]>
		</flag>
		
		<!-- LRE sub-option "array"; implies the base Mlre flag.
		     The example uses the "=array" spelling that the regexp actually matches. -->
		<flag name="Mlre_array" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mlre=array\b">
		<![CDATA[
		 <p>Treat individual array element references as candidates for possible loop-carried redundancy elimination.
		 The default is to eliminate only redundant expressions involving two or more operands.</p>
		]]>
		<example>-Mlre=array</example>
		<include flag="Mlre" />
		</flag>
		
		<!-- Loop-carried redundancy elimination (LRE): re-association sub-options
		     (each implies Mlre), then the base enable flag and its negation. -->
		<flag name="Mlre_assoc" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mlre=assoc\b">
		<example>-Mlre=assoc</example>
		<![CDATA[
		 <p>Allow expression re-association; specifying this sub-option can increase opportunities for loop-carried 
		 redundancy elimination.</p>
		]]>
		<include flag="Mlre" />
		</flag>

		<flag name="Mlre_noassoc" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mlre=noassoc\b">
		<example>-Mlre=noassoc</example>
		<![CDATA[
		 <p>Disable expression re-association.</p>
		]]>
		<include flag="Mlre" />
		</flag>

		<flag name="Mlre" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mlre\b">
		<example>-Mlre</example>
		<![CDATA[
		 <p>Enables loop-carried redundancy elimination, an optimization that can reduce the number of arithmetic operations
		 and memory references in loops.</p>
		]]>
		</flag>

		<flag name="Mnolre" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnolre\b">
		<example>-Mnolre</example>
		<![CDATA[
		 <p>Disable loop-carried redundancy elimination.</p>
		]]>
		</flag>
		
		<!-- Vector-intrinsic suppression, and the two profile-feedback phases
		     (instrumentation, then optimization). -->
		<flag name="Mnovintr" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnovintr\b">
		<example>-Mnovintr</example>
		<![CDATA[
		 <p>Instructs the compiler not to perform idiom recognition or introduce calls to hand-optimized vector functions.</p>
		]]>
		</flag>

		<flag name="Mpfi" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mpfi\b">
		<example>-Mpfi</example>
		<![CDATA[
		 <p>Generate profile-feedback instrumentation (PFI); this includes extra code to collect run-time statistics and dump 
		 them to a trace file for use in a subsequent compilation.  PFI gathers information about a program's execution and data values 
		but does not gather information from hardware performance counters.  PFI does gather data for optimizations which are unique to profile-feedback optimization.</p>
		]]>
		</flag>

		<flag name="Mpfo" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mpfo\b">
		<example>-Mpfo</example>
		<![CDATA[
		 <p>Enable profile-feedback optimizations.  </p>
		]]>
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Mipa sub-option list into its
		     first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Mipa_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=([^,\s]+),(\S+)\b">
		<display enable="0" />
		<include text="-Mipa=$1" />
		<include text="-Mipa=$2" />
		</flag>

		<!-- IPA sub-option: alignment recognition. -->
		<flag name="Mipa_align" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=align\b">
		<example>-Mipa=align</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Recognize when targets of pointer dummy are aligned.</p>
		]]>
		</flag>
		
		<!-- IPA sub-option: turns alignment recognition off. -->
		<flag name="Mipa_noalign" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mipa=noalign\b">
		<![CDATA[
		 <p>Interprocedural Analysis option: Disable recognition when targets of pointer dummy are aligned.</p>
		]]>
		<example>-Mipa=noalign</example>
		</flag>
		
		<!-- IPA sub-options: argument removal, call-graph generation, constant
		     propagation (each with its "no" form) and the inlining exclusion list. -->
		<flag name="Mipa_arg" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=arg\b">
		<example>-Mipa=arg</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Remove arguments replaced by -Mipa=ptr,const</p>
		]]>
		<include flag="Mipa_ptr" />
		<include flag="Mipa_const" />
		</flag>

		<flag name="Mipa_noarg" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=noarg\b">
		<example>-Mipa=noarg</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Do not remove arguments replaced by -Mipa=ptr,const</p>
		]]>
		<include flag="Mipa_ptr" />
		<include flag="Mipa_const" />
		</flag>

		<flag name="Mipa_cg" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=cg\b">
		<example>-Mipa=cg</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Generate call graph information for pgicg tool.</p>
		]]>
		</flag>

		<flag name="Mipa_nocg" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=nocg\b">
		<example>-Mipa=nocg</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Do not generate call graph information for pgicg
		 tool.</p>
		]]>
		</flag>

		<flag name="Mipa_const" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=const\b">
		<example>-Mipa=const</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Enable interprocedural constant propagation.</p>
		]]>
		</flag>

		<flag name="Mipa_noconst" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=noconst\b">
		<example>-Mipa=noconst</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Disable interprocedural constant propagation.</p>
		]]>
		</flag>

		<flag name="Mipa_except" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=except:([\-\w,]+)\b">
		<example>-Mipa=except:func</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Used with -Mipa=inline to specify functions which should not be inlined.</p>
		]]>
		<include flag="Mipa_inline" />
		</flag>
		
		<!-- IPA umbrella sub-option: expands (via the includes below) to the eight
		     sub-options named in its description. -->
		<flag name="Mipa_fast" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mipa=fast\b">
		<![CDATA[
		 <p>Equivalent to -Mipa=align,arg,const,f90ptr,shape,globals,localarg,ptr.</p>
		]]>
		<example>-Mipa=fast</example>
		<include flag="Mipa_align" />
		<include flag="Mipa_arg" />
		<include flag="Mipa_const" />
		<include flag="Mipa_f90ptr" />
		<include flag="Mipa_shape" />
		<include flag="Mipa_globals" />
		<include flag="Mipa_localarg" />
		<include flag="Mipa_ptr" />
		</flag>
		
		<!-- IPA sub-option: unconditional recompilation of all objects. -->
		<flag name="Mipa_force" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mipa=force\b" >
		<![CDATA[
		 <p>Interprocedural Analysis option: Force all objects to recompile regardless
		 of whether IPA information has changed.</p>
		]]>
		<example>-Mipa=force</example>
		</flag>
		
		<!-- IPA sub-options: global-value optimization on/off, then automatic inlining
		     with and without a level limit. The limited form precedes the plain form
		     because both share the "inline" prefix. -->
		<flag name="Mipa_globals" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=globals\b">
		<example>-Mipa=globals</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Optimize references to global values.</p>
		]]>
		</flag>

		<flag name="Mipa_noglobals" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=noglobals\b">
		<example>-Mipa=noglobals</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Do not optimize references to global values.</p>
		]]>
		</flag>

		<flag name="Mipa_inline:n" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=inline:(\d+)\b">
		<example>-Mipa=inline:n</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Automatically determine which functions
		 to inline, limit to <b>n</b> levels.  IPA-based function inlining is performed from leaf
		 routines upward.</p>
		]]>
		</flag>

		<flag name="Mipa_inline" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=inline\b">
		<example>-Mipa=inline</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Automatically determine which functions to inline.  
		 IPA-based function inlining is performed from leaf routines upward.</p>
		]]>
		</flag>
		
		
		<!-- IPA sub-options that concern library routines: inlining from libraries
		     (both forms include Mipa_inline) and IPA-driven re-optimization of them. -->
		<flag name="Mipa_libinline" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=libinline\b">
		<example>-Mipa=libinline</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Allow inlining of routines from libraries.</p>
		]]>
		<include flag="Mipa_inline" />
		</flag>

		<flag name="Mipa_nolibinline" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=nolibinline\b">
		<example>-Mipa=nolibinline</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Do not inline routines from libraries.</p>
		]]>
		<include flag="Mipa_inline" />
		</flag>

		<flag name="Mipa_libopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=libopt\b">
		<example>-Mipa=libopt</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Allow recompiling and optimization of routines from libraries using IPA information.</p>
		]]>
		</flag>

		<flag name="Mipa_nolibopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=nolibopt\b">
		<example>-Mipa=nolibopt</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Don't optimize routines in libraries.</p>
		]]>
		</flag>
		
		<!-- IPA sub-options for externalizing local pointer targets. "localarg" and
		     "local" carry the same description and both include Mipa_arg; the negated
		     form accepts either "nolocal" or "nolocalarg". -->
		<flag name="Mipa_localarg" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=localarg\b">
		<example>-Mipa=localarg</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: -Mipa=arg plus externalizes local pointer targets.</p>
		]]>
		<include flag="Mipa_arg" />
		</flag>

		<flag name="Mipa_local" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=local\b">
		<example>-Mipa=local</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: -Mipa=arg plus externalizes local pointer targets.</p>
		]]>
		<include flag="Mipa_arg" />
		</flag>

		<flag name="Mipa_nolocalarg" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=nolocal(arg)?\b">
		<example>-Mipa=nolocalarg</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Do not externalize local pointer targets.</p>
		]]>
		</flag>
		
		<!-- IPA sub-options for pointer disambiguation: the general form and the
		     Fortran 90/95 form, each with its "no" counterpart. -->
		<flag name="Mipa_ptr" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=ptr\b">
		<example>-Mipa=ptr</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Enable pointer disambiguation across procedure calls.</p>
		]]>
		</flag>

		<flag name="Mipa_noptr" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=noptr\b">
		<example>-Mipa=noptr</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Disable pointer disambiguation.</p>
		]]>
		</flag>

		<flag name="Mipa_f90ptr" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=f90ptr\b">
		<example>-Mipa=f90ptr</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Fortran 90/95 Pointer disambiguation across calls.</p>
		]]>
		</flag>

		<flag name="Mipa_nof90ptr" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=nof90ptr\b">
		<example>-Mipa=nof90ptr</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Disable Fortran 90/95 pointer disambiguation</p>
		]]>
		</flag>
		
		<!-- IPA sub-options in on/off pairs: pure-function detection, Fortran 90 array
		     shape propagation, and removal of never-called functions. -->
		<flag name="Mipa_pure" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=pure\b">
		<example>-Mipa=pure</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Pure function detection.</p>
		]]>
		</flag>

		<flag name="Mipa_nopure" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=nopure\b">
		<example>-Mipa=nopure</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Disable pure function detection.</p>
		]]>
		</flag>

		<flag name="Mipa_shape" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=shape\b">
		<example>-Mipa=shape</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Perform Fortran 90 array shape propagation.</p>
		]]>
		</flag>

		<flag name="Mipa_noshape" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=noshape\b">
		<example>-Mipa=noshape</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Disable Fortran 90 array shape propagation.</p>
		]]>
		</flag>

		<flag name="Mipa_vestigial" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=vestigial\b">
		<example>-Mipa=vestigial</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Remove functions that are never called.</p>
		]]>
		</flag>

		<flag name="Mipa_novestigial" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa=novestigial\b">
		<example>-Mipa=novestigial</example>
		<![CDATA[
		 <p>Interprocedural Analysis option: Do not remove functions that are never called.</p>
		]]>
		</flag>
		
		<!-- Base IPA flag; placed after every sub-option that shares its prefix.
		     Includes Mipa_const. -->
		<flag name="Mipa" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mipa\b">
		<example>-Mipa</example>
		<![CDATA[
		 <p>Enable Interprocedural Analysis.</p>
		]]>
		<include flag="Mipa_const" />
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Mconcur sub-option list into its
		     first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Mconcur_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=([^,\s]+),(\S+)\b">
		<display enable="0" />
		<include text="-Mconcur=$1" />
		<include text="-Mconcur=$2" />
		</flag>

		<!-- Auto-parallelizer sub-options: alternate serial code and reductions.
		     Each of these includes the base Mconcur flag. -->
		<flag name="Mconcur_altcode" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=altcode\b">
		<example>-Mconcur=altcode</example>
		<![CDATA[
		 <p>Instructs the parallelizer to generate alternate serial code for parallelized loops.  Without arguments, 
		 the parallelizer determines an appropriate cutoff length and generates serial code to be executed whenever
		 the loop count is less than or equal to that length.</p>
		]]>
		<include flag="Mconcur" />
		</flag>

		<flag name="Mconcur_altcoden" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=altcode:(\d+)\b">
		<example>-Mconcur=altcode:n</example>
		<![CDATA[
		 <p>Instructs the parallelizer to generate alternate serial code for parallelized loops.  With arguments, the serial altcode
		 is executed whenever the loop count is less than or equal to <b>n</b>.</p>
		]]>
		<include flag="Mconcur" />
		</flag>

		<flag name="Mconcur_noaltcode" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=noaltcode\b">
		<example>-Mconcur=noaltcode</example>
		<![CDATA[
		 <p>Always execute the parallelized version of a loop regardless of the loop count.</p>
		]]>
		<include flag="Mconcur" />
		</flag>

		<flag name="Mconcur_noassoc" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=noassoc\b">
		<example>-Mconcur=noassoc</example>
		<![CDATA[
		 <p>Disables parallelization of loops with reductions.</p>
		]]>
		<include flag="Mconcur" />
		</flag>
		
		<!-- Auto-parallelizer sub-option: treat loops that contain calls as safe.
		     Includes the base Mconcur flag. -->
		<flag name="Mconcur_cncall" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mconcur=cncall\b">
		<![CDATA[
		 <p>Assume loops containing calls are safe to parallelize and allow loops containing calls to be
		 candidates for parallelization.  Also, no minimum loop count threshold must be satisfied before 
		 parallelization will occur, and last values of scalars are assumed to be safe.</p>
		]]>
		<example>-Mconcur=cncall</example>
		<include flag="Mconcur" />
		</flag>
		
		<!-- Auto-parallelizer sub-option: negated form of cncall. Includes Mconcur. -->
		<flag name="Mconcur_nocncall" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=nocncall\b">
		<example>-Mconcur=nocncall</example>
		<![CDATA[
		 <p>Do not assume loops containing calls are safe to parallelize.</p>
		]]>
		<include flag="Mconcur" />
		</flag>
		
		<!-- Auto-parallelizer sub-option: block distribution of loop iterations.
		     Includes the base Mconcur flag. The example spells out "block" in full,
		     as the regexp requires. -->
		<flag name="Mconcur_dist_block" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mconcur=dist:block\b">
		<![CDATA[
		 <p>Parallelize with block distribution.  Contiguous blocks of iterations of a parallelizable loop
		 are assigned to the available processors.</p>
		]]>
		<example>-Mconcur=dist:block</example>
		<include flag="Mconcur" />
		</flag>
		
		<!-- Auto-parallelizer: cyclic distribution and innermost-loop sub-options
		     (each includes Mconcur), followed by the base Mconcur flag itself. -->
		<flag name="Mconcur_dist_cyclic" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=dist:cyclic\b">
		<example>-Mconcur=dist:cyclic</example>
		<![CDATA[
		 <p>Parallelize with cyclic distribution.  The outermost parallelizable loop in any loop nest is 
		 parallelized.  If a parallelized loop is innermost, its iterations are allocated to processors cyclically.
		 For example, if there are 3 processors executing a loop, processor 0 performs iterations 0, 3, 6, etc.; processor 1
		 performs iterations 1, 4, 7, etc.; and processor 2 performs iterations 2, 5, 8, etc.</p>
		]]>
		<include flag="Mconcur" />
		</flag>

		<flag name="Mconcur_innermost" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=innermost\b">
		<example>-Mconcur=innermost</example>
		<![CDATA[
		 <p>Enable parallelization of innermost loops.</p>
		]]>
		<include flag="Mconcur" />
		</flag>

		<flag name="Mconcur_noinnermost" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur=noinnermost\b">
		<example>-Mconcur=noinnermost</example>
		<![CDATA[
		 <p>Disable parallelization of innermost loops.</p>
		]]>
		<include flag="Mconcur" />
		</flag>

		<flag name="Mconcur" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mconcur\b">
		<example>-Mconcur</example>
		<![CDATA[
		 <p>Instructs the compiler to enable auto-concurrentization of loops.  If <i>-Mconcur</i> is specified, multiple processors
		 will be used to execute loops that the compiler determines to be parallelizable.</p>
		]]>
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Minline sub-option list into its
		     first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Minline_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline=([^,\s]+),(\S+)\b">
		<display enable="0" />
		<include text="-Minline=$1" />
		<include text="-Minline=$2" />
		</flag>

		<!-- Inliner sub-options (library, exclusion list, by name, size limit, level
		     count), followed by the base Minline flag. -->
		<flag name="Minline_lib" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline=lib:([\.\-\w]+)\b">
		<example>-Minline=lib:filename.ext</example>
		<![CDATA[
		 <p>Instructs the inliner to inline the functions within the library <b>filename.ext</b>.</p>
		]]>
		</flag>

		<flag name="Minline_except" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline=except:([\-\w,]+)\b">
		<example>-Minline=except:func</example>
		<![CDATA[
		 <p>Instructs the inliner to inline all eligible functions except <b>func</b>, a function in the source text.
		 Multiple functions can be listed, comma-separated.</p>
		]]>
		</flag>

		<flag name="Minline_name" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline=name:([\-\w,]+)\b">
		<example>-Minline=name:func</example>
		<![CDATA[
		 <p>Instructs the inliner to inline function <b>func</b>.</p>
		]]>
		</flag>

		<flag name="Minline_size" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline=size:(\d+)\b">
		<example>-Minline=size:n</example>
		<![CDATA[
		 <p>Instructs the inliner to inline functions with <b>n</b> or fewer statements.</p>
		]]>
		</flag>

		<flag name="Minline_levels" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline=levels:(\d+)\b">
		<example>-Minline=levels:n</example>
		<![CDATA[
		 <p>Instructs the inliner to perform <b>n</b> levels of inlining.</p>
		]]>
		</flag>

		<flag name="Minline" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Minline\b">
		<example>-Minline</example>
		<![CDATA[
		 <p>Instructs the inliner to perform 1 level of inlining.</p>
		]]>
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Msafeptr sub-option list into its
		     first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Msafeptr_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Msafeptr=([^,\s]+),(\S+)\b">
		<display enable="0" />
		<include text="-Msafeptr=$1" />
		<include text="-Msafeptr=$2" />
		</flag>
		
		<!-- C/C++ only: strongest Msafeptr sub-option. Includes the base Msafeptr flag. -->
		<flag name="Msafeptr_all" class="optimization"
		 compilers="pgcc, pgCC"
		 regexp="-Msafeptr=all\b">
		<![CDATA[
		 <p>Assume all pointers and arrays are independent and safe for aggressive optimizations,
		 and in particular that no pointers or arrays overlap or conflict with each other.</p>
		]]>
		<example>-Msafeptr=all</example>
		<include flag="Msafeptr" />
		</flag>
		
		<!-- C/C++ only: per-storage-class Msafeptr sub-options (each includes
		     Msafeptr), followed by the base Msafeptr flag. "auto" and "local"
		     carry the same description. -->
		<flag name="Msafeptr_arg" class="optimization" compilers="pgcc, pgCC" regexp="-Msafeptr=arg\b">
		<example>-Msafeptr=arg</example>
		<![CDATA[
		 <p>Instructs the compiler that arrays and pointers are treated with the same copyin and copyout
		 semantics as Fortran dummy arguments.</p>
		]]>
		<include flag="Msafeptr" />
		</flag>

		<flag name="Msafeptr_auto" class="optimization" compilers="pgcc, pgCC" regexp="-Msafeptr=auto\b">
		<example>-Msafeptr=auto</example>
		<![CDATA[
		 <p>Instructs the compiler that local pointers and arrays do not overlap or
		 conflict with each other and are independent.</p>
		]]>
		<include flag="Msafeptr" />
		</flag>

		<flag name="Msafeptr_local" class="optimization" compilers="pgcc, pgCC" regexp="-Msafeptr=local\b">
		<example>-Msafeptr=local</example>
		<![CDATA[
		 <p>Instructs the compiler that local pointers and arrays do not overlap or
		 conflict with each other and are independent.</p>
		]]>
		<include flag="Msafeptr" />
		</flag>

		<flag name="Msafeptr_static" class="optimization" compilers="pgcc, pgCC" regexp="-Msafeptr=static\b">
		<example>-Msafeptr=static</example>
		<![CDATA[
		 <p>Instructs the compiler that static pointers and arrays do not overlap or conflict
		 with each other and are independent.</p>
		]]>
		<include flag="Msafeptr" />
		</flag>

		<flag name="Msafeptr_global" class="optimization" compilers="pgcc, pgCC" regexp="-Msafeptr=global\b">
		<example>-Msafeptr=global</example>
		<![CDATA[
		 <p>Instructs the compiler that global or external pointers and arrays do not overlap or 
		 conflict with each other and are independent.</p>
		]]>
		<include flag="Msafeptr" />
		</flag>

		<flag name="Msafeptr" class="optimization" compilers="pgcc, pgCC" regexp="-Msafeptr\b">
		<example>-Msafeptr</example>
		<![CDATA[
		 <p>Instructs the C/C++ compiler to override data dependencies between pointers of a given storage class.</p>
		]]>
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Munroll sub-option list into its
		     first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Munroll_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Munroll=([^,\s]+),(\S+)\b">
		<display enable="0" />
		<include text="-Munroll=$1" />
		<include text="-Munroll=$2" />
		</flag>

		<!-- Loop unroller: the two counted sub-options, the base flag and its negation. -->
		<flag name="Munroll_c_m" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Munroll=c:(\d+)\b">
		<example>-Munroll=c:m</example>
		<![CDATA[
		 <p>Instructs the compiler to completely unroll loops with a constant loop count of less than
		 or equal to <b>m</b>.</p>
		]]>
		</flag>

		<flag name="Munroll_n_u" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Munroll=n:(\d+)\b">
		<example>-Munroll=n:u</example>
		<![CDATA[
		 <p>Instructs the compiler to unroll <b>u</b> times, a loop that is not completely unrolled, or has a 
		 non-constant loop count.</p>
		]]>
		</flag>

		<flag name="Munroll" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Munroll\b">
		<example>-Munroll</example>
		<![CDATA[
		 <p>Invokes the loop unroller.</p>
		]]>
		</flag>

		<flag name="Mnounroll" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnounroll\b">
		<example>-Mnounroll</example>
		<![CDATA[
		 <p>Disable loop unrolling.</p>
		]]>
		</flag>

		<!-- Post-pass instruction scheduling on/off. -->
		<flag name="Msmart" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Msmart\b">
		<example>-Msmart</example>
		<![CDATA[
		 <p>Enable an optional post-pass instruction scheduling.</p>
		]]>
		</flag>

		<flag name="Mnosmart" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnosmart\b">
		<example>-Mnosmart</example>
		<![CDATA[
		 <p>Disable an optional post-pass instruction scheduling.</p>
		]]>
		</flag>
		
		<!-- Hidden splitter: breaks a comma-separated Mvect sub-option list into its
		     first item ($1) and the remainder ($2), each re-emitted as its own flag. -->
		<flag name="Mvect_subopt" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mvect=([^,\s]+),([:\w]+)\b">
		<display enable="0" />
		<include text="-Mvect=$1" />
		<include text="-Mvect=$2" />
		</flag>

		<!-- Turns the vectorizer off. -->
		<flag name="Mnovect" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mnovect\b">
		<example>-Mnovect</example>
		<![CDATA[
		 <p>Disable automatic vector pipelining.</p>
		]]>
		</flag>
		
		<!-- Vectorizer sub-option: alternate-code generation for vectorized loops. -->
		<flag name="Mvect_altcode" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mvect=altcode\b">
		<![CDATA[
		 <p>Instructs the vectorizer to generate alternate code for vectorized loops when appropriate.  For each
		 vectorized loop the compiler decides whether to generate altcode and what type or types to generate, which may
		 be any or all of: altcode without iteration peeling, altcode with non-temporal stores and other data cache 
		 optimizations, and altcode based on array alignments calculated dynamically at runtime.  The compiler also 
		 determines suitable loop count and array alignment conditions for executing the altcode.</p>
		]]>
		<example>-Mvect=altcode</example>
		</flag>
		
		<!-- Vectorizer sub-option: negated form of altcode. -->
		<flag name="Mvect_noaltcode" class="optimization" compilers="pgcc, pgCC, pgf90" regexp="-Mvect=noaltcode\b">
		<example>-Mvect=noaltcode</example>
		<![CDATA[
		 <p>Disables alternate code generation for vectorized loops.</p>
		]]>
		</flag>
		
		<!-- Vectorizer sub-option: associativity conversions. The regexp matches
		     "=assoc", the spelling shown in the example. -->
		<flag name="Mvect_assoc" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mvect=assoc\b">
		<![CDATA[
		 <p>Instructs the vectorizer to enable certain associativity conversions that can change the results of a computation
		 due to roundoff error.  A typical optimization is to change an arithmetic operation to an arithmetic operation that is 
		 mathematically correct, but can be computationally different, due to round-off error.</p>
		]]>
		<example>-Mvect=assoc</example>
		</flag>
		
		<flag name="Mvect_noassoc" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mvect=noassoc\b">
		<![CDATA[
		 <p>Instructs the vectorizer to disable associativity conversions.</p>
		]]>
		<example>-Mvect=noassoc</example>
		</flag>
		
		<flag name="Mvect_cachesize_n" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mvect=cachesize:(\d+)\b">
		<![CDATA[
		 <p>Instructs the vectorizer, when performing cache tiling optimizations, to assume a cache size of <b>n</b>.
		 The default size is <b>n</b>=262144.</p>
		]]>
		<example>-Mvect=cachesize:n</example>
		</flag>
		
		<!-- PGI vectorizer sub-option: loop fusion.  Also pulls in the plain
		     Mvect entry through the include element. -->
		<flag name="Mvect_fuse"
		 class="optimization"
		 regexp="-Mvect=fuse\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-Mvect=fuse</example>
		<![CDATA[
		 <p>Instructs the vectorizer to enable loop fusion.</p>
		]]>
		<include flag="Mvect" />
		</flag>
		
		<!-- PGI vectorizer sub-option: idiom recognition. -->
		<flag name="Mvect_idiom"
		 class="optimization"
		 regexp="-Mvect=idiom\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-Mvect=idiom</example>
		<![CDATA[
		 <p>Instructs the vectorizer to enable idiom recognition.</p>
		]]>
		</flag>
		
		<!-- PGI vectorizer sub-option: lift the statement-count limit on
		     vectorized loops.  Also pulls in the plain Mvect entry. -->
		<flag name="Mvect_nosizelimit"
		 class="optimization"
		 regexp="-Mvect=nosizelimit\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-Mvect=nosizelimit</example>
		<![CDATA[
		 <p>Generate vector loops for all loops where possible regardless of the number of
		 statements in the loop.  This overrides a heuristic in the vectorizer that ordinarily
		 prevents vectorization of loops with a number of statements that exceed a certain threshold.</p>
		]]>
		<include flag="Mvect" />
		</flag>
		
		<!-- PGI vectorizer sub-option: prefetch generation.  Also pulls in the
		     plain Mvect entry. -->
		<flag name="Mvect_prefetch"
		 class="optimization"
		 regexp="-Mvect=prefetch\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-Mvect=prefetch</example>
		<![CDATA[
		 <p>Instructs the vectorizer to generate prefetch instructions.</p>
		]]>
		<include flag="Mvect" />
		</flag>
		
		<!-- PGI vectorizer sub-option: SSE/SSE2 code generation.  Also pulls in
		     the plain Mvect entry. -->
		<flag name="Mvect_sse"
		 class="optimization"
		 regexp="-Mvect=sse\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-Mvect=sse</example>
		<![CDATA[
		 <p>Instructs the vectorizer to search for vectorizable loops and, where possible, make use of
		 SSE, SSE2, and prefetch instructions.</p>
		]]>
		<include flag="Mvect" />
		</flag>
		
		<!-- Plain -Mvect: enables the vectorizer and pulls in the entries for
		     its default sub-options.  The include text is written as a complete
		     option, leading "-" included, so that it can be matched by the
		     Mvect_cachesize_n pattern. -->
		<flag name="Mvect" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-Mvect\b">
		<![CDATA[
		 <p>Enable automatic vector pipelining.</p>
		]]>
		<include flag="Mvect_assoc" />
		<include flag="Mvect_altcode" />
		<include text="-Mvect=cachesize:262144" />
		<example>-Mvect</example>
		</flag>
		
		<!-- Generic single-digit -O entry.  The pattern captures the digit, the
		     include text re-emits "-O" followed by that digit, and display is
		     disabled for this entry.
		     NOTE(review): presumably the re-emitted text is then described by
		     the per-level entries (O0 to O3, O4orabove) defined after this one;
		     confirm against the SPEC flag-description rules. -->
		<flag name="O" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-O(\d)\b">
		<include text="-O$1" />
		<display enable="0" />
		</flag>
		
		<!-- Bare -O (no level digit): described as equivalent to level 2 and
		     includes the O2 entry. -->
		<flag name="Odefault"
		 class="optimization"
		 regexp="-O\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-O</example>
		<![CDATA[
		 <p>Set the optimization level to -O2</p>
		]]>
		<include flag="O2" />
		</flag>
		
		<!-- Optimization level 0.  The entry applies to the C, C++ and Fortran
		     drivers listed in the compilers attribute, so the description uses
		     language-neutral wording. -->
		<flag name="O0" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-O0\b">
		<![CDATA[
		 <p>A basic block is generated for each language statement.  No scheduling is done
		between statements.  No global optimizations are performed.</p>
		]]>
		<example>-O0</example>
		</flag>
		
		<!-- Optimization level 1: local scheduling and some register allocation. -->
		<flag name="O1"
		 class="optimization"
		 regexp="-O1\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-O1</example>
		<![CDATA[
		 <p>Scheduling within extended basic blocks is performed.
		Some register allocation is performed.  No global optimizations
		are performed.</p>
		]]>
		</flag>
		
		<!-- Optimization level 2: adds global scalar optimizations on top of
		     level 1, whose entry is included. -->
		<flag name="O2"
		 class="optimization"
		 regexp="-O2\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-O2</example>
		<![CDATA[
		 <p>All level 1 optimizations are performed.  In addition, traditional scalar
		optimizations such as induction recognition and loop invariant motion are
		performed by the global optimizer.</p>
		]]>
		<include flag="O1" />
		</flag>
		
		<!-- Optimization level 3: adds more aggressive hoisting and scalar
		     replacement on top of level 2, whose entry is included. -->
		<flag name="O3"
		 class="optimization"
		 regexp="-O3\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-O3</example>
		<![CDATA[
		 <p>All level 1 and 2 optimizations are performed.
		In addition, this level enables more aggressive code hoisting
		and scalar replacement optimizations 
		that may or may not be profitable.</p>
		]]>
		<include flag="O2" />
		</flag>
		
		<!-- Levels 4 through 9: described as identical to level 3, whose entry
		     is included. -->
		<flag name="O4orabove"
		 class="optimization"
		 regexp="-O([456789])\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-O4</example>
		<![CDATA[
		 <p>Same as "-O3".</p>
		]]>
		<include flag="O3" />
		</flag>
		
		<!-- Target processor selection: AMD64, 32-bit mode. -->
		<flag name="tpk8-32"
		 class="optimization"
		 regexp="-tp\s+k8-32\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-tp k8-32</example>
		<![CDATA[
		 <p>Specify the type of the target processor as AMD64 Processor 32-bit mode.</p>
		]]>
		</flag>
		
		<!-- Target processor selection: AMD64, 64-bit mode. -->
		<flag name="tpk8-64"
		 class="optimization"
		 regexp="-tp\s+k8-64\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-tp k8-64</example>
		<![CDATA[
		 <p>Specify the type of the target processor as AMD64 Processor 64-bit mode.</p>
		]]>
		</flag>
		
		
		<!-- Target processor selection: Intel P7 family, 32-bit.
		     The pattern ends with a lookahead for whitespace or end of string
		     rather than a word boundary: a word boundary also exists between
		     "p7" and "-64", which would let this entry match the front of
		     "-tp p7-64" ahead of the tpp7-64 entry defined after it. -->
		<flag name="tpp7" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-tp\s+p7(?=\s|$)">
		<![CDATA[
		 <p>Specify the type of the target processor as Intel P7 Architecture (Pentium
		 4, Xeon, Centrino).</p>
		]]>
		<example>-tp p7</example>
		</flag>
		
		<!-- Target processor selection: Intel P7 family with EM64T, 64-bit mode. -->
		<flag name="tpp7-64" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-tp\s+p7-64\b">
		<![CDATA[
		 <p>Specify the type of the target processor as Intel P7 Architecture with
		 EM64T, 64-bit mode.</p>
		]]>
		<example>-tp p7-64</example>
		</flag>
		
		<!-- Target processor selection: unified AMD/Intel 64-bit mode. -->
		<flag name="tpx64"
		 class="optimization"
		 regexp="-tp\s+x64\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-tp x64</example>
		<![CDATA[
		 <p>Use the unified AMD/Intel 64-bit mode.</p>
		]]>
		</flag>
		
		
		<!-- Generic -tp entry.  The pattern captures the target name (word
		     characters and hyphens) after any amount of whitespace, the include
		     text re-emits it as "-tp" plus a single space plus the captured
		     name, and display is disabled for this entry.
		     NOTE(review): presumably this acts as a catch-all after the specific
		     -tp entries defined before it; confirm against the SPEC
		     flag-description rules. -->
		<flag name="tp" class="optimization"
		 compilers="pgcc, pgCC, pgf90"
		 regexp="-tp\s+([\-\w]+)\b">
		<include text="-tp $1" />
		<display enable="0" />
		</flag>

	<!-- /PGI Optimization flags -->

	
<!-- /OPTIMIZATION -->

<!-- PORTABILITY -->

  	<!-- PathScale Portability flags -->

	   <!-- PathScale Fortran name-mangling option used for portability:
	        suppresses the second trailing underscore on external names. -->
	   <flag name="F-fno-second-underscore" class="portability" compilers="Fpathcc,FpathCC,Fpathf95">
	      <example>-fno-second-underscore</example>
	      <![CDATA[
	         <p><b>CFP2006:</b></p>
	         <p>If  -funderscoring is in effect, and the original Fortran external 
	         identifier contained an underscore, -fsecond-underscore appends 
	         a second underscore to  the one added  by  -funderscoring.   
	         -fno-second-underscore  does  not append a second underscore.  
	         The default is both -funderscoring and -fsecond-underscore, the
	         same defaults as g77 uses.  -fno-second-underscore corresponds
	         to the default policies of PGI Fortran and Intel Fortran.</p>
	
	      ]]>
	   </flag>
	
  	<!-- /PathScale Portability flags -->
 
  	<!-- PGI Portability flags -->
  
		<!-- PGI Fortran portability option: link without the Fortran main
		     program object module. -->
		<flag name="Mnomain"
		 class="portability"
		 regexp="-Mnomain\b"
		 compilers="pgf90">
		<example>-Mnomain</example>
		<![CDATA[
		 <p>Don't include Fortran main program object module.</p>
		]]>
		</flag>
	
  	<!-- /PGI Portability flags -->

<!-- /PORTABILITY -->

<!-- COMPILER -->

  	<!-- PathScale Compiler flags -->
  
	   <!-- PathScale C compiler driver. -->
	   <flag name="Fpathcc"
	         class="compiler"
	         regexp="pathcc">
	      <![CDATA[
	         <p>Invoke the PathScale C compiler.<br />
	         Also used to invoke linker for C programs.</p>
	      ]]>
	      <example>pathcc</example>
	   </flag>
	
	   <!-- PathScale C++ compiler driver. -->
	   <flag name="FpathCC"
	         class="compiler"
	         regexp="pathCC">
	      <![CDATA[
	         <p>Invoke the PathScale C++ compiler.<br />
	         Also used to invoke linker for C++ programs.</p>
	      ]]>
	      <example>pathCC</example>
	   </flag>
	
	   <!-- PathScale Fortran compiler driver. -->
	   <flag name="Fpathf95"
	         class="compiler"
	         regexp="pathf95">
	      <![CDATA[
	         <p>Invoke the PathScale Fortran 77, 90 and 95 compilers. <br />
	         Also used to invoke linker for Fortran programs and
	         for mixed C / Fortran.  pathf90 and pathf95 are synonymous.</p>
	      ]]>
	      <example>pathf95</example>
	   </flag>

  	<!-- /PathScale Compiler flags -->
 
  	<!-- PGI Compiler flags -->

		<!-- PGI C compiler driver. -->
		<flag name="pgcc"
		 class="compiler"
		 regexp="pgcc\b">
		<example>pgcc</example>
		<![CDATA[
		 <p>The PGI C compiler.</p>
		 ]]>
		</flag>
		
		<!-- PGI C++ compiler driver. -->
		<flag name="pgCC"
		 class="compiler"
		 regexp="pgCC\b">
		<example>pgCC</example>
		<![CDATA[
		 <p>The PGI C++ compiler.</p>
		 ]]>
		</flag>
		  
		<!-- PGI Fortran 90/95 compiler driver. -->
		<flag name="pgf90"
		 class="compiler"
		 regexp="pgf90\b">
		<example>pgf90</example>
		<![CDATA[
		 <p>The PGI Fortran 90/95 compiler.</p>
		 ]]>
		</flag>

  	<!-- /PGI Compiler flags -->

<!-- /COMPILER -->

<!-- OTHER -->

  	<!-- PGI Other flags -->
  	
		<!-- PGI option that silences compiler warnings. -->
		<flag name="w"
		 class="other"
		 regexp="-w\b"
		 compilers="pgcc, pgCC, pgf90">
		<example>-w</example>
		<![CDATA[
		 <p>Disable warning messages.</p>
		]]>
		</flag>
		
	<!-- /PGI Other flags -->

<!-- /OTHER -->

</flagsdescription>
