• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2016 Google Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <cassert>
17 #include <cstring>
18 #include <fstream>
19 #include <iostream>
20 #include <memory>
21 #include <sstream>
22 #include <string>
23 #include <vector>
24 
25 #include "source/opt/log.h"
26 #include "source/spirv_target_env.h"
27 #include "source/util/string_utils.h"
28 #include "spirv-tools/libspirv.hpp"
29 #include "spirv-tools/optimizer.hpp"
30 #include "tools/io.h"
31 #include "tools/util/cli_consumer.h"
32 
33 namespace {
34 
// What the caller should do once command-line parsing has finished.
enum OptActions {
  OPT_CONTINUE,
  // Returned (with code 1) by the error paths of the flag-parsing helpers.
  OPT_STOP
};
37 
38 struct OptStatus {
39   OptActions action;
40   int code;
41 };
42 
43 // Message consumer for this tool.  Used to emit diagnostics during
44 // initialization and setup. Note that |source| and |position| are irrelevant
45 // here because we are still not processing a SPIR-V input file.
opt_diagnostic(spv_message_level_t level,const char *,const spv_position_t &,const char * message)46 void opt_diagnostic(spv_message_level_t level, const char* /*source*/,
47                     const spv_position_t& /*positon*/, const char* message) {
48   if (level == SPV_MSG_ERROR) {
49     fprintf(stderr, "error: ");
50   }
51   fprintf(stderr, "%s\n", message);
52 }
53 
GetListOfPassesAsString(const spvtools::Optimizer & optimizer)54 std::string GetListOfPassesAsString(const spvtools::Optimizer& optimizer) {
55   std::stringstream ss;
56   for (const auto& name : optimizer.GetPassNames()) {
57     ss << "\n\t\t" << name;
58   }
59   return ss.str();
60 }
61 
// Target environment passed to the spvtools::Optimizer instances that the
// GetLegalizationPasses/GetOptimizationPasses/GetSizePasses helpers below
// create in order to list their passes.
62 const auto kDefaultEnvironment = SPV_ENV_UNIVERSAL_1_5;
63 
GetLegalizationPasses()64 std::string GetLegalizationPasses() {
65   spvtools::Optimizer optimizer(kDefaultEnvironment);
66   optimizer.RegisterLegalizationPasses();
67   return GetListOfPassesAsString(optimizer);
68 }
69 
GetOptimizationPasses()70 std::string GetOptimizationPasses() {
71   spvtools::Optimizer optimizer(kDefaultEnvironment);
72   optimizer.RegisterPerformancePasses();
73   return GetListOfPassesAsString(optimizer);
74 }
75 
GetSizePasses()76 std::string GetSizePasses() {
77   spvtools::Optimizer optimizer(kDefaultEnvironment);
78   optimizer.RegisterSizePasses();
79   return GetListOfPassesAsString(optimizer);
80 }
81 
GetVulkanToWebGPUPasses()82 std::string GetVulkanToWebGPUPasses() {
83   spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_1);
84   optimizer.RegisterVulkanToWebGPUPasses();
85   return GetListOfPassesAsString(optimizer);
86 }
87 
GetWebGPUToVulkanPasses()88 std::string GetWebGPUToVulkanPasses() {
89   spvtools::Optimizer optimizer(SPV_ENV_WEBGPU_0);
90   optimizer.RegisterWebGPUToVulkanPasses();
91   return GetListOfPassesAsString(optimizer);
92 }
93 
// Prints the help text of the tool to standard output using printf.
//
// |program| is the name shown in the banner and in the USAGE line (it is
// passed for both %s conversions of the first printf call).
//
// The text is emitted as a sequence of printf calls, one per flag entry. A few
// entries contain a %s conversion that is filled in at call time:
//   --legalize-hlsl     <- GetLegalizationPasses()
//   -O                  <- GetOptimizationPasses()
//   -Os                 <- GetSizePasses()
//   --target-env        <- the string returned by spvTargetEnvList(16, 80)
//   --vulkan-to-webgpu  <- GetVulkanToWebGPUPasses()
//   --webgpu-to-vulkan  <- GetWebGPUToVulkanPasses()
//
// Every chunk is used as a printf format string, so a literal '%' in the help
// text has to be written as "%%" (see the --compact-ids entry).
//
// NOTE(review): the meaning of the arguments 16 and 80 given to
// spvTargetEnvList is not visible here; presumably indentation and line
// width -- confirm against its declaration.
PrintUsage(const char * program)94 void PrintUsage(const char* program) {
95   std::string target_env_list = spvTargetEnvList(16, 80);
96   // NOTE: Please maintain flags in lexicographical order.
  // Banner and usage line; both %s conversions receive |program|.
97   printf(
98       R"(%s - Optimize a SPIR-V binary file.
99 
100 USAGE: %s [options] [<input>] -o <output>
101 
102 The SPIR-V binary is read from <input>. If no file is specified,
103 or if <input> is "-", then the binary is read from standard input.
104 if <output> is "-", then the optimized output is written to
105 standard output.
106 
107 NOTE: The optimizer is a work in progress.
108 
109 Options (in lexicographical order):)",
110       program, program);
  // From here on there is one printf call per flag entry.
111   printf(R"(
112   --amd-ext-to-khr
113                Replaces the extensions VK_AMD_shader_ballot, VK_AMD_gcn_shader,
114                and VK_AMD_shader_trinary_minmax with equivalent code using core
115                instructions and capabilities.)");
116   printf(R"(
117   --ccp
118                Apply the conditional constant propagation transform.  This will
119                propagate constant values throughout the program, and simplify
120                expressions and conditional jumps with known predicate
121                values.  Performed on entry point call tree functions and
122                exported functions.)");
123   printf(R"(
124   --cfg-cleanup
125                Cleanup the control flow graph. This will remove any unnecessary
126                code from the CFG like unreachable code. Performed on entry
127                point call tree functions and exported functions.)");
128   printf(R"(
129   --combine-access-chains
130                Combines chained access chains to produce a single instruction
131                where possible.)");
132   printf(R"(
133   --compact-ids
134                Remap result ids to a compact range starting from %%1 and without
135                any gaps.)");
136   printf(R"(
137   --convert-local-access-chains
138                Convert constant index access chain loads/stores into
139                equivalent load/stores with inserts and extracts. Performed
140                on function scope variables referenced only with load, store,
141                and constant index access chains in entry point call tree
142                functions.)");
143   printf(R"(
144   --convert-relaxed-to-half
145                Convert all RelaxedPrecision arithmetic operations to half
146                precision, inserting conversion operations where needed.
147                Run after function scope variable load and store elimination
148                for better results. Simplify-instructions, redundancy-elimination
149                and DCE should be run after this pass to eliminate excess
150                conversions. This conversion is useful when the target platform
151                does not support RelaxedPrecision or ignores it. This pass also
152                removes all RelaxedPrecision decorations.)");
153   printf(R"(
154   --copy-propagate-arrays
155                Does propagation of memory references when an array is a copy of
156                another.  It will only propagate an array if the source is never
157                written to, and the only store to the target is the copy.)");
158   printf(R"(
159   --decompose-initialized-variables
160                Decomposes initialized variable declarations into a declaration
161                followed by a store of the initial value. This is done to work
162                around known issues with some Vulkan drivers for initialize
163                variables.)");
164   printf(R"(
165   --descriptor-scalar-replacement
166                Replaces every array variable |desc| that has a DescriptorSet
167                and Binding decorations with a new variable for each element of
168                the array.  Suppose |desc| was bound at binding |b|.  Then the
169                variable corresponding to |desc[i]| will have binding |b+i|.
170                The descriptor set will be the same.  All accesses to |desc|
171                must be in OpAccessChain instructions with a literal index for
172                the first index.)");
173   printf(R"(
174   --eliminate-dead-branches
175                Convert conditional branches with constant condition to the
176                indicated unconditional branch. Delete all resulting dead
177                code. Performed only on entry point call tree functions.)");
178   printf(R"(
179   --eliminate-dead-code-aggressive
180                Delete instructions which do not contribute to a function's
181                output. Performed only on entry point call tree functions.)");
182   printf(R"(
183   --eliminate-dead-const
184                Eliminate dead constants.)");
185   printf(R"(
186   --eliminate-dead-functions
187                Deletes functions that cannot be reached from entry points or
188                exported functions.)");
189   printf(R"(
190   --eliminate-dead-inserts
191                Deletes unreferenced inserts into composites, most notably
192                unused stores to vector components, that are not removed by
193                aggressive dead code elimination.)");
194   printf(R"(
195   --eliminate-dead-variables
196                Deletes module scope variables that are not referenced.)");
197   printf(R"(
198   --eliminate-insert-extract
199                DEPRECATED.  This pass has been replaced by the simplification
200                pass, and that pass will be run instead.
201                See --simplify-instructions.)");
202   printf(R"(
203   --eliminate-local-multi-store
204                Replace stores and loads of function scope variables that are
205                stored multiple times. Performed on variables referenceed only
206                with loads and stores. Performed only on entry point call tree
207                functions.)");
208   printf(R"(
209   --eliminate-local-single-block
210                Perform single-block store/load and load/load elimination.
211                Performed only on function scope variables in entry point
212                call tree functions.)");
213   printf(R"(
214   --eliminate-local-single-store
215                Replace stores and loads of function scope variables that are
216                only stored once. Performed on variables referenceed only with
217                loads and stores. Performed only on entry point call tree
218                functions.)");
219   printf(R"(
220   --flatten-decorations
221                Replace decoration groups with repeated OpDecorate and
222                OpMemberDecorate instructions.)");
223   printf(R"(
224   --fold-spec-const-op-composite
225                Fold the spec constants defined by OpSpecConstantOp or
226                OpSpecConstantComposite instructions to front-end constants
227                when possible.)");
228   printf(R"(
229   --freeze-spec-const
230                Freeze the values of specialization constants to their default
231                values.)");
232   printf(R"(
233   --graphics-robust-access
234                Clamp indices used to access buffers and internal composite
235                values, providing guarantees that satisfy Vulkan's
236                robustBufferAccess rules.)");
237   printf(R"(
238   --generate-webgpu-initializers
239                Adds initial values to OpVariable instructions that are missing
240                them, due to their storage type requiring them for WebGPU.)");
241   printf(R"(
242   --if-conversion
243                Convert if-then-else like assignments into OpSelect.)");
244   printf(R"(
245   --inline-entry-points-exhaustive
246                Exhaustively inline all function calls in entry point call tree
247                functions. Currently does not inline calls to functions with
248                early return in a loop.)");
249   printf(R"(
250   --legalize-hlsl
251                Runs a series of optimizations that attempts to take SPIR-V
252                generated by an HLSL front-end and generates legal Vulkan SPIR-V.
253                The optimizations are:
254                %s
255 
256                Note this does not guarantee legal code. This option passes the
257                option --relax-logical-pointer to the validator.)",
258          GetLegalizationPasses().c_str());
259   printf(R"(
260   --legalize-vector-shuffle
261                Converts any usages of 0xFFFFFFFF for the literals in
262                OpVectorShuffle to a literal 0. This is done since 0xFFFFFFFF is
263                forbidden in WebGPU.)");
264   printf(R"(
265   --local-redundancy-elimination
266                Looks for instructions in the same basic block that compute the
267                same value, and deletes the redundant ones.)");
268   printf(R"(
269   --loop-fission
270                Splits any top level loops in which the register pressure has
271                exceeded a given threshold. The threshold must follow the use of
272                this flag and must be a positive integer value.)");
273   printf(R"(
274   --loop-fusion
275                Identifies adjacent loops with the same lower and upper bound.
276                If this is legal, then merge the loops into a single loop.
277                Includes heuristics to ensure it does not increase number of
278                registers too much, while reducing the number of loads from
279                memory. Takes an additional positive integer argument to set
280                the maximum number of registers.)");
281   printf(R"(
282   --loop-invariant-code-motion
283                Identifies code in loops that has the same value for every
284                iteration of the loop, and move it to the loop pre-header.)");
285   printf(R"(
286   --loop-unroll
287                Fully unrolls loops marked with the Unroll flag)");
288   printf(R"(
289   --loop-unroll-partial
290                Partially unrolls loops marked with the Unroll flag. Takes an
291                additional non-0 integer argument to set the unroll factor, or
292                how many times a loop body should be duplicated)");
293   printf(R"(
294   --loop-peeling
295                Execute few first (respectively last) iterations before
296                (respectively after) the loop if it can elide some branches.)");
297   printf(R"(
298   --loop-peeling-threshold
299                Takes a non-0 integer argument to set the loop peeling code size
300                growth threshold. The threshold prevents the loop peeling
301                from happening if the code size increase created by
302                the optimization is above the threshold.)");
303   printf(R"(
304   --max-id-bound=<n>
305                Sets the maximum value for the id bound for the module.  The
306                default is the minimum value for this limit, 0x3FFFFF.  See
307                section 2.17 of the Spir-V specification.)");
308   printf(R"(
309   --merge-blocks
310                Join two blocks into a single block if the second has the
311                first as its only predecessor. Performed only on entry point
312                call tree functions.)");
313   printf(R"(
314   --merge-return
315                Changes functions that have multiple return statements so they
316                have a single return statement.
317 
318                For structured control flow it is assumed that the only
319                unreachable blocks in the function are trivial merge and continue
320                blocks.
321 
322                A trivial merge block contains the label and an OpUnreachable
323                instructions, nothing else.  A trivial continue block contain a
324                label and an OpBranch to the header, nothing else.
325 
326                These conditions are guaranteed to be met after running
327                dead-branch elimination.)");
328   printf(R"(
329   --loop-unswitch
330                Hoists loop-invariant conditionals out of loops by duplicating
331                the loop on each branch of the conditional and adjusting each
332                copy of the loop.)");
333   printf(R"(
334   -O
335                Optimize for performance. Apply a sequence of transformations
336                in an attempt to improve the performance of the generated
337                code. For this version of the optimizer, this flag is equivalent
338                to specifying the following optimization code names:
339                %s)",
340          GetOptimizationPasses().c_str());
341   printf(R"(
342   -Os
343                Optimize for size. Apply a sequence of transformations in an
344                attempt to minimize the size of the generated code. For this
345                version of the optimizer, this flag is equivalent to specifying
346                the following optimization code names:
347                %s
348 
349                NOTE: The specific transformations done by -O and -Os change
350                      from release to release.)",
351          GetSizePasses().c_str());
352   printf(R"(
353   -Oconfig=<file>
354                Apply the sequence of transformations indicated in <file>.
355                This file contains a sequence of strings separated by whitespace
356                (tabs, newlines or blanks). Each string is one of the flags
357                accepted by spirv-opt. Optimizations will be applied in the
358                sequence they appear in the file. This is equivalent to
359                specifying all the flags on the command line. For example,
360                given the file opts.cfg with the content:
361 
362                 --inline-entry-points-exhaustive
363                 --eliminate-dead-code-aggressive
364 
365                The following two invocations to spirv-opt are equivalent:
366 
367                $ spirv-opt -Oconfig=opts.cfg program.spv
368 
369                $ spirv-opt --inline-entry-points-exhaustive \
370                     --eliminate-dead-code-aggressive program.spv
371 
372                Lines starting with the character '#' in the configuration
373                file indicate a comment and will be ignored.
374 
375                The -O, -Os, and -Oconfig flags act as macros. Using one of them
376                is equivalent to explicitly inserting the underlying flags at
377                that position in the command line. For example, the invocation
378                'spirv-opt --merge-blocks -O ...' applies the transformation
379                --merge-blocks followed by all the transformations implied by
380                -O.)");
381   printf(R"(
382   --preserve-bindings
383                Ensure that the optimizer preserves all bindings declared within
384                the module, even when those bindings are unused.)");
385   printf(R"(
386   --preserve-spec-constants
387                Ensure that the optimizer preserves all specialization constants declared
388                within the module, even when those constants are unused.)");
389   printf(R"(
390   --print-all
391                Print SPIR-V assembly to standard error output before each pass
392                and after the last pass.)");
393   printf(R"(
394   --private-to-local
395                Change the scope of private variables that are used in a single
396                function to that function.)");
397   printf(R"(
398   --reduce-load-size
399                Replaces loads of composite objects where not every component is
400                used by loads of just the elements that are used.)");
401   printf(R"(
402   --redundancy-elimination
403                Looks for instructions in the same function that compute the
404                same value, and deletes the redundant ones.)");
405   printf(R"(
406   --relax-float-ops
407                Decorate all float operations with RelaxedPrecision if not already
408                so decorated. This does not decorate types or variables.)");
409   printf(R"(
410   --relax-struct-store
411                Allow store from one struct type to a different type with
412                compatible layout and members. This option is forwarded to the
413                validator.)");
414   printf(R"(
415   --remove-duplicates
416                Removes duplicate types, decorations, capabilities and extension
417                instructions.)");
418   printf(R"(
419   --replace-invalid-opcode
420                Replaces instructions whose opcode is valid for shader modules,
421                but not for the current shader stage.  To have an effect, all
422                entry points must have the same execution model.)");
423   printf(R"(
424   --ssa-rewrite
425                Replace loads and stores to function local variables with
426                operations on SSA IDs.)");
427   printf(R"(
428   --scalar-replacement[=<n>]
429                Replace aggregate function scope variables that are only accessed
430                via their elements with new function variables representing each
431                element.  <n> is a limit on the size of the aggregates that will
432                be replaced.  0 means there is no limit.  The default value is
433                100.)");
434   printf(R"(
435   --set-spec-const-default-value "<spec id>:<default value> ..."
436                Set the default values of the specialization constants with
437                <spec id>:<default value> pairs specified in a double-quoted
438                string. <spec id>:<default value> pairs must be separated by
439                blank spaces, and in each pair, spec id and default value must
440                be separated with colon ':' without any blank spaces in between.
441                e.g.: --set-spec-const-default-value "1:100 2:400")");
442   printf(R"(
443   --simplify-instructions
444                Will simplify all instructions in the function as much as
445                possible.)");
446   printf(R"(
447   --split-invalid-unreachable
448                Attempts to legalize for WebGPU cases where an unreachable
449                merge-block is also a continue-target by splitting it into two
450                separate blocks. There exist legal, for Vulkan, instances of this
451                pattern that cannot be converted into legal WebGPU, so this
452                conversion may not succeed.)");
453   printf(R"(
454   --skip-validation
455                Will not validate the SPIR-V before optimizing.  If the SPIR-V
456                is invalid, the optimizer may fail or generate incorrect code.
457                This options should be used rarely, and with caution.)");
458   printf(R"(
459   --strength-reduction
460                Replaces instructions with equivalent and less expensive ones.)");
461   printf(R"(
462   --strip-atomic-counter-memory
463                Removes AtomicCountMemory bit from memory semantics values.)");
464   printf(R"(
465   --strip-debug
466                Remove all debug instructions.)");
467   printf(R"(
468   --strip-reflect
469                Remove all reflection information.  For now, this covers
470                reflection information defined by SPV_GOOGLE_hlsl_functionality1
471                and SPV_KHR_non_semantic_info)");
472   printf(R"(
473   --target-env=<env>
474                Set the target environment. Without this flag the target
475                environment defaults to spv1.5. <env> must be one of
476                {%s})",
477          target_env_list.c_str());
478   printf(R"(
479   --time-report
480                Print the resource utilization of each pass (e.g., CPU time,
481                RSS) to standard error output. Currently it supports only Unix
482                systems. This option is the same as -ftime-report in GCC. It
483                prints CPU/WALL/USR/SYS time (and RSS if possible), but note that
484                USR/SYS time are returned by getrusage() and can have a small
485                error.)");
486   printf(R"(
487   --upgrade-memory-model
488                Upgrades the Logical GLSL450 memory model to Logical VulkanKHR.
489                Transforms memory, image, atomic and barrier operations to conform
490                to that model's requirements.)");
491   printf(R"(
492   --vector-dce
493                This pass looks for components of vectors that are unused, and
494                removes them from the vector.  Note this would still leave around
495                lots of dead code that a pass of ADCE will be able to remove.)");
496   printf(R"(
497   --vulkan-to-webgpu
498                Turns on the prescribed passes for converting from Vulkan to
499                WebGPU and sets the target environment to webgpu0. Other passes
500                may be turned on via additional flags, but such combinations are
501                not tested.
502                Using --target-env with this flag is not allowed.
503 
504                This flag is the equivalent of passing in --target-env=webgpu0
505                and specifying the following optimization code names:
506                %s
507 
508                NOTE: This flag is a WIP and its behaviour is subject to change.)",
509          GetVulkanToWebGPUPasses().c_str());
510   printf(R"(
511   --webgpu-to-vulkan
512                Turns on the prescribed passes for converting from WebGPU to
513                Vulkan and sets the target environment to vulkan1.1. Other passes
514                may be turned on via additional flags, but such combinations are
515                not tested.
516                Using --target-env with this flag is not allowed.
517 
518                This flag is the equivalent of passing in --target-env=vulkan1.1
519                and specifying the following optimization code names:
520                %s
521 
522                NOTE: This flag is a WIP and its behaviour is subject to change.)",
523          GetWebGPUToVulkanPasses().c_str());
524   printf(R"(
525   --workaround-1209
526                Rewrites instructions for which there are known driver bugs to
527                avoid triggering those bugs.
528                Current workarounds: Avoid OpUnreachable in loops.)");
529   printf(R"(
530   --wrap-opkill
531                Replaces all OpKill instructions in functions that can be called
532                from a continue construct with a function call to a function
533                whose only instruction is an OpKill.  This is done to enable
534                inlining on these functions.
535                )");
536   printf(R"(
537   --unify-const
538                Remove the duplicated constants.)");
539   printf(R"(
540   --validate-after-all
541                Validate the module after each pass is performed.)");
542   printf(R"(
543   -h, --help
544                Print this help.)");
545   printf(R"(
546   --version
547                Display optimizer version information.
548 )");
549 }
550 
551 // Reads command-line flags  the file specified in |oconfig_flag|. This string
552 // is assumed to have the form "-Oconfig=FILENAME". This function parses the
553 // string and extracts the file name after the '=' sign.
554 //
555 // Flags found in |FILENAME| are pushed at the end of the vector |file_flags|.
556 //
557 // This function returns true on success, false on failure.
ReadFlagsFromFile(const char * oconfig_flag,std::vector<std::string> * file_flags)558 bool ReadFlagsFromFile(const char* oconfig_flag,
559                        std::vector<std::string>* file_flags) {
560   const char* fname = strchr(oconfig_flag, '=');
561   if (fname == nullptr || fname[0] != '=') {
562     spvtools::Errorf(opt_diagnostic, nullptr, {}, "Invalid -Oconfig flag %s",
563                      oconfig_flag);
564     return false;
565   }
566   fname++;
567 
568   std::ifstream input_file;
569   input_file.open(fname);
570   if (input_file.fail()) {
571     spvtools::Errorf(opt_diagnostic, nullptr, {}, "Could not open file '%s'",
572                      fname);
573     return false;
574   }
575 
576   std::string line;
577   while (std::getline(input_file, line)) {
578     // Ignore empty lines and lines starting with the comment marker '#'.
579     if (line.length() == 0 || line[0] == '#') {
580       continue;
581     }
582 
583     // Tokenize the line.  Add all found tokens to the list of found flags. This
584     // mimics the way the shell will parse whitespace on the command line. NOTE:
585     // This does not support quoting and it is not intended to.
586     std::istringstream iss(line);
587     while (!iss.eof()) {
588       std::string flag;
589       iss >> flag;
590       file_flags->push_back(flag);
591     }
592   }
593 
594   return true;
595 }
596 
// Forward declaration of ParseFlags, which is defined later in this file.
// ParseOconfigFlag, which comes first, calls it on the flags read from the
// configuration file.
597 OptStatus ParseFlags(int argc, const char** argv,
598                      spvtools::Optimizer* optimizer, const char** in_file,
599                      const char** out_file,
600                      spvtools::ValidatorOptions* validator_options,
601                      spvtools::OptimizerOptions* optimizer_options);
602 
603 // Parses and handles the -Oconfig flag. |prog_name| contains the name of
604 // the spirv-opt binary (used to build a new argv vector for the recursive
605 // invocation to ParseFlags). |opt_flag| contains the -Oconfig=FILENAME flag.
606 // |optimizer|, |in_file|, |out_file|, |validator_options|, and
607 // |optimizer_options| are as in ParseFlags.
608 //
609 // This returns the same OptStatus instance returned by ParseFlags.
ParseOconfigFlag(const char * prog_name,const char * opt_flag,spvtools::Optimizer * optimizer,const char ** in_file,const char ** out_file,spvtools::ValidatorOptions * validator_options,spvtools::OptimizerOptions * optimizer_options)610 OptStatus ParseOconfigFlag(const char* prog_name, const char* opt_flag,
611                            spvtools::Optimizer* optimizer, const char** in_file,
612                            const char** out_file,
613                            spvtools::ValidatorOptions* validator_options,
614                            spvtools::OptimizerOptions* optimizer_options) {
615   std::vector<std::string> flags;
616   flags.push_back(prog_name);
617 
618   std::vector<std::string> file_flags;
619   if (!ReadFlagsFromFile(opt_flag, &file_flags)) {
620     spvtools::Error(opt_diagnostic, nullptr, {},
621                     "Could not read optimizer flags from configuration file");
622     return {OPT_STOP, 1};
623   }
624   flags.insert(flags.end(), file_flags.begin(), file_flags.end());
625 
626   const char** new_argv = new const char*[flags.size()];
627   for (size_t i = 0; i < flags.size(); i++) {
628     if (flags[i].find("-Oconfig=") != std::string::npos) {
629       spvtools::Error(
630           opt_diagnostic, nullptr, {},
631           "Flag -Oconfig= may not be used inside the configuration file");
632       return {OPT_STOP, 1};
633     }
634     new_argv[i] = flags[i].c_str();
635   }
636 
637   auto ret_val =
638       ParseFlags(static_cast<int>(flags.size()), new_argv, optimizer, in_file,
639                  out_file, validator_options, optimizer_options);
640   delete[] new_argv;
641   return ret_val;
642 }
643 
// Rewrites a flag given as two arguments, '--pass arg', into the single-string
// form '--pass=arg'. The optimizer only understands pass arguments written as
// '--pass_name=arg'; spirv-opt also accepts the two-argument spelling for a
// fixed set of legacy flags, so the conversion is done here.
//
// The extra argument is always taken as one string. A pass that needs several
// values must split that string itself in its creator in
// Optimizer::GetPassFromFlag() (see the handler for
// --set-spec-const-default-value).
//
// |argv| and |argc| describe the command line and |*argi| is the index of the
// flag to canonicalize. If argv[*argi] is one of the legacy flags and a
// following argument exists, |*argi| is advanced by one and
// "argv[argi]=argv[argi + 1]" is returned. Otherwise |*argi| is left alone
// and argv[*argi] is returned unchanged.
std::string CanonicalizeFlag(const char** argv, int argc, int* argi) {
  // NOTE: DO NOT ADD NEW FLAGS HERE.
  //
  // This table exists for backwards compatibility only. New passes that take
  // an argument must use the syntax "--pass_name[=pass_arg]".
  static const char* const kLegacyFlagsWithArg[] = {
      "--set-spec-const-default-value",
      "--loop-fission",
      "--loop-fusion",
      "--loop-unroll-partial",
      "--loop-peeling-threshold",
  };

  const int index = *argi;
  const char* flag = argv[index];
  std::string canonical(flag);

  const char* following = nullptr;
  if (index + 1 < argc) {
    following = argv[index + 1];
  }
  if (following == nullptr) {
    // Nothing to attach, whatever the flag is.
    return canonical;
  }

  for (const char* legacy_flag : kLegacyFlagsWithArg) {
    if (strcmp(flag, legacy_flag) != 0) {
      continue;
    }
    canonical += "=";
    canonical += following;
    *argi = index + 1;
    break;
  }

  return canonical;
}
683 
684 // Parses command-line flags. |argc| contains the number of command-line flags.
685 // |argv| points to an array of strings holding the flags. |optimizer| is the
686 // Optimizer instance used to optimize the program.
687 //
688 // On return, this function stores the name of the input program in |in_file|.
689 // The name of the output file in |out_file|. The return value indicates whether
690 // optimization should continue and a status code indicating an error or
691 // success.
ParseFlags(int argc,const char ** argv,spvtools::Optimizer * optimizer,const char ** in_file,const char ** out_file,spvtools::ValidatorOptions * validator_options,spvtools::OptimizerOptions * optimizer_options)692 OptStatus ParseFlags(int argc, const char** argv,
693                      spvtools::Optimizer* optimizer, const char** in_file,
694                      const char** out_file,
695                      spvtools::ValidatorOptions* validator_options,
696                      spvtools::OptimizerOptions* optimizer_options) {
697   std::vector<std::string> pass_flags;
698   bool target_env_set = false;
699   bool vulkan_to_webgpu_set = false;
700   bool webgpu_to_vulkan_set = false;
701   for (int argi = 1; argi < argc; ++argi) {
702     const char* cur_arg = argv[argi];
703     if ('-' == cur_arg[0]) {
704       if (0 == strcmp(cur_arg, "--version")) {
705         spvtools::Logf(opt_diagnostic, SPV_MSG_INFO, nullptr, {}, "%s\n",
706                        spvSoftwareVersionDetailsString());
707         return {OPT_STOP, 0};
708       } else if (0 == strcmp(cur_arg, "--help") || 0 == strcmp(cur_arg, "-h")) {
709         PrintUsage(argv[0]);
710         return {OPT_STOP, 0};
711       } else if (0 == strcmp(cur_arg, "-o")) {
712         if (!*out_file && argi + 1 < argc) {
713           *out_file = argv[++argi];
714         } else {
715           PrintUsage(argv[0]);
716           return {OPT_STOP, 1};
717         }
718       } else if ('\0' == cur_arg[1]) {
719         // Setting a filename of "-" to indicate stdin.
720         if (!*in_file) {
721           *in_file = cur_arg;
722         } else {
723           spvtools::Error(opt_diagnostic, nullptr, {},
724                           "More than one input file specified");
725           return {OPT_STOP, 1};
726         }
727       } else if (0 == strncmp(cur_arg, "-Oconfig=", sizeof("-Oconfig=") - 1)) {
728         OptStatus status =
729             ParseOconfigFlag(argv[0], cur_arg, optimizer, in_file, out_file,
730                              validator_options, optimizer_options);
731         if (status.action != OPT_CONTINUE) {
732           return status;
733         }
734       } else if (0 == strcmp(cur_arg, "--skip-validation")) {
735         optimizer_options->set_run_validator(false);
736       } else if (0 == strcmp(cur_arg, "--print-all")) {
737         optimizer->SetPrintAll(&std::cerr);
738       } else if (0 == strcmp(cur_arg, "--preserve-bindings")) {
739         optimizer_options->set_preserve_bindings(true);
740       } else if (0 == strcmp(cur_arg, "--preserve-spec-constants")) {
741         optimizer_options->set_preserve_spec_constants(true);
742       } else if (0 == strcmp(cur_arg, "--time-report")) {
743         optimizer->SetTimeReport(&std::cerr);
744       } else if (0 == strcmp(cur_arg, "--relax-struct-store")) {
745         validator_options->SetRelaxStructStore(true);
746       } else if (0 == strncmp(cur_arg, "--max-id-bound=",
747                               sizeof("--max-id-bound=") - 1)) {
748         auto split_flag = spvtools::utils::SplitFlagArgs(cur_arg);
749         // Will not allow values in the range [2^31,2^32).
750         uint32_t max_id_bound =
751             static_cast<uint32_t>(atoi(split_flag.second.c_str()));
752 
753         // That SPIR-V mandates the minimum value for max id bound but
754         // implementations may allow higher minimum bounds.
755         if (max_id_bound < kDefaultMaxIdBound) {
756           spvtools::Error(opt_diagnostic, nullptr, {},
757                           "The max id bound must be at least 0x3FFFFF");
758           return {OPT_STOP, 1};
759         }
760         optimizer_options->set_max_id_bound(max_id_bound);
761         validator_options->SetUniversalLimit(spv_validator_limit_max_id_bound,
762                                              max_id_bound);
763       } else if (0 == strncmp(cur_arg,
764                               "--target-env=", sizeof("--target-env=") - 1)) {
765         target_env_set = true;
766         if (vulkan_to_webgpu_set) {
767           spvtools::Error(opt_diagnostic, nullptr, {},
768                           "--vulkan-to-webgpu defines the target environment, "
769                           "so --target-env cannot be set at the same time");
770           return {OPT_STOP, 1};
771         }
772         if (webgpu_to_vulkan_set) {
773           spvtools::Error(opt_diagnostic, nullptr, {},
774                           "--webgpu-to-vulkan defines the target environment, "
775                           "so --target-env cannot be set at the same time");
776           return {OPT_STOP, 1};
777         }
778         const auto split_flag = spvtools::utils::SplitFlagArgs(cur_arg);
779         const auto target_env_str = split_flag.second.c_str();
780         spv_target_env target_env;
781         if (!spvParseTargetEnv(target_env_str, &target_env)) {
782           spvtools::Error(opt_diagnostic, nullptr, {},
783                           "Invalid value passed to --target-env");
784           return {OPT_STOP, 1};
785         }
786         optimizer->SetTargetEnv(target_env);
787       } else if (0 == strcmp(cur_arg, "--vulkan-to-webgpu")) {
788         vulkan_to_webgpu_set = true;
789         if (target_env_set) {
790           spvtools::Error(opt_diagnostic, nullptr, {},
791                           "--vulkan-to-webgpu defines the target environment, "
792                           "so --target-env cannot be set at the same time");
793           return {OPT_STOP, 1};
794         }
795         if (webgpu_to_vulkan_set) {
796           spvtools::Error(opt_diagnostic, nullptr, {},
797                           "Cannot use both --webgpu-to-vulkan and "
798                           "--vulkan-to-webgpu at the same time, invoke twice "
799                           "if you are wanting to go to and from");
800           return {OPT_STOP, 1};
801         }
802 
803         optimizer->SetTargetEnv(SPV_ENV_VULKAN_1_1);
804         optimizer->RegisterVulkanToWebGPUPasses();
805       } else if (0 == strcmp(cur_arg, "--webgpu-to-vulkan")) {
806         webgpu_to_vulkan_set = true;
807         if (target_env_set) {
808           spvtools::Error(opt_diagnostic, nullptr, {},
809                           "--webgpu-to-vulkan defines the target environment, "
810                           "so --target-env cannot be set at the same time");
811           return {OPT_STOP, 1};
812         }
813         if (vulkan_to_webgpu_set) {
814           spvtools::Error(opt_diagnostic, nullptr, {},
815                           "Cannot use both --webgpu-to-vulkan and "
816                           "--vulkan-to-webgpu at the same time, invoke twice "
817                           "if you are wanting to go to and from");
818           return {OPT_STOP, 1};
819         }
820 
821         optimizer->SetTargetEnv(SPV_ENV_WEBGPU_0);
822         optimizer->RegisterWebGPUToVulkanPasses();
823       } else if (0 == strcmp(cur_arg, "--validate-after-all")) {
824         optimizer->SetValidateAfterAll(true);
825       } else {
826         // Some passes used to accept the form '--pass arg', canonicalize them
827         // to '--pass=arg'.
828         pass_flags.push_back(CanonicalizeFlag(argv, argc, &argi));
829 
830         // If we were requested to legalize SPIR-V generated from the HLSL
831         // front-end, skip validation.
832         if (0 == strcmp(cur_arg, "--legalize-hlsl")) {
833           validator_options->SetBeforeHlslLegalization(true);
834         }
835       }
836     } else {
837       if (!*in_file) {
838         *in_file = cur_arg;
839       } else {
840         spvtools::Error(opt_diagnostic, nullptr, {},
841                         "More than one input file specified");
842         return {OPT_STOP, 1};
843       }
844     }
845   }
846 
847   if (!optimizer->RegisterPassesFromFlags(pass_flags)) {
848     return {OPT_STOP, 1};
849   }
850 
851   return {OPT_CONTINUE, 0};
852 }
853 
854 }  // namespace
855 
main(int argc,const char ** argv)856 int main(int argc, const char** argv) {
857   const char* in_file = nullptr;
858   const char* out_file = nullptr;
859 
860   spv_target_env target_env = kDefaultEnvironment;
861 
862   spvtools::Optimizer optimizer(target_env);
863   optimizer.SetMessageConsumer(spvtools::utils::CLIMessageConsumer);
864 
865   spvtools::ValidatorOptions validator_options;
866   spvtools::OptimizerOptions optimizer_options;
867   OptStatus status = ParseFlags(argc, argv, &optimizer, &in_file, &out_file,
868                                 &validator_options, &optimizer_options);
869   optimizer_options.set_validator_options(validator_options);
870 
871   if (status.action == OPT_STOP) {
872     return status.code;
873   }
874 
875   if (out_file == nullptr) {
876     spvtools::Error(opt_diagnostic, nullptr, {}, "-o required");
877     return 1;
878   }
879 
880   std::vector<uint32_t> binary;
881   if (!ReadFile<uint32_t>(in_file, "rb", &binary)) {
882     return 1;
883   }
884 
885   // By using the same vector as input and output, we save time in the case
886   // that there was no change.
887   bool ok =
888       optimizer.Run(binary.data(), binary.size(), &binary, optimizer_options);
889 
890   if (!WriteFile<uint32_t>(out_file, "wb", binary.data(), binary.size())) {
891     return 1;
892   }
893 
894   return ok ? 0 : 1;
895 }
896