/* * Copyright (C) 2018 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #ifndef SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ #define SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ #include #include #include #include #include #include #include "perfetto/base/logging.h" namespace perfetto { namespace trace_processor { // An SQL string which retains knowledge of the source of the SQL (i.e. stdlib // module, ExecuteQuery etc). It also supports "rewriting" parts or all of the // SQL string with a different string which is useful in cases where SQL is // substituted such as macros or function inlining. class SqlSource { public: class Rewriter; // Creates a SqlSource instance wrapping SQL passed to // |TraceProcessor::ExecuteQuery|. static SqlSource FromExecuteQuery(std::string sql); // Creates a SqlSource instance wrapping SQL executed when running a metric. static SqlSource FromMetric(std::string sql, const std::string& metric_file); // Creates a SqlSource instance wrapping SQL executed when running a metric // file (i.e. with RUN_METRIC). static SqlSource FromMetricFile(std::string sql, const std::string& metric_file); // Creates a SqlSource instance wrapping SQL executed when including a module. static SqlSource FromModuleInclude(std::string sql, const std::string& module); // Creates a SqlSource instance wrapping SQL which is an internal // implementation detail of trace processor. static SqlSource FromTraceProcessorImplementation(std::string sql); // Returns this SqlSource instance as a string which can be appended as a // "traceback" frame to an error message. Callers should pass an |offset| // parameter which indicates the exact location of the error in the SQL // string. 0 and |sql().size()| are both valid offset positions and correspond // to the start and end of the source respectively. // // Specifically, this string will include: // a) context about the source of the SQL // b) line and column number of the error // c) a snippet of the SQL and a caret (^) character pointing to the location // of the error. std::string AsTraceback(uint32_t offset) const; // Same as |AsTraceback| but for offsets which come from SQLite instead of // from trace processor tokenization or parsing. std::string AsTracebackForSqliteOffset(std::optional offset) const; // Creates a SqlSource instance with the SQL taken as a substring starting // at |offset| with |len| characters. SqlSource Substr(uint32_t offset, uint32_t len) const; // Rewrites the SQL backing |this| to SQL from |source| ignoring any existing // rewrites in |this|. // // This is useful when PerfettoSQL statements are transpiled into SQLite // statements but we want to preserve the context of the original statement. SqlSource RewriteAllIgnoreExisting(SqlSource source) const; // Returns the SQL string backing this SqlSource instance; const std::string& sql() const { return root_.rewritten_sql; } // Returns the original SQL string backing this SqlSource instance; const std::string& original_sql() const { return root_.original_sql; } // Returns whether this SqlSource has been rewritten. bool IsRewritten() const { return root_.IsRewritten(); } private: struct Rewrite; // Represents a tree of SQL rewrites, preserving the source for each rewrite. // // Suppose that we have the following situation: // User: `SELECT foo!(a) FROM bar!(slice) a` // foo : `$1.x, $1.y` // bar : `(SELECT baz!($1) FROM $1)` // baz : `$1.x, $1.y, $1.z` // // We want to expand this to // ```SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z FROM slice) a``` // while retaining information about the source of the rewrite. // // For example, the string `a.x, a.y` came from foo, `slice.x, slice.y, // slice.z` came from bar, which itself recursively came from baz etc. // // The purpose of this class is to keep track of the information required for // this "tree" of rewrites (i.e. expansions). In the example above, the tree // would look as follows: // User // / | // foo bar // / // baz // // The properties in each of these nodes is as follows: // User { // original_sql: "SELECT foo!(a) FROM bar!(slice) a" // rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z // FROM slice) a" // rewrites: [ // {original_sql_start: 7, original_sql_end: 14, node: foo}, // {original_sql_start: 20, original_sql_end: 31, node: bar}] // ] // } // foo { // original_sql: "$1.x, $1.y" // rewritten_sql: "a.x, a.y" // rewrites: [] // } // bar { // original_sql: "(SELECT baz!($1) FROM $1 LIMIT 1)" // rewritten_sql: "(SELECT slice.x, slice.y, slice.z FROM slice)" // rewrites: [{original_sql_start: 8, original_sql_end: 16, node: baz}] // } // baz { // original_sql = "$1.x, $1.y, $1.z" // rewritten_sql = "slice.x, slice.y, slice.z" // rewrites: [] // } struct Node { std::string name; bool include_traceback_header = false; uint32_t line = 1; uint32_t col = 1; // The original SQL string used to create this node. std::string original_sql; // The list of rewrites which are applied to |original_sql| ordered by the // offsets. std::vector rewrites; // The SQL string which is the result of applying |rewrites| to // |original_sql|. See |SqlSource::ApplyRewrites| for details on how this is // computed. std::string rewritten_sql; // Returns the "traceback" for this node and all recursive nodes. See // |SqlSource::AsTraceback| for details. std::string AsTraceback(uint32_t rewritten_offset) const; // Returns the "traceback" for this node only. See |SqlSource::AsTraceback| // for details. std::string SelfTraceback(uint32_t rewritten_offset, uint32_t original_offset) const; Node Substr(uint32_t rewritten_offset, uint32_t rewritten_len) const; bool IsRewritten() const { PERFETTO_CHECK(rewrites.empty() == (original_sql == rewritten_sql)); return !rewrites.empty(); } // Given a |rewritten_offset| for this node, returns the offset into the // |original_sql| which matches that |rewritten_offset|. // // IMPORTANT: if |rewritten_offset| is *inside* a rewrite, the original // offset will point to the *start of the rewrite*. For example, if // we have: // original_sql: "SELECT foo!(a) FROM slice a" // rewritten_sql: "SELECT a.x, a.y FROM slice a" // rewrites: [ // { // original_sql_start: 7, // original_sql_end: 14, // rewritten_sql_start: 7, // rewritten_sql_end: 15, // node: foo // } // ] // then: // RewrittenOffsetToOriginalOffset(7) == 7 // 7 = start of foo // RewrittenOffsetToOriginalOffset(14) == 7 // 7 = start of foo // RewrittenOffsetToOriginalOffset(15) == 14 // 14 = end of foo // RewrittenOffsetToOriginalOffset(16) == 15 uint32_t RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const; // Given an |original_offset| for this node, returns the index of a // rewrite whose original range contains |original_offset|. // Returns std::nullopt if there is no such rewrite. std::optional RewriteForOriginalOffset( uint32_t original_offset) const; }; // Defines a rewrite. See the documentation for |SqlSource::Node| for details // on this. struct Rewrite { // The start and end offsets in |original_sql|. uint32_t original_sql_start; uint32_t original_sql_end; // The start and end offsets in |rewritten_sql|. uint32_t rewritten_sql_start; uint32_t rewritten_sql_end; // Node containing the SQL which replaces the segment of SQL in // |original_sql|. Node rewrite_node; }; SqlSource(); explicit SqlSource(Node); SqlSource(std::string sql, std::string name, bool include_traceback_header); static std::string ApplyRewrites(const std::string&, const std::vector&); Node root_; }; // Used to rewrite a SqlSource using SQL from other SqlSources. class SqlSource::Rewriter { public: // Creates a Rewriter object which can be used to rewrite the SQL backing // |source|. // // Note that rewrites of portions of the SQL which have already been rewritten // is supported but *only in limited cases*. Specifically, the new rewrite // must not cross the boundary of any existing rewrite. // // For example, if we have: // SqlSource { // original_sql: "SELECT foo!(a) FROM bar!(slice) a" // rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x FROM slice) a" // } // then the following are valid: // # Replaces "SELECT " with "INSERT ". Valid because it does not touch // # any rewrite. // Rewrite(0, 7, "INSERT ") // // # Replaces "a.x, a." with "a.z, ". Valid because it only touches the // # contents of the existing "foo" rewrite. // Rewrite(7, 14, "a.z, ") // while the following are invalid: // # Fails to replace "SELECT a" with "I". Invalid because it affects both // # non-rewritten source and the "foo" rewrite. // Rewrite(0, 8, "I") // // # Fails to replace "a.x, a.y FROM (" with "(". Invalid because it affects // # the "foo" rewrite, non-rewritten source and the "bar" rewrite. // Rewrite(7, 23, "(") explicit Rewriter(SqlSource source); // Replaces the SQL in |source.rewritten_sql| between |rewritten_start| and // |rewritten_end| with the contents of |rewrite|. // // Note that calls to Rewrite must be monontonic and non-overlapping. i.e. // if Rewrite(0, 10) is called, the next |rewritten_end| must be greater than // or equal to 10. // // Note also that all offsets passed to this function correspond to offsets // into |source.rewritten_sql|: past calls to rewrite do not affect future // offsets. void Rewrite(uint32_t rewritten_start, uint32_t rewritten_end, SqlSource rewrite); // Returns the rewritten SqlSource instance. SqlSource Build() &&; private: explicit Rewriter(Node); Node orig_; std::vector nested_; std::vector non_nested_; }; } // namespace trace_processor } // namespace perfetto #endif // SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_