1 /*
2  * Copyright (C) 2024 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
18 #define SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
19 
#include <algorithm>
#include <bitset>
#include <cstdint>
#include <memory>
#include <optional>
#include <string>
#include <unordered_set>
#include <vector>
27 
28 #include "perfetto/base/flat_set.h"
29 #include "perfetto/base/status.h"
30 #include "src/trace_redaction/frame_cookie.h"
31 #include "src/trace_redaction/process_thread_timeline.h"
32 
33 #include "protos/perfetto/trace/trace_packet.pbzero.h"
34 
35 namespace perfetto::trace_redaction {
36 
// Multiple packages can share the same name. This is common when a device has
// multiple users. When this happens, each instance shares the 5 least
// significant (decimal) digits of its uid:
//
//    uid = user * 100000 + app_id
//
// Returns `uid` with the user component removed, so that every instance of a
// package maps to the same value (e.g. 10113, 110113 and 1010113 all become
// 10113). A uid that is already below 100000 is returned unchanged.
constexpr uint64_t NormalizeUid(uint64_t uid) {
  // Size of the uid range reserved for each user. Keeping exactly five decimal
  // digits is what strips the user component; a larger modulus would leave
  // part of it behind for users whose id is not a multiple of ten.
  constexpr uint64_t kPerUserRange = 100000;
  return uid % kPerUserRange;
}
43 
// Book-keeping for two system-wide values: the last tid handed out for a
// synthetic thread and the highest cpu index recorded so far.
class SystemInfo {
 public:
  // Returns a tid for a new synthetic thread. The counter is advanced before
  // it is returned, so the first value handed out is (1 << 22) + 1 and each
  // later call returns the previous value plus one.
  int32_t AllocateSynthThread() { return ++next_synth_thread_; }

  // Records that the cpu with index `cpu` exists. The stored index only ever
  // grows. Returns the highest index recorded so far, which is larger than
  // `cpu` when a higher index was recorded earlier.
  uint32_t ReserveCpu(uint32_t cpu) {
    last_cpu_ = std::max(last_cpu_, cpu);
    return last_cpu_;
  }

  // Number of cpus implied by the highest recorded index (index + 1). Because
  // the index starts at 0, this is 1 even if ReserveCpu() was never called.
  uint32_t cpu_count() const { return last_cpu_ + 1; }

 private:
  // This is the last allocated tid. Using a tid equal to or less than this tid
  // risks a collision with another tid. If a tid is ever created (by a
  // primitive) this should be advanced to the max between this value and the
  // new tid.
  //
  // On a 64 bit machine, the max pid limit is 2^22 (approximately 4 million).
  // Perfetto uses a 32-bit (signed) int for the pid. Even in this case, every
  // value above 2^22 and up to 2^31 - 1 is free for synthetic threads; that
  // range is roughly 2^9 times the size of the real pid space
  // (2 ^ (31 - 22) = 2 ^ 9).
  //
  // Furthermore, the Android source code returns 4194304 (2 ^ 22) on 64 bit
  // devices.
  //
  //  /proc/sys/kernel/pid_max (since Linux 2.5.34)
  //      This file specifies the value at which PIDs wrap around
  //      (i.e., the value in this file is one greater than the
  //      maximum PID).  PIDs greater than this value are not
  //      allocated; thus, the value in this file also acts as a
  //      system-wide limit on the total number of processes and
  //      threads.  The default value for this file, 32768, results
  //      in the same range of PIDs as on earlier kernels.  On
  //      32-bit platforms, 32768 is the maximum value for pid_max.
  //      On 64-bit systems, pid_max can be set to any value up to
  //      2^22 (PID_MAX_LIMIT, approximately 4 million).
  //
  // SOURCE: https://man7.org/linux/man-pages/man5/proc.5.html
  int32_t next_synth_thread_ = 1 << 22;

  // The last CPU index seen. If this value is 7, it means there are at least
  // 8 CPUs.
  uint32_t last_cpu_ = 0;
};
87 
// A process described purely by a list of tids. Slot 0 of the list is used as
// the thread-group id; slot `1 + cpu` is the tid returned for `cpu`
// (presumably one synthetic thread per cpu - confirm against the code that
// builds the list).
class SyntheticProcess {
 public:
  // Stores a copy of `tids`. The accessors below index into that copy, so the
  // list is expected to hold the tgid followed by one entry per cpu.
  explicit SyntheticProcess(const std::vector<int32_t>& tids) : tids_(tids) {}

  // Use the SYSTEM_UID (i.e. 1000) because it best represents this "type" of
  // process.
  int32_t uid() const { return 1000; }

  // Use ppid == 1 which is normally considered to be init on Linux?
  int32_t ppid() const { return 1; }

  // Returns the first tid in the list. Uses at() so that an empty list is
  // reported as an out-of-range access instead of being undefined behaviour.
  int32_t tgid() const { return tids_.at(0); }

  // Returns the full list, tgid first.
  const std::vector<int32_t>& tids() const { return tids_; }

  // Returns the tid stored for `cpu` (slot `1 + cpu`). A cpu without a slot
  // fails the way std::vector::at() does (std::out_of_range).
  int32_t RunningOn(uint32_t cpu) const { return TidForCpu(cpu); }

  // Same as above for a signed cpu index. A negative cpu never has a slot.
  int32_t RunningOn(int32_t cpu) const {
    // A negative value converts to a very large unsigned one, which the range
    // check in TidForCpu() rejects. Without that check `1 + cpu` would wrap to
    // slot 0 for cpu == -1 and silently return the tgid.
    return TidForCpu(static_cast<uint64_t>(cpu));
  }

 private:
  // Shared lookup for both RunningOn() overloads.
  int32_t TidForCpu(uint64_t cpu) const {
    // Check the range before adding one so the index arithmetic cannot wrap:
    // slot `1 + cpu` exists only when cpu < size - 1.
    const bool has_slot = !tids_.empty() && cpu < tids_.size() - 1;

    // tids_.size() is never a valid index; handing it to at() makes a bad cpu
    // fail through the same path as any other out-of-range access.
    return tids_.at(has_slot ? static_cast<size_t>(cpu) + 1 : tids_.size());
  }

  std::vector<int32_t> tids_;
};
112 
113 // Primitives should be stateless. All state should be stored in the context.
114 // Primitives should depend on data in the context, not the origin of the data.
115 // This allows primitives to be swapped out or work together to populate data
116 // needed by another primitive.
117 //
118 // For this to work, primitives are divided into three types:
119 //
120 //  `CollectPrimitive` :  Reads data from trace packets and saves low-level data
121 //                        in the context.
122 //
123 //  `BuildPrimitive` :    Reads low-level data from the context and builds
124 //                        high-level (read-optimized) data structures.
125 //
126 //  `TransformPrimitive`: Reads high-level data from the context and modifies
127 //                        trace packets.
//
// `Context` is the shared state handed to every primitive. All of its members
// are public data; it declares no member functions.
class Context {
 public:
  // Each packet will have a trusted uid. This is the package emitting the
  // event. In production we only expect to see system uids. 9999 is the
  // last allowed uid (allow all uids less than or equal to 9999).
  static constexpr int32_t kMaxTrustedUid = 9999;

  // The package that should not be redacted. This must be populated before
  // running any primitives.
  std::string package_name;

  // The package list maps a package name to a uid. It is possible for multiple
  // package names to map to the same uid, for example:
  //
  //    packages {
  //      name: "com.google.android.gms"
  //      uid: 10113
  //      debuggable: false
  //      profileable_from_shell: false
  //      version_code: 235013038
  //    }
  //
  // Processes reference their package using a uid:
  //
  //    processes {
  //      pid: 18176
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 10113
  //    }
  //
  // An oddity within Android is that two or more processes can reference the
  // same package using different uids:
  //
  //    A = package(M * 100000 + X)
  //    B = package(N * 100000 + X)
  //
  // A and B map to the same package. This happens when there are two or more
  // profiles on the device (e.g. a work profile and a personal profile).
  //
  // From the example above:
  //
  //  uid = package_uid_for("com.google.android.gms")
  //  pid = main_thread_for(uid)
  //  ASSERT(pid == 18176)
  //
  // However, if there is another profile:
  //
  //    processes {
  //      pid: 18176
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 10113
  //    }
  //    processes {
  //      pid: 21388
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 1010113
  //    }
  //
  // The logic from before still holds; however, if the traced process was pid
  // 21388, it will be merged with the other threads.
  //
  // To avoid this problem from happening, we normalize the uids and treat
  // both instances as a single process:
  //
  //    processes {
  //      pid: 18176
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  //      uid: 10113
  //    }
  //    processes {
  //      pid: 21388
  //      ppid: 904
  //      cmdline: "com.google.android.gms.persistent"
  // -    uid: 1010113
  // +    uid: 10113
  //    }
  //
  // It sounds like there would be a privacy concern, but because both processes
  // are from the same app and are being collected from the same user, there
  // are no new privacy issues by doing this.
  //
  // But where should the uids be normalized? The dividing line is the timeline
  // interface, specifically, should the timeline know anything about uids
  // (other than "it's a number").
  //
  // To avoid expanding the timeline's scope, the uid normalization is done
  // outside of the timeline. When a uid is passed into the timeline, it should
  // be normalized (i.e. 5 != 100005). When the timeline is queried, the uid
  // should be normalized. This increases the risk for error, but there are only
  // two places where uids are set, writing the uid to the context and writing
  // the uid to the timeline.
  //
  // Empty until the uid of the target package has been written.
  std::optional<uint64_t> package_uid;

  // Trace packets contain a "one of" entry called "data". This field can be
  // thought of as the message. A trace packet will have other fields alongside
  // "data" (e.g. "timestamp"). These fields can be thought of as metadata.
  //
  // A message should be removed if:
  //
  //  ...we know it contains too much sensitive information
  //
  //  ...we know it contains sensitive information and we know how to remove
  //        the sensitive information, but don't have the resources to do it
  //        right now
  //
  //  ...we know it provides little value
  //
  // "packet_mask" is indexed by trace packet field id and marks the fields we
  // want to pass onto later transformations. Examples are:
  //
  //    - protos::pbzero::TracePacket::kProcessTreeFieldNumber
  //    - protos::pbzero::TracePacket::kProcessStatsFieldNumber
  //    - protos::pbzero::TracePacket::kClockSnapshotFieldNumber
  //
  // If the mask is set to 0x00, all fields would be removed. This should not
  // happen as some metadata provides context between packets.
  //
  // TracePacket has kForTestingFieldNumber which is set to 900, which is why
  // the mask is sized at 1024 bits.
  using TracePacketMask = std::bitset<1024>;
  TracePacketMask packet_mask;

  // Ftrace packets contain a "one of" entry called "event". Within the scope of
  // a ftrace event, the event can be considered the payload and the other
  // values can be considered metadata (e.g. timestamp and pid).
  //
  // A ftrace event should be removed if:
  //
  //  ... we know it contains too much sensitive information
  //
  //  ... we know it contains sensitive information and we have some ideas on
  //      how to remove it, but don't have the resources to do it right now
  //      (e.g. print).
  //
  //  ... we don't see value in including it
  //
  // "ftrace_mask" is indexed by ftrace event field id and marks the events
  // that we want to pass onto later transformations. An example would be:
  //
  //  ... kSchedWakingFieldNumber because it contains cpu activity information
  //
  // Compared to trace packets, the rules around removing ftrace packets are
  // complicated because...
  //
  //  packet {
  //    ftrace_packets {  <-- ONE-OF    (1)
  //      event {         <-- REPEATED  (2)
  //        cpu_idle { }  <-- ONE-OF    (3)
  //      }
  //      event { ... }
  //    }
  //  }
  //
  //  1.  A ftrace packet will populate the one-of slot in the trace packet.
  //
  //  2.  A ftrace packet can have multiple events
  //
  //  3.  In this example, a cpu_idle event populates the one-of slot in the
  //      ftrace event
  //
  // Ftrace event has kMaliMaliPMMCURESETWAITFieldNumber which is set to 532,
  // so 1024 bits covers it.
  using FtraceEventMask = std::bitset<1024>;
  FtraceEventMask ftrace_mask;

  //  message SuspendResumeFtraceEvent {
  //    optional string action = 1 [(datapol.semantic_type) = ST_NOT_REQUIRED];
  //    optional int32 val = 2;
  //    optional uint32 start = 3 [(datapol.semantic_type) = ST_NOT_REQUIRED];
  //  }
  //
  // The "action" in SuspendResumeFtraceEvent is a free-form string. There are
  // some known and expected values. Those values are stored here; any ftrace
  // event whose action value is not found here will be dropped.
  base::FlatSet<std::string> suspend_result_allow_list;

  // The timeline is a query-focused data structure that connects a pid to a
  // uid at specific point in time.
  //
  // A timeline has two modes:
  //
  //    1. write-only
  //    2. read-only
  //
  // Attempting to use the timeline incorrectly results in undefined behaviour.
  //
  // To use a timeline, the primitive needs to be "built" (add events) and then
  // "sealed" (transition to read-only).
  //
  // A timeline must have Sort() called to change from write-only to read-only.
  // After Sort(), Flatten() and Reduce() can be called (optional) to improve
  // the practical look-up times (compared to theoretical look-up times).
  std::unique_ptr<ProcessThreadTimeline> timeline;

  // All frame events:
  //
  //  - ActualDisplayFrame
  //  - ActualSurfaceFrame
  //  - ExpectedDisplayFrame
  //  - ExpectedSurfaceFrame
  //
  // Connect a time, a pid, and a cookie value. Cookies are unique within a
  // trace, so if a cookie was connected to the target package, it can always be
  // used.
  //
  // End events (i.e. FrameEnd) only have a time and cookie value. The cookie
  // value connects it to its start time.
  //
  // In the collect phase, all start events are collected and converted to a
  // simpler structure.
  //
  // In the build phase, the cookies are filtered to only include the ones that
  // belong to the target package. This is done in the build phase, and not the
  // collect phase, because the timeline is needed to determine if the cookie
  // belongs to the target package.
  std::vector<FrameCookie> global_frame_cookies;

  // The collection of cookies that belong to the target package. Because cookie
  // values are unique within the scope of the trace, pid and time are no longer
  // needed and a set can be used for faster queries.
  std::unordered_set<int64_t> package_frame_cookies;

  // Synthetic-tid and cpu book-keeping (see SystemInfo). Empty until something
  // assigns it - NOTE(review): which primitive does so is not visible from
  // this header.
  std::optional<SystemInfo> system_info;

  // The synthetic process (see SyntheticProcess). Null until something
  // creates it - NOTE(review): the creator is not visible from this header.
  std::unique_ptr<SyntheticProcess> synthetic_process;
};
356 
// Extracts low-level data from the trace and writes it into the context. The
// life cycle of a collect primitive is:
//
//  primitive.Begin(&context);
//
//  for (auto& packet : packets) {
//    primitive.Collect(packet, &context);
//  }
//
//  primitive.End(&context);
//
// Every member function is const, so whatever a primitive learns has to be
// written to the context rather than to the primitive itself.
class CollectPrimitive {
 public:
  // Declared here only; the definition lives outside this header.
  virtual ~CollectPrimitive();

  // Called once before the first call to Collect(...).
  //
  // Not pure virtual: a base implementation is expected to be defined out of
  // line (its behaviour is not visible from this header), so implementations
  // only need to override this when they have set-up work to do.
  virtual base::Status Begin(Context*) const;

  // Reads a trace packet and updates the context.
  //
  // The packet is passed as a read-only decoder; the context is the only
  // output. Pure virtual: every collect primitive must implement it.
  virtual base::Status Collect(const protos::pbzero::TracePacket::Decoder&,
                               Context*) const = 0;

  // Called once after the last call to Collect(...).
  //
  // Not pure virtual, like Begin(...): override only when there is work to do
  // after the last packet.
  virtual base::Status End(Context*) const;
};
381 
// Responsible for converting low-level data held in the context into
// high-level data, which is stored back in the context.
class BuildPrimitive {
 public:
  // Declared here only; the definition lives outside this header.
  virtual ~BuildPrimitive();

  // Reads low-level data from the context and writes high-level data to the
  // context.
  //
  // `context` is both the input and the output. Pure virtual: every build
  // primitive must implement it. Returns a base::Status describing the
  // outcome.
  virtual base::Status Build(Context* context) const = 0;
};
392 
// Responsible for modifying trace packets using data from the context.
class TransformPrimitive {
 public:
  // Declared here only; the definition lives outside this header.
  virtual ~TransformPrimitive();

  // Modifies a packet using data from the context.
  //
  // `context` is taken by const reference, so a transform can read the
  // context but not change it. `packet` is the in/out argument - presumably
  // the serialized trace packet, rewritten in place; confirm against callers.
  // Pure virtual: every transform primitive must implement it. Returns a
  // base::Status describing the outcome.
  virtual base::Status Transform(const Context& context,
                                 std::string* packet) const = 0;
};
402 
403 }  // namespace perfetto::trace_redaction
404 
405 #endif  // SRC_TRACE_REDACTION_TRACE_REDACTION_FRAMEWORK_H_
406