1 // Copyright 2014 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/metrics/serialization/serialization_utils.h"
6
7 #include <errno.h>
8 #include <stdint.h>
9 #include <sys/file.h>
10
11 #include <utility>
12
13 #include "base/containers/span.h"
14 #include "base/files/file_path.h"
15 #include "base/files/file_util.h"
16 #include "base/files/scoped_file.h"
17 #include "base/logging.h"
18 #include "base/metrics/histogram_functions.h"
19 #include "base/numerics/safe_math.h"
20 #include "base/strings/string_split.h"
21 #include "base/strings/string_util.h"
22 #include "components/metrics/serialization/metric_sample.h"
23
// File mode granting read and write permission to the owner, the group and
// everyone else (rw-rw-rw-). Read bits are listed first, then write bits.
#define READ_WRITE_ALL_FILE_FLAGS \
  (S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR | S_IWGRP | S_IWOTH)
26
27 namespace metrics {
28 namespace {
29 // Reads the next message from |file_descriptor| into |message|.
30 //
31 // |message| will be set to the empty string if no message could be read (EOF)
32 // or the message was badly constructed.
33 //
34 // Returns false if no message can be read from this file anymore (EOF or
35 // unrecoverable error).
ReadMessage(int fd,std::string * message)36 bool ReadMessage(int fd, std::string* message) {
37 CHECK(message);
38
39 int result;
40 uint32_t encoded_size;
41 const size_t message_header_size = sizeof(uint32_t);
42 // The file containing the metrics does not leave the device so the writer and
43 // the reader will always have the same endianness.
44 result = HANDLE_EINTR(read(fd, &encoded_size, message_header_size));
45 if (result < 0) {
46 DPLOG(ERROR) << "reading metrics message header";
47 return false;
48 }
49 if (result == 0) {
50 // This indicates a normal EOF.
51 return false;
52 }
53 if (base::checked_cast<size_t>(result) < message_header_size) {
54 DLOG(ERROR) << "bad read size " << result << ", expecting "
55 << message_header_size;
56 return false;
57 }
58
59 // kMessageMaxLength applies to the entire message: the 4-byte
60 // length field and the content.
61 size_t message_size = base::checked_cast<size_t>(encoded_size);
62 if (message_size > SerializationUtils::kMessageMaxLength) {
63 DLOG(ERROR) << "message too long : " << message_size;
64 if (HANDLE_EINTR(lseek(fd, message_size - message_header_size, SEEK_CUR)) ==
65 -1) {
66 DLOG(ERROR) << "error while skipping message. abort";
67 return false;
68 }
69 // Badly formatted message was skipped. Treat the badly formatted sample as
70 // an empty sample.
71 message->clear();
72 return true;
73 }
74
75 if (message_size < message_header_size) {
76 DLOG(ERROR) << "message too short : " << message_size;
77 return false;
78 }
79
80 message_size -= message_header_size; // The message size includes itself.
81 char buffer[SerializationUtils::kMessageMaxLength];
82 if (!base::ReadFromFD(fd, buffer, message_size)) {
83 DPLOG(ERROR) << "reading metrics message body";
84 return false;
85 }
86 *message = std::string(buffer, message_size);
87 return true;
88 }
89
90 } // namespace
91
92 // This value is used as a max value in a histogram,
93 // Platform.ExternalMetrics.SamplesRead. If it changes, the histogram will need
94 // to be renamed.
95 const int SerializationUtils::kMaxMessagesPerRead = 100000;
96
ParseSample(const std::string & sample)97 std::unique_ptr<MetricSample> SerializationUtils::ParseSample(
98 const std::string& sample) {
99 if (sample.empty())
100 return nullptr;
101
102 std::vector<std::string> parts = base::SplitString(
103 sample, std::string(1, '\0'),
104 base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
105 // We should have two null terminated strings so split should produce
106 // three chunks.
107 if (parts.size() != 3) {
108 DLOG(ERROR) << "splitting message on \\0 produced " << parts.size()
109 << " parts (expected 3)";
110 return nullptr;
111 }
112 const std::string& name = parts[0];
113 const std::string& value = parts[1];
114
115 if (base::EqualsCaseInsensitiveASCII(name, "crash"))
116 return MetricSample::ParseCrash(value);
117 if (base::EqualsCaseInsensitiveASCII(name, "histogram"))
118 return MetricSample::ParseHistogram(value);
119 if (base::EqualsCaseInsensitiveASCII(name, "linearhistogram"))
120 return MetricSample::ParseLinearHistogram(value);
121 if (base::EqualsCaseInsensitiveASCII(name, "sparsehistogram"))
122 return MetricSample::ParseSparseHistogram(value);
123 if (base::EqualsCaseInsensitiveASCII(name, "useraction"))
124 return MetricSample::ParseUserAction(value);
125 DLOG(ERROR) << "invalid event type: " << name << ", value: " << value;
126 return nullptr;
127 }
128
ReadAndTruncateMetricsFromFile(const std::string & filename,std::vector<std::unique_ptr<MetricSample>> * metrics)129 void SerializationUtils::ReadAndTruncateMetricsFromFile(
130 const std::string& filename,
131 std::vector<std::unique_ptr<MetricSample>>* metrics) {
132 struct stat stat_buf;
133 int result;
134
135 result = stat(filename.c_str(), &stat_buf);
136 if (result < 0) {
137 if (errno == ENOENT) {
138 // File doesn't exist, nothing to collect. This isn't an error, it just
139 // means nothing on the ChromeOS side has written to the file yet.
140 } else {
141 DPLOG(ERROR) << "bad metrics file stat: " << filename;
142 }
143 return;
144 }
145 if (stat_buf.st_size == 0) {
146 // Also nothing to collect.
147 return;
148 }
149 base::ScopedFD fd(open(filename.c_str(), O_RDWR));
150 if (fd.get() < 0) {
151 DPLOG(ERROR) << "cannot open: " << filename;
152 return;
153 }
154 result = flock(fd.get(), LOCK_EX);
155 if (result < 0) {
156 DPLOG(ERROR) << "cannot lock: " << filename;
157 return;
158 }
159
160 // This processes all messages in the log. When all messages are
161 // read and processed, or an error occurs, or we've read so many that the
162 // buffer is at risk of overflowing, truncate the file to zero size. If we
163 // hit kMaxMessagesPerRead, don't add them to the vector to avoid memory
164 // overflow.
165 while (metrics->size() < kMaxMessagesPerRead) {
166 std::string message;
167
168 if (!ReadMessage(fd.get(), &message)) {
169 break;
170 }
171
172 std::unique_ptr<MetricSample> sample = ParseSample(message);
173 if (sample)
174 metrics->push_back(std::move(sample));
175 }
176
177 base::UmaHistogramCustomCounts("Platform.ExternalMetrics.SamplesRead",
178 metrics->size(), 1, kMaxMessagesPerRead - 1,
179 50);
180
181 result = ftruncate(fd.get(), 0);
182 if (result < 0)
183 DPLOG(ERROR) << "truncate metrics log: " << filename;
184
185 result = flock(fd.get(), LOCK_UN);
186 if (result < 0)
187 DPLOG(ERROR) << "unlock metrics log: " << filename;
188 }
189
WriteMetricToFile(const MetricSample & sample,const std::string & filename)190 bool SerializationUtils::WriteMetricToFile(const MetricSample& sample,
191 const std::string& filename) {
192 if (!sample.IsValid())
193 return false;
194
195 base::ScopedFD file_descriptor(open(filename.c_str(),
196 O_WRONLY | O_APPEND | O_CREAT | O_CLOEXEC,
197 READ_WRITE_ALL_FILE_FLAGS));
198
199 if (file_descriptor.get() < 0) {
200 DPLOG(ERROR) << "error opening the file: " << filename;
201 return false;
202 }
203
204 fchmod(file_descriptor.get(), READ_WRITE_ALL_FILE_FLAGS);
205 // Grab a lock to avoid chrome truncating the file underneath us. Keep the
206 // file locked as briefly as possible. Freeing file_descriptor will close the
207 // file and remove the lock IFF the process was not forked in the meantime,
208 // which will leave the flock hanging and deadlock the reporting until the
209 // forked process is killed otherwise. Thus we have to explicitly unlock the
210 // file below.
211 if (HANDLE_EINTR(flock(file_descriptor.get(), LOCK_EX)) < 0) {
212 DPLOG(ERROR) << "error locking: " << filename;
213 return false;
214 }
215
216 std::string msg = sample.ToString();
217 size_t size = 0;
218 if (!base::CheckAdd(msg.length(), sizeof(uint32_t)).AssignIfValid(&size) ||
219 size > kMessageMaxLength) {
220 DPLOG(ERROR) << "cannot write message: too long: " << filename;
221 std::ignore = flock(file_descriptor.get(), LOCK_UN);
222 return false;
223 }
224
225 // The file containing the metrics samples will only be read by programs on
226 // the same device so we do not check endianness.
227 uint32_t encoded_size = base::checked_cast<uint32_t>(size);
228 if (!base::WriteFileDescriptor(
229 file_descriptor.get(),
230 base::as_bytes(base::make_span(&encoded_size, 1u)))) {
231 DPLOG(ERROR) << "error writing message length: " << filename;
232 std::ignore = flock(file_descriptor.get(), LOCK_UN);
233 return false;
234 }
235
236 if (!base::WriteFileDescriptor(file_descriptor.get(), msg)) {
237 DPLOG(ERROR) << "error writing message: " << filename;
238 std::ignore = flock(file_descriptor.get(), LOCK_UN);
239 return false;
240 }
241
242 return true;
243 }
244
245 } // namespace metrics
246