• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2012 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/nix/mime_util_xdg.h"
6 
7 #include <memory>
8 #include <utility>
9 
10 #include "base/check.h"
11 #include "base/containers/stack.h"
12 #include "base/environment.h"
13 #include "base/files/file_path.h"
14 #include "base/files/file_util.h"
15 #include "base/logging.h"
16 #include "base/nix/xdg_util.h"
17 #include "base/no_destructor.h"
18 #include "base/numerics/byte_conversions.h"
19 #include "base/ranges/algorithm.h"
20 #include "base/strings/string_util.h"
21 #include "base/strings/utf_string_conversion_utils.h"
22 #include "build/build_config.h"
23 
24 #if !BUILDFLAG(IS_CHROMEOS)
25 #include "base/synchronization/lock.h"
26 #endif
27 
28 namespace base::nix {
29 namespace {
30 
// Upper bound on the size of a mime.cache file that will be read. The default
// /usr/share/mime/mime.cache is about 100KB, so 10MB is ridiculously generous.
constexpr size_t kMaxMimeTypesFileSize = 10 * 1024 * 1024;
// Upper bound on the number of nodes accepted in the reverse suffix tree. The
// default file has ~3K nodes; allow up to 30K.
constexpr size_t kMaxNodes = 30000;
// Longest file extension that will be tracked while walking the tree.
constexpr size_t kMaxExtSize = 100;
// Size of the header at the start of a mime.cache file.
constexpr size_t kHeaderSize = 40;
// U+10ffff is the largest valid unicode code point.
constexpr uint32_t kMaxUnicode = 0x10FFFF;
// Mime glob weights default to 50 and go up to 100.
constexpr uint8_t kDefaultGlobWeight = 50;
45 
46 // Path and last modified of mime.cache file.
47 struct FileInfo {
48   FilePath path;
49   Time last_modified;
50 };
51 
52 // Load all mime cache files on the system.
LoadAllMimeCacheFiles(MimeTypeMap & map,std::vector<FileInfo> & files)53 void LoadAllMimeCacheFiles(MimeTypeMap& map, std::vector<FileInfo>& files) {
54   std::unique_ptr<Environment> env(Environment::Create());
55   File::Info info;
56   for (const auto& path : GetXDGDataSearchLocations(env.get())) {
57     FilePath mime_cache = path.Append("mime/mime.cache");
58     if (GetFileInfo(mime_cache, &info) && ParseMimeTypes(mime_cache, map)) {
59       files.emplace_back(mime_cache, info.last_modified);
60     }
61   }
62 }
63 
64 // Read 4 bytes from string `buf` at `offset` as network order uint32_t.
65 // Returns false if `offset > buf.size() - 4` or `offset` is not aligned to a
66 // 4-byte word boundary, or `*result` is not between `min_result` and
67 // `max_result`. `field_name` is used in error message.
ReadInt(const std::string & buf,uint32_t offset,const std::string & field_name,uint32_t min_result,size_t max_result,uint32_t * result)68 bool ReadInt(const std::string& buf,
69              uint32_t offset,
70              const std::string& field_name,
71              uint32_t min_result,
72              size_t max_result,
73              uint32_t* result) {
74   if (offset > buf.size() - 4 || (offset & 0x3)) {
75     LOG(ERROR) << "Invalid offset=" << offset << " for " << field_name
76                << ", string size=" << buf.size();
77     return false;
78   }
79   auto bytes = base::as_byte_span(buf);
80   *result = base::U32FromBigEndian(bytes.subspan(offset).first<4u>());
81   if (*result < min_result || *result > max_result) {
82     LOG(ERROR) << "Invalid " << field_name << "=" << *result
83                << " not between min_result=" << min_result
84                << " and max_result=" << max_result;
85     return false;
86   }
87   return true;
88 }
89 
90 }  // namespace
91 
ParseMimeTypes(const FilePath & file_path,MimeTypeMap & out_mime_types)92 bool ParseMimeTypes(const FilePath& file_path, MimeTypeMap& out_mime_types) {
93   // File format from
94   // https://specifications.freedesktop.org/shared-mime-info-spec/shared-mime-info-spec-0.21.html#idm46070612075440
95   // Header:
96   // 2      CARD16    MAJOR_VERSION  1
97   // 2      CARD16    MINOR_VERSION  2
98   // 4      CARD32    ALIAS_LIST_OFFSET
99   // 4      CARD32    PARENT_LIST_OFFSET
100   // 4      CARD32    LITERAL_LIST_OFFSET
101   // 4      CARD32    REVERSE_SUFFIX_TREE_OFFSET
102   // ...
103   // ReverseSuffixTree:
104   // 4      CARD32    N_ROOTS
105   // 4       CARD32    FIRST_ROOT_OFFSET
106   // ReverseSuffixTreeNode:
107   // 4      CARD32    CHARACTER
108   // 4      CARD32    N_CHILDREN
109   // 4      CARD32    FIRST_CHILD_OFFSET
110   // ReverseSuffixTreeLeafNode:
111   // 4      CARD32    0
112   // 4      CARD32    MIME_TYPE_OFFSET
113   // 4      CARD32    WEIGHT in lower 8 bits
114   //                  FLAGS in rest:
115   //                  0x100 = case-sensitive
116 
117   std::string buf;
118   if (!ReadFileToStringWithMaxSize(file_path, &buf, kMaxMimeTypesFileSize)) {
119     LOG(ERROR) << "Failed reading in mime.cache file: " << file_path;
120     return false;
121   }
122 
123   if (buf.size() < kHeaderSize) {
124     LOG(ERROR) << "Invalid mime.cache file size=" << buf.size();
125     return false;
126   }
127 
128   // Validate file[ALIAS_LIST_OFFSET - 1] is null to ensure that any
129   // null-terminated strings dereferenced at addresses below ALIAS_LIST_OFFSET
130   // will not overflow.
131   uint32_t alias_list_offset = 0;
132   if (!ReadInt(buf, 4, "ALIAS_LIST_OFFSET", kHeaderSize, buf.size(),
133                &alias_list_offset)) {
134     return false;
135   }
136   if (buf[alias_list_offset - 1] != 0) {
137     LOG(ERROR) << "Invalid mime.cache file does not contain null prior to "
138                   "ALIAS_LIST_OFFSET="
139                << alias_list_offset;
140     return false;
141   }
142 
143   // Parse ReverseSuffixTree. Read all nodes and place them on `stack`,
144   // allowing max of kMaxNodes and max extension of kMaxExtSize.
145   uint32_t tree_offset = 0;
146   if (!ReadInt(buf, 16, "REVERSE_SUFFIX_TREE_OFFSET", kHeaderSize, buf.size(),
147                &tree_offset)) {
148     return false;
149   }
150 
151   struct Node {
152     std::string ext;
153     uint32_t n_children;
154     uint32_t first_child_offset;
155   };
156 
157   // Read root node and put it on the stack.
158   Node root;
159   if (!ReadInt(buf, tree_offset, "N_ROOTS", 0, kMaxUnicode, &root.n_children)) {
160     return false;
161   }
162   if (!ReadInt(buf, tree_offset + 4, "FIRST_ROOT_OFFSET", tree_offset,
163                buf.size(), &root.first_child_offset)) {
164     return false;
165   }
166   stack<Node> stack;
167   stack.push(std::move(root));
168 
169   uint32_t num_nodes = 0;
170   while (!stack.empty()) {
171     // Pop top node from the stack and process children.
172     Node n = std::move(stack.top());
173     stack.pop();
174     uint32_t p = n.first_child_offset;
175     for (uint32_t i = 0; i < n.n_children; i++) {
176       uint32_t c = 0;
177       if (!ReadInt(buf, p, "CHARACTER", 0, kMaxUnicode, &c)) {
178         return false;
179       }
180       p += 4;
181 
182       // Leaf node, add mime type if it is highest weight.
183       if (c == 0) {
184         uint32_t mime_type_offset = 0;
185         if (!ReadInt(buf, p, "mime type offset", kHeaderSize,
186                      alias_list_offset - 1, &mime_type_offset)) {
187           return false;
188         }
189         p += 4;
190         uint8_t weight = kDefaultGlobWeight;
191         if ((p + 3) < buf.size()) {
192           weight = static_cast<uint8_t>(buf[p + 3]);
193         }
194         p += 4;
195         if (n.ext.size() > 0 && n.ext[0] == '.') {
196           std::string_view ext = std::string_view(n.ext).substr(1u);
197           auto it = out_mime_types.find(ext);
198           if (it == out_mime_types.end() || weight > it->second.weight) {
199             // Use the mime type string from `buf` up to the first NUL.
200             auto mime_type = std::string_view(buf).substr(mime_type_offset);
201             mime_type = mime_type.substr(0u, mime_type.find('\0'));
202             out_mime_types[std::string(ext)] = {std::string(mime_type), weight};
203           }
204         }
205         continue;
206       }
207 
208       // Regular node, parse and add it to the stack.
209       Node node;
210       WriteUnicodeCharacter(static_cast<int>(c), &node.ext);
211       node.ext += n.ext;
212       if (!ReadInt(buf, p, "N_CHILDREN", 0, kMaxUnicode, &node.n_children)) {
213         return false;
214       }
215       p += 4;
216       if (!ReadInt(buf, p, "FIRST_CHILD_OFFSET", tree_offset, buf.size(),
217                    &node.first_child_offset)) {
218         return false;
219       }
220       p += 4;
221 
222       // Check limits.
223       if (++num_nodes > kMaxNodes) {
224         LOG(ERROR) << "Exceeded maxium number of nodes=" << kMaxNodes;
225         return false;
226       }
227       if (node.ext.size() > kMaxExtSize) {
228         LOG(WARNING) << "Ignoring large extension exceeds size=" << kMaxExtSize
229                      << " ext=" << node.ext;
230         continue;
231       }
232 
233       stack.push(std::move(node));
234     }
235   }
236 
237   return true;
238 }
239 
GetFileMimeType(const FilePath & filepath)240 std::string GetFileMimeType(const FilePath& filepath) {
241   std::string ext = filepath.Extension();
242   if (ext.empty()) {
243     return std::string();
244   }
245 
246   static NoDestructor<std::vector<FileInfo>> xdg_mime_files;
247 
248   static NoDestructor<MimeTypeMap> mime_type_map([] {
249     MimeTypeMap map;
250     LoadAllMimeCacheFiles(map, *xdg_mime_files);
251     return map;
252   }());
253 
254   // Files never change on ChromeOS, but for linux, match xdgmime behavior and
255   // check every 5s and reload if any files have changed.
256 #if !BUILDFLAG(IS_CHROMEOS)
257   static Time last_check;
258   // Lock is required since this may be called on any thread.
259   static NoDestructor<Lock> lock;
260   {
261     AutoLock scoped_lock(*lock);
262 
263     Time now = Time::Now();
264     if (last_check + Seconds(5) < now) {
265       if (ranges::any_of(*xdg_mime_files, [](const FileInfo& file_info) {
266             File::Info info;
267             return !GetFileInfo(file_info.path, &info) ||
268                    info.last_modified != file_info.last_modified;
269           })) {
270         mime_type_map->clear();
271         xdg_mime_files->clear();
272         LoadAllMimeCacheFiles(*mime_type_map, *xdg_mime_files);
273       }
274       last_check = now;
275     }
276   }
277 #endif
278 
279   auto it = mime_type_map->find(ext.substr(1));
280   return it != mime_type_map->end() ? it->second.mime_type : std::string();
281 }
282 
283 }  // namespace base::nix
284