1 // Copyright (c) 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #ifndef THIRD_PARTY_BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
6 #define THIRD_PARTY_BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
7
8 #include <errno.h>
9 #include <sys/mman.h>
10
11 #include "build/build_config.h"
12
13 #if defined(OS_MACOSX)
14 #include <mach/mach.h>
15 #endif
16 #if defined(OS_ANDROID)
17 #include <sys/prctl.h>
18 #endif
19 #if defined(OS_LINUX)
20 #include <sys/resource.h>
21
22 #include <algorithm>
23 #endif
24
25 #include "third_party/base/allocator/partition_allocator/page_allocator.h"
26
27 #ifndef MAP_ANONYMOUS
28 #define MAP_ANONYMOUS MAP_ANON
29 #endif
30
31 namespace pdfium {
32 namespace base {
33
34 #if defined(OS_ANDROID)
35 namespace {
PageTagToName(PageTag tag)36 const char* PageTagToName(PageTag tag) {
37 // Important: All the names should be string literals. As per prctl.h in
38 // //third_party/android_ndk the kernel keeps a pointer to the name instead
39 // of copying it.
40 //
41 // Having the name in .rodata ensures that the pointer remains valid as
42 // long as the mapping is alive.
43 switch (tag) {
44 case PageTag::kBlinkGC:
45 return "blink_gc";
46 case PageTag::kPartitionAlloc:
47 return "partition_alloc";
48 case PageTag::kChromium:
49 return "chromium";
50 case PageTag::kV8:
51 return "v8";
52 default:
53 DCHECK(false);
54 return "";
55 }
56 }
57 } // namespace
58 #endif // defined(OS_ANDROID)
59
// Holds the errno value captured by the most recent failed |mmap| call made
// by SystemAllocPagesInternal(); zero until such a failure is recorded.
std::atomic<int32_t> s_allocPageErrorCode{0};

// The address hint passed to |mmap| is only advisory: when the hinted address
// is blocked, |mmap| picks a nearby address instead.
constexpr bool kHintIsAdvisory{true};
63
GetAccessFlags(PageAccessibilityConfiguration accessibility)64 int GetAccessFlags(PageAccessibilityConfiguration accessibility) {
65 switch (accessibility) {
66 case PageRead:
67 return PROT_READ;
68 case PageReadWrite:
69 return PROT_READ | PROT_WRITE;
70 case PageReadExecute:
71 return PROT_READ | PROT_EXEC;
72 case PageReadWriteExecute:
73 return PROT_READ | PROT_WRITE | PROT_EXEC;
74 default:
75 NOTREACHED();
76 FALLTHROUGH;
77 case PageInaccessible:
78 return PROT_NONE;
79 }
80 }
81
SystemAllocPagesInternal(void * hint,size_t length,PageAccessibilityConfiguration accessibility,PageTag page_tag,bool commit)82 void* SystemAllocPagesInternal(void* hint,
83 size_t length,
84 PageAccessibilityConfiguration accessibility,
85 PageTag page_tag,
86 bool commit) {
87 #if defined(OS_MACOSX)
88 // Use a custom tag to make it easier to distinguish Partition Alloc regions
89 // in vmmap(1). Tags between 240-255 are supported.
90 DCHECK(PageTag::kFirst <= page_tag);
91 DCHECK(PageTag::kLast >= page_tag);
92 int fd = VM_MAKE_TAG(static_cast<int>(page_tag));
93 #else
94 int fd = -1;
95 #endif
96
97 int access_flag = GetAccessFlags(accessibility);
98 int map_flags = MAP_ANONYMOUS | MAP_PRIVATE;
99
100 // TODO(https://crbug.com/927411): Remove once Fuchsia uses a native page
101 // allocator, rather than relying on POSIX compatibility.
102 #if defined(OS_FUCHSIA)
103 if (page_tag == PageTag::kV8) {
104 map_flags |= MAP_JIT;
105 }
106 #endif
107
108 void* ret = mmap(hint, length, access_flag, map_flags, fd, 0);
109 if (ret == MAP_FAILED) {
110 s_allocPageErrorCode = errno;
111 ret = nullptr;
112 }
113
114 #if defined(OS_ANDROID)
115 // On Android, anonymous mappings can have a name attached to them. This is
116 // useful for debugging, and double-checking memory attribution.
117 if (ret) {
118 // No error checking on purpose, testing only.
119 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ret, length,
120 PageTagToName(page_tag));
121 }
122 #endif
123
124 return ret;
125 }
126
TrimMappingInternal(void * base,size_t base_length,size_t trim_length,PageAccessibilityConfiguration accessibility,bool commit,size_t pre_slack,size_t post_slack)127 void* TrimMappingInternal(void* base,
128 size_t base_length,
129 size_t trim_length,
130 PageAccessibilityConfiguration accessibility,
131 bool commit,
132 size_t pre_slack,
133 size_t post_slack) {
134 void* ret = base;
135 // We can resize the allocation run. Release unneeded memory before and after
136 // the aligned range.
137 if (pre_slack) {
138 int res = munmap(base, pre_slack);
139 CHECK(!res);
140 ret = reinterpret_cast<char*>(base) + pre_slack;
141 }
142 if (post_slack) {
143 int res = munmap(reinterpret_cast<char*>(ret) + trim_length, post_slack);
144 CHECK(!res);
145 }
146 return ret;
147 }
148
TrySetSystemPagesAccessInternal(void * address,size_t length,PageAccessibilityConfiguration accessibility)149 bool TrySetSystemPagesAccessInternal(
150 void* address,
151 size_t length,
152 PageAccessibilityConfiguration accessibility) {
153 return 0 == mprotect(address, length, GetAccessFlags(accessibility));
154 }
155
SetSystemPagesAccessInternal(void * address,size_t length,PageAccessibilityConfiguration accessibility)156 void SetSystemPagesAccessInternal(
157 void* address,
158 size_t length,
159 PageAccessibilityConfiguration accessibility) {
160 CHECK_EQ(0, mprotect(address, length, GetAccessFlags(accessibility)));
161 }
162
FreePagesInternal(void * address,size_t length)163 void FreePagesInternal(void* address, size_t length) {
164 CHECK(!munmap(address, length));
165 }
166
DecommitSystemPagesInternal(void * address,size_t length)167 void DecommitSystemPagesInternal(void* address, size_t length) {
168 // In POSIX, there is no decommit concept. Discarding is an effective way of
169 // implementing the Windows semantics where the OS is allowed to not swap the
170 // pages in the region.
171 //
172 // TODO(ajwong): Also explore setting PageInaccessible to make the protection
173 // semantics consistent between Windows and POSIX. This might have a perf cost
174 // though as both decommit and recommit would incur an extra syscall.
175 // http://crbug.com/766882
176 DiscardSystemPages(address, length);
177 }
178
// Recommits the |length| bytes starting at |address|. Always returns true.
//
// On POSIX systems no explicit work is required: the caller simply reads the
// memory to recommit it. That is correct because the API requires the
// permissions to be the same as before decommitting, and every configuration
// can read. |accessibility| is therefore not consulted here.
bool RecommitSystemPagesInternal(void* address,
                                 size_t length,
                                 PageAccessibilityConfiguration accessibility) {
#if defined(OS_MACOSX)
  // macOS needs one more syscall so that its accounting is updated; see
  // https://crbug.com/823915 for details. The result is intentionally unused.
  static_cast<void>(madvise(address, length, MADV_FREE_REUSE));
#endif

  return true;
}
193
DiscardSystemPagesInternal(void * address,size_t length)194 void DiscardSystemPagesInternal(void* address, size_t length) {
195 #if defined(OS_MACOSX)
196 int ret = madvise(address, length, MADV_FREE_REUSABLE);
197 if (ret) {
198 // MADV_FREE_REUSABLE sometimes fails, so fall back to MADV_DONTNEED.
199 ret = madvise(address, length, MADV_DONTNEED);
200 }
201 CHECK(0 == ret);
202 #else
203 // We have experimented with other flags, but with suboptimal results.
204 //
205 // MADV_FREE (Linux): Makes our memory measurements less predictable;
206 // performance benefits unclear.
207 //
208 // Therefore, we just do the simple thing: MADV_DONTNEED.
209 CHECK(!madvise(address, length, MADV_DONTNEED));
210 #endif
211 }
212
213 } // namespace base
214 } // namespace pdfium
215
216 #endif // THIRD_PARTY_BASE_ALLOCATOR_PARTITION_ALLOCATOR_PAGE_ALLOCATOR_INTERNALS_POSIX_H_
217