1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <dlfcn.h>
25 #include <assert.h>
26 #include <stdbool.h>
27 #include <string.h>
28 #include <sys/mman.h>
29 #include <sys/stat.h>
30 #include <unistd.h>
31 #include <fcntl.h>
32 
33 #include "anv_private.h"
34 #include "util/strtod.h"
35 #include "util/debug.h"
36 
37 #include "genxml/gen7_pack.h"
38 
39 struct anv_dispatch_table dtable;
40 
41 static void
42 compiler_debug_log(void *data, const char *fmt, ...)
43 { }
44 
45 static void
46 compiler_perf_log(void *data, const char *fmt, ...)
47 {
48    va_list args;
49    va_start(args, fmt);
50 
51    if (unlikely(INTEL_DEBUG & DEBUG_PERF))
52       vfprintf(stderr, fmt, args);
53 
54    va_end(args);
55 }
56 
57 static bool
58 anv_get_function_timestamp(void *ptr, uint32_t *timestamp)
59 {
60    Dl_info info;
61    struct stat st;
62    if (!dladdr(ptr, &info) || !info.dli_fname)
63       return false;
64 
65    if (stat(info.dli_fname, &st))
66       return false;
67 
68    *timestamp = st.st_mtim.tv_sec;
69    return true;
70 }
71 
72 static bool
73 anv_device_get_cache_uuid(void *uuid)
74 {
75    uint32_t timestamp;
76 
77    memset(uuid, 0, VK_UUID_SIZE);
78    if (!anv_get_function_timestamp(anv_device_get_cache_uuid, &timestamp))
79       return false;
80 
81    snprintf(uuid, VK_UUID_SIZE, "anv-%d", timestamp);
82    return true;
83 }
84 
85 static VkResult
86 anv_physical_device_init(struct anv_physical_device *device,
87                          struct anv_instance *instance,
88                          const char *path)
89 {
90    VkResult result;
91    int fd;
92 
93    fd = open(path, O_RDWR | O_CLOEXEC);
94    if (fd < 0)
95       return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
96 
97    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
98    device->instance = instance;
99 
100    assert(strlen(path) < ARRAY_SIZE(device->path));
101    strncpy(device->path, path, ARRAY_SIZE(device->path));
102 
103    device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
104    if (!device->chipset_id) {
105       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
106       goto fail;
107    }
108 
109    device->name = gen_get_device_name(device->chipset_id);
110    if (!gen_get_device_info(device->chipset_id, &device->info)) {
111       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
112       goto fail;
113    }
114 
115    if (device->info.is_haswell) {
116       fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
117    } else if (device->info.gen == 7 && !device->info.is_baytrail) {
118       fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
119    } else if (device->info.gen == 7 && device->info.is_baytrail) {
120       fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
121    } else if (device->info.gen >= 8) {
122       /* Broadwell, Cherryview, Skylake, Broxton, and Kaby Lake are as
123        * fully supported as anything. */
124    } else {
125       result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
126                          "Vulkan not yet supported on %s", device->name);
127       goto fail;
128    }
129 
130    device->cmd_parser_version = -1;
131    if (device->info.gen == 7) {
132       device->cmd_parser_version =
133          anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
134       if (device->cmd_parser_version == -1) {
135          result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
136                             "failed to get command parser version");
137          goto fail;
138       }
139    }
140 
141    if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
142       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
143                          "failed to get aperture size: %m");
144       goto fail;
145    }
146 
147    if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
148       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
149                          "kernel missing gem wait");
150       goto fail;
151    }
152 
153    if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
154       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
155                          "kernel missing execbuf2");
156       goto fail;
157    }
158 
159    if (!device->info.has_llc &&
160        anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
161       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
162                          "kernel missing wc mmap");
163       goto fail;
164    }
165 
166    if (!anv_device_get_cache_uuid(device->uuid)) {
167       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
168                          "cannot generate UUID");
169       goto fail;
170    }
171    bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
172 
173    /* GENs prior to 8 do not support EU/Subslice info */
174    if (device->info.gen >= 8) {
175       device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
176       device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
177 
178       /* Without this information, we cannot get the right Braswell brand
179        * strings, and we have to use conservative numbers for GPGPU on many
180        * platforms, but otherwise things will just work.
181        */
182       if (device->subslice_total < 1 || device->eu_total < 1) {
183          fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
184                          " query GPU properties.\n");
185       }
186    } else if (device->info.gen == 7) {
187       device->subslice_total = 1 << (device->info.gt - 1);
188    }
189 
190    if (device->info.is_cherryview &&
191        device->subslice_total > 0 && device->eu_total > 0) {
192       /* Logical CS threads = EUs per subslice * 7 threads per EU */
193       uint32_t max_cs_threads = device->eu_total / device->subslice_total * 7;
194 
195       /* Fuse configurations may give more threads than expected, never less. */
196       if (max_cs_threads > device->info.max_cs_threads)
197          device->info.max_cs_threads = max_cs_threads;
198    }
199 
200    brw_process_intel_debug_variable();
201 
202    device->compiler = brw_compiler_create(NULL, &device->info);
203    if (device->compiler == NULL) {
204       result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
205       goto fail;
206    }
207    device->compiler->shader_debug_log = compiler_debug_log;
208    device->compiler->shader_perf_log = compiler_perf_log;
209 
210    result = anv_init_wsi(device);
211    if (result != VK_SUCCESS) {
212       ralloc_free(device->compiler);
213       goto fail;
214    }
215 
216    isl_device_init(&device->isl_dev, &device->info, swizzled);
217 
218    close(fd);
219    return VK_SUCCESS;
220 
221 fail:
222    close(fd);
223    return result;
224 }
225 
226 static void
227 anv_physical_device_finish(struct anv_physical_device *device)
228 {
229    anv_finish_wsi(device);
230    ralloc_free(device->compiler);
231 }
232 
233 static const VkExtensionProperties global_extensions[] = {
234    {
235       .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
236       .specVersion = 25,
237    },
238 #ifdef VK_USE_PLATFORM_XCB_KHR
239    {
240       .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
241       .specVersion = 6,
242    },
243 #endif
244 #ifdef VK_USE_PLATFORM_XLIB_KHR
245    {
246       .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
247       .specVersion = 6,
248    },
249 #endif
250 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
251    {
252       .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
253       .specVersion = 5,
254    },
255 #endif
256 };
257 
258 static const VkExtensionProperties device_extensions[] = {
259    {
260       .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
261       .specVersion = 68,
262    },
263    {
264       .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
265       .specVersion = 1,
266    }
267 };
268 
269 static void *
270 default_alloc_func(void *pUserData, size_t size, size_t align,
271                    VkSystemAllocationScope allocationScope)
272 {
273    return malloc(size);
274 }
275 
276 static void *
277 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
278                      size_t align, VkSystemAllocationScope allocationScope)
279 {
280    return realloc(pOriginal, size);
281 }
282 
283 static void
284 default_free_func(void *pUserData, void *pMemory)
285 {
286    free(pMemory);
287 }
288 
289 static const VkAllocationCallbacks default_alloc = {
290    .pUserData = NULL,
291    .pfnAllocation = default_alloc_func,
292    .pfnReallocation = default_realloc_func,
293    .pfnFree = default_free_func,
294 };
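
/* Illustrative sketch, not part of the original driver: what a client-supplied
 * pAllocator might look like from the application side.  If the application
 * passes callbacks like these to vkCreateInstance(), anv copies them into
 * instance->alloc below; otherwise default_alloc is used.  The names
 * tracking_alloc/tracking_free and the byte-counting scheme are hypothetical.
 *
 *    #include <stdlib.h>
 *    #include <vulkan/vulkan.h>
 *
 *    static void *
 *    tracking_alloc(void *pUserData, size_t size, size_t align,
 *                   VkSystemAllocationScope scope)
 *    {
 *       size_t *total = pUserData;                  // running byte count
 *       *total += size;
 *       // align is a power of two; round size up so aligned_alloc() accepts it
 *       return aligned_alloc(align, (size + align - 1) & ~(align - 1));
 *    }
 *
 *    static void
 *    tracking_free(void *pUserData, void *pMemory)
 *    {
 *       free(pMemory);
 *    }
 *
 *    // Fill a VkAllocationCallbacks with these (plus a reallocation callback)
 *    // and pass its address as pAllocator to vkCreateInstance().
 */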
295 
296 VkResult anv_CreateInstance(
297     const VkInstanceCreateInfo*                 pCreateInfo,
298     const VkAllocationCallbacks*                pAllocator,
299     VkInstance*                                 pInstance)
300 {
301    struct anv_instance *instance;
302 
303    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
304 
305    uint32_t client_version;
306    if (pCreateInfo->pApplicationInfo &&
307        pCreateInfo->pApplicationInfo->apiVersion != 0) {
308       client_version = pCreateInfo->pApplicationInfo->apiVersion;
309    } else {
310       client_version = VK_MAKE_VERSION(1, 0, 0);
311    }
312 
313    if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
314        client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
315       return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
316                        "Client requested version %d.%d.%d",
317                        VK_VERSION_MAJOR(client_version),
318                        VK_VERSION_MINOR(client_version),
319                        VK_VERSION_PATCH(client_version));
320    }
321 
322    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
323       bool found = false;
324       for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
325          if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
326                     global_extensions[j].extensionName) == 0) {
327             found = true;
328             break;
329          }
330       }
331       if (!found)
332          return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
333    }
334 
335    instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
336                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
337    if (!instance)
338       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
339 
340    instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
341 
342    if (pAllocator)
343       instance->alloc = *pAllocator;
344    else
345       instance->alloc = default_alloc;
346 
347    instance->apiVersion = client_version;
348    instance->physicalDeviceCount = -1;
349 
350    _mesa_locale_init();
351 
352    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
353 
354    *pInstance = anv_instance_to_handle(instance);
355 
356    return VK_SUCCESS;
357 }
358 
359 void anv_DestroyInstance(
360     VkInstance                                  _instance,
361     const VkAllocationCallbacks*                pAllocator)
362 {
363    ANV_FROM_HANDLE(anv_instance, instance, _instance);
364 
365    if (!instance)
366       return;
367 
368    if (instance->physicalDeviceCount > 0) {
369       /* We support at most one physical device. */
370       assert(instance->physicalDeviceCount == 1);
371       anv_physical_device_finish(&instance->physicalDevice);
372    }
373 
374    VG(VALGRIND_DESTROY_MEMPOOL(instance));
375 
376    _mesa_locale_fini();
377 
378    vk_free(&instance->alloc, instance);
379 }
380 
381 VkResult anv_EnumeratePhysicalDevices(
382     VkInstance                                  _instance,
383     uint32_t*                                   pPhysicalDeviceCount,
384     VkPhysicalDevice*                           pPhysicalDevices)
385 {
386    ANV_FROM_HANDLE(anv_instance, instance, _instance);
387    VkResult result;
388 
389    if (instance->physicalDeviceCount < 0) {
390       char path[20];
391       for (unsigned i = 0; i < 8; i++) {
392          snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
393          result = anv_physical_device_init(&instance->physicalDevice,
394                                            instance, path);
395          if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
396             break;
397       }
398 
399       if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
400          instance->physicalDeviceCount = 0;
401       } else if (result == VK_SUCCESS) {
402          instance->physicalDeviceCount = 1;
403       } else {
404          return result;
405       }
406    }
407 
408    /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
409     * otherwise it's an inout parameter.
410     *
411     * The Vulkan spec (git aaed022) says:
412     *
413     *    pPhysicalDeviceCount is a pointer to an unsigned integer variable
414     *    that is initialized with the number of devices the application is
415     *    prepared to receive handles to. pname:pPhysicalDevices is a pointer to
416     *    an array of at least this many VkPhysicalDevice handles [...].
417     *
418     *    Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
419     *    overwrites the contents of the variable pointed to by
420     *    pPhysicalDeviceCount with the number of physical devices in the
421     *    instance; otherwise, vkEnumeratePhysicalDevices overwrites
422     *    pPhysicalDeviceCount with the number of physical handles written to
423     *    pPhysicalDevices.
424     */
425    if (!pPhysicalDevices) {
426       *pPhysicalDeviceCount = instance->physicalDeviceCount;
427    } else if (*pPhysicalDeviceCount >= 1) {
428       pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
429       *pPhysicalDeviceCount = 1;
430    } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
431       return VK_INCOMPLETE;
432    } else {
433       *pPhysicalDeviceCount = 0;
434    }
435 
436    return VK_SUCCESS;
437 }
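
/* Illustrative sketch, not part of the original driver: the count/fill idiom
 * described by the spec language quoted above, as seen from the application.
 * The instance handle is assumed to come from vkCreateInstance(); since anv
 * exposes at most one physical device, the count comes back as 0 or 1.
 *
 *    uint32_t count = 0;
 *    vkEnumeratePhysicalDevices(instance, &count, NULL);   // first call: query
 *    VkPhysicalDevice devices[4];
 *    if (count > 4)
 *       count = 4;                                          // inout: cap to array
 *    VkResult res = vkEnumeratePhysicalDevices(instance, &count, devices);
 *    // res is VK_INCOMPLETE only if the array was smaller than the number of
 *    // devices; on success count holds how many handles were actually written.
 */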
438 
439 void anv_GetPhysicalDeviceFeatures(
440     VkPhysicalDevice                            physicalDevice,
441     VkPhysicalDeviceFeatures*                   pFeatures)
442 {
443    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
444 
445    *pFeatures = (VkPhysicalDeviceFeatures) {
446       .robustBufferAccess                       = true,
447       .fullDrawIndexUint32                      = true,
448       .imageCubeArray                           = true,
449       .independentBlend                         = true,
450       .geometryShader                           = true,
451       .tessellationShader                       = true,
452       .sampleRateShading                        = true,
453       .dualSrcBlend                             = true,
454       .logicOp                                  = true,
455       .multiDrawIndirect                        = false,
456       .drawIndirectFirstInstance                = true,
457       .depthClamp                               = true,
458       .depthBiasClamp                           = true,
459       .fillModeNonSolid                         = true,
460       .depthBounds                              = false,
461       .wideLines                                = true,
462       .largePoints                              = true,
463       .alphaToOne                               = true,
464       .multiViewport                            = true,
465       .samplerAnisotropy                        = true,
466       .textureCompressionETC2                   = pdevice->info.gen >= 8 ||
467                                                   pdevice->info.is_baytrail,
468       .textureCompressionASTC_LDR               = pdevice->info.gen >= 9, /* FINISHME CHV */
469       .textureCompressionBC                     = true,
470       .occlusionQueryPrecise                    = true,
471       .pipelineStatisticsQuery                  = false,
472       .fragmentStoresAndAtomics                 = true,
473       .shaderTessellationAndGeometryPointSize   = true,
474       .shaderImageGatherExtended                = true,
475       .shaderStorageImageExtendedFormats        = true,
476       .shaderStorageImageMultisample            = false,
477       .shaderStorageImageReadWithoutFormat      = false,
478       .shaderStorageImageWriteWithoutFormat     = false,
479       .shaderUniformBufferArrayDynamicIndexing  = true,
480       .shaderSampledImageArrayDynamicIndexing   = true,
481       .shaderStorageBufferArrayDynamicIndexing  = true,
482       .shaderStorageImageArrayDynamicIndexing   = true,
483       .shaderClipDistance                       = true,
484       .shaderCullDistance                       = true,
485       .shaderFloat64                            = pdevice->info.gen >= 8,
486       .shaderInt64                              = false,
487       .shaderInt16                              = false,
488       .shaderResourceMinLod                     = false,
489       .variableMultisampleRate                  = false,
490       .inheritedQueries                         = false,
491    };
492 
493    /* We can't do image stores in vec4 shaders */
494    pFeatures->vertexPipelineStoresAndAtomics =
495       pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
496       pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
497 }
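
/* Illustrative sketch, not part of the original driver: how an application
 * typically consumes this query -- check for an optional feature, then request
 * only what it needs at device creation.  phys_dev is assumed, and the rest of
 * VkDeviceCreateInfo (queue create info, extensions) is omitted.
 *
 *    VkPhysicalDeviceFeatures supported, enabled = {0};
 *    vkGetPhysicalDeviceFeatures(phys_dev, &supported);
 *    if (supported.samplerAnisotropy)
 *       enabled.samplerAnisotropy = VK_TRUE;     // else render without it
 *    VkDeviceCreateInfo create_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
 *       .pEnabledFeatures = &enabled,
 *       // ... queueCreateInfoCount, pQueueCreateInfos, extensions ...
 *    };
 */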
498 
499 void anv_GetPhysicalDeviceProperties(
500     VkPhysicalDevice                            physicalDevice,
501     VkPhysicalDeviceProperties*                 pProperties)
502 {
503    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
504    const struct gen_device_info *devinfo = &pdevice->info;
505 
506    const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
507 
508    /* See assertions made when programming the buffer surface state. */
509    const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
510                                       (1ul << 30) : (1ul << 27);
511 
512    VkSampleCountFlags sample_counts =
513       isl_device_get_sample_counts(&pdevice->isl_dev);
514 
515    VkPhysicalDeviceLimits limits = {
516       .maxImageDimension1D                      = (1 << 14),
517       .maxImageDimension2D                      = (1 << 14),
518       .maxImageDimension3D                      = (1 << 11),
519       .maxImageDimensionCube                    = (1 << 14),
520       .maxImageArrayLayers                      = (1 << 11),
521       .maxTexelBufferElements                   = 128 * 1024 * 1024,
522       .maxUniformBufferRange                    = (1ul << 27),
523       .maxStorageBufferRange                    = max_raw_buffer_sz,
524       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
525       .maxMemoryAllocationCount                 = UINT32_MAX,
526       .maxSamplerAllocationCount                = 64 * 1024,
527       .bufferImageGranularity                   = 64, /* A cache line */
528       .sparseAddressSpaceSize                   = 0,
529       .maxBoundDescriptorSets                   = MAX_SETS,
530       .maxPerStageDescriptorSamplers            = 64,
531       .maxPerStageDescriptorUniformBuffers      = 64,
532       .maxPerStageDescriptorStorageBuffers      = 64,
533       .maxPerStageDescriptorSampledImages       = 64,
534       .maxPerStageDescriptorStorageImages       = 64,
535       .maxPerStageDescriptorInputAttachments    = 64,
536       .maxPerStageResources                     = 128,
537       .maxDescriptorSetSamplers                 = 256,
538       .maxDescriptorSetUniformBuffers           = 256,
539       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
540       .maxDescriptorSetStorageBuffers           = 256,
541       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
542       .maxDescriptorSetSampledImages            = 256,
543       .maxDescriptorSetStorageImages            = 256,
544       .maxDescriptorSetInputAttachments         = 256,
545       .maxVertexInputAttributes                 = 32,
546       .maxVertexInputBindings                   = 32,
547       .maxVertexInputAttributeOffset            = 2047,
548       .maxVertexInputBindingStride              = 2048,
549       .maxVertexOutputComponents                = 128,
550       .maxTessellationGenerationLevel           = 64,
551       .maxTessellationPatchSize                 = 32,
552       .maxTessellationControlPerVertexInputComponents = 128,
553       .maxTessellationControlPerVertexOutputComponents = 128,
554       .maxTessellationControlPerPatchOutputComponents = 128,
555       .maxTessellationControlTotalOutputComponents = 2048,
556       .maxTessellationEvaluationInputComponents = 128,
557       .maxTessellationEvaluationOutputComponents = 128,
558       .maxGeometryShaderInvocations             = 32,
559       .maxGeometryInputComponents               = 64,
560       .maxGeometryOutputComponents              = 128,
561       .maxGeometryOutputVertices                = 256,
562       .maxGeometryTotalOutputComponents         = 1024,
563       .maxFragmentInputComponents               = 128,
564       .maxFragmentOutputAttachments             = 8,
565       .maxFragmentDualSrcAttachments            = 1,
566       .maxFragmentCombinedOutputResources       = 8,
567       .maxComputeSharedMemorySize               = 32768,
568       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
569       .maxComputeWorkGroupInvocations           = 16 * devinfo->max_cs_threads,
570       .maxComputeWorkGroupSize = {
571          16 * devinfo->max_cs_threads,
572          16 * devinfo->max_cs_threads,
573          16 * devinfo->max_cs_threads,
574       },
575       .subPixelPrecisionBits                    = 4 /* FIXME */,
576       .subTexelPrecisionBits                    = 4 /* FIXME */,
577       .mipmapPrecisionBits                      = 4 /* FIXME */,
578       .maxDrawIndexedIndexValue                 = UINT32_MAX,
579       .maxDrawIndirectCount                     = UINT32_MAX,
580       .maxSamplerLodBias                        = 16,
581       .maxSamplerAnisotropy                     = 16,
582       .maxViewports                             = MAX_VIEWPORTS,
583       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
584       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
585       .viewportSubPixelBits                     = 13, /* We take a float? */
586       .minMemoryMapAlignment                    = 4096, /* A page */
587       .minTexelBufferOffsetAlignment            = 1,
588       .minUniformBufferOffsetAlignment          = 16,
589       .minStorageBufferOffsetAlignment          = 4,
590       .minTexelOffset                           = -8,
591       .maxTexelOffset                           = 7,
592       .minTexelGatherOffset                     = -32,
593       .maxTexelGatherOffset                     = 31,
594       .minInterpolationOffset                   = -0.5,
595       .maxInterpolationOffset                   = 0.4375,
596       .subPixelInterpolationOffsetBits          = 4,
597       .maxFramebufferWidth                      = (1 << 14),
598       .maxFramebufferHeight                     = (1 << 14),
599       .maxFramebufferLayers                     = (1 << 11),
600       .framebufferColorSampleCounts             = sample_counts,
601       .framebufferDepthSampleCounts             = sample_counts,
602       .framebufferStencilSampleCounts           = sample_counts,
603       .framebufferNoAttachmentsSampleCounts     = sample_counts,
604       .maxColorAttachments                      = MAX_RTS,
605       .sampledImageColorSampleCounts            = sample_counts,
606       .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
607       .sampledImageDepthSampleCounts            = sample_counts,
608       .sampledImageStencilSampleCounts          = sample_counts,
609       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
610       .maxSampleMaskWords                       = 1,
611       .timestampComputeAndGraphics              = false,
612       .timestampPeriod                          = time_stamp_base,
613       .maxClipDistances                         = 8,
614       .maxCullDistances                         = 8,
615       .maxCombinedClipAndCullDistances          = 8,
616       .discreteQueuePriorities                  = 1,
617       .pointSizeRange                           = { 0.125, 255.875 },
618       .lineWidthRange                           = { 0.0, 7.9921875 },
619       .pointSizeGranularity                     = (1.0 / 8.0),
620       .lineWidthGranularity                     = (1.0 / 128.0),
621       .strictLines                              = false, /* FINISHME */
622       .standardSampleLocations                  = true,
623       .optimalBufferCopyOffsetAlignment         = 128,
624       .optimalBufferCopyRowPitchAlignment       = 128,
625       .nonCoherentAtomSize                      = 64,
626    };
627 
628    *pProperties = (VkPhysicalDeviceProperties) {
629       .apiVersion = VK_MAKE_VERSION(1, 0, 5),
630       .driverVersion = 1,
631       .vendorID = 0x8086,
632       .deviceID = pdevice->chipset_id,
633       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
634       .limits = limits,
635       .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
636    };
637 
638    strcpy(pProperties->deviceName, pdevice->name);
639    memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
640 }
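
/* Illustrative sketch, not part of the original driver: the pipelineCacheUUID
 * copied above is what lets an application decide whether a pipeline-cache
 * blob saved by an earlier run (with a possibly different driver build) is
 * still usable.  The 16-byte header offsets follow the VkPipelineCache data
 * layout from the Vulkan spec; the helper name cache_blob_matches is
 * hypothetical.
 *
 *    bool
 *    cache_blob_matches(const void *blob, size_t blob_size,
 *                       const VkPhysicalDeviceProperties *props)
 *    {
 *       if (blob_size < 16 + VK_UUID_SIZE)
 *          return false;
 *       const uint32_t *header = blob;
 *       return header[1] == VK_PIPELINE_CACHE_HEADER_VERSION_ONE &&
 *              header[2] == props->vendorID &&
 *              header[3] == props->deviceID &&
 *              memcmp((const uint8_t *)blob + 16, props->pipelineCacheUUID,
 *                     VK_UUID_SIZE) == 0;
 *    }
 */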
641 
642 void anv_GetPhysicalDeviceQueueFamilyProperties(
643     VkPhysicalDevice                            physicalDevice,
644     uint32_t*                                   pCount,
645     VkQueueFamilyProperties*                    pQueueFamilyProperties)
646 {
647    if (pQueueFamilyProperties == NULL) {
648       *pCount = 1;
649       return;
650    }
651 
652    /* The spec implicitly allows the incoming count to be 0. From the Vulkan
653     * 1.0.38 spec, Section 4.1 Physical Devices:
654     *
655     *     If the value referenced by pQueueFamilyPropertyCount is not 0 [then
656     *     do stuff].
657     */
658    if (*pCount == 0)
659       return;
660 
661    *pQueueFamilyProperties = (VkQueueFamilyProperties) {
662       .queueFlags = VK_QUEUE_GRAPHICS_BIT |
663                     VK_QUEUE_COMPUTE_BIT |
664                     VK_QUEUE_TRANSFER_BIT,
665       .queueCount = 1,
666       .timestampValidBits = 36, /* XXX: Real value here */
667       .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
668    };
669 
670    *pCount = 1;
671 }
672 
673 void anv_GetPhysicalDeviceMemoryProperties(
674     VkPhysicalDevice                            physicalDevice,
675     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
676 {
677    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
678    VkDeviceSize heap_size;
679 
680    /* Reserve some wiggle room for the driver by exposing only 75% of the
681     * aperture to the heap.
682     */
683    heap_size = 3 * physical_device->aperture_size / 4;
684 
685    if (physical_device->info.has_llc) {
686       /* Big core GPUs share LLC with the CPU and thus one memory type can be
687        * both cached and coherent at the same time.
688        */
689       pMemoryProperties->memoryTypeCount = 1;
690       pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
691          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
692                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
693                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
694                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
695          .heapIndex = 0,
696       };
697    } else {
698       /* The spec requires that we expose a host-visible, coherent memory
699        * type, but Atom GPUs don't share LLC. Thus we offer two memory types
700        * to give the application a choice between cached but not coherent,
701        * and coherent but uncached (write-combined).
702        */
703       pMemoryProperties->memoryTypeCount = 2;
704       pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
705          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
706                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
707                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
708          .heapIndex = 0,
709       };
710       pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
711          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
712                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
713                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
714          .heapIndex = 0,
715       };
716    }
717 
718    pMemoryProperties->memoryHeapCount = 1;
719    pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
720       .size = heap_size,
721       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
722    };
723 }
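
/* Illustrative sketch, not part of the original driver: the standard way an
 * application picks between the memory types exposed above -- scan the types
 * allowed by a resource's memoryTypeBits for one with the required property
 * flags.  On non-LLC parts this is where the choice between the coherent
 * (type 0) and cached (type 1) flavors is made.  find_memory_type is a
 * hypothetical helper name.
 *
 *    int32_t
 *    find_memory_type(const VkPhysicalDeviceMemoryProperties *props,
 *                     uint32_t type_bits, VkMemoryPropertyFlags required)
 *    {
 *       for (uint32_t i = 0; i < props->memoryTypeCount; i++) {
 *          if ((type_bits & (1u << i)) &&
 *              (props->memoryTypes[i].propertyFlags & required) == required)
 *             return i;
 *       }
 *       return -1;   // no suitable type
 *    }
 */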
724 
725 PFN_vkVoidFunction anv_GetInstanceProcAddr(
726     VkInstance                                  instance,
727     const char*                                 pName)
728 {
729    return anv_lookup_entrypoint(NULL, pName);
730 }
731 
732 /* With version 1+ of the loader interface the ICD should expose
733  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
734  */
735 PUBLIC
736 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
737     VkInstance                                  instance,
738     const char*                                 pName);
739 
740 PUBLIC
741 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
742     VkInstance                                  instance,
743     const char*                                 pName)
744 {
745    return anv_GetInstanceProcAddr(instance, pName);
746 }
747 
748 PFN_vkVoidFunction anv_GetDeviceProcAddr(
749     VkDevice                                    _device,
750     const char*                                 pName)
751 {
752    ANV_FROM_HANDLE(anv_device, device, _device);
753    return anv_lookup_entrypoint(&device->info, pName);
754 }
755 
756 static void
757 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
758 {
759    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
760    queue->device = device;
761    queue->pool = &device->surface_state_pool;
762 }
763 
764 static void
765 anv_queue_finish(struct anv_queue *queue)
766 {
767 }
768 
769 static struct anv_state
770 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
771 {
772    struct anv_state state;
773 
774    state = anv_state_pool_alloc(pool, size, align);
775    memcpy(state.map, p, size);
776 
777    if (!pool->block_pool->device->info.has_llc)
778       anv_state_clflush(state);
779 
780    return state;
781 }
782 
783 struct gen8_border_color {
784    union {
785       float float32[4];
786       uint32_t uint32[4];
787    };
788    /* Pad out to 64 bytes */
789    uint32_t _pad[12];
790 };
791 
792 static void
793 anv_device_init_border_colors(struct anv_device *device)
794 {
795    static const struct gen8_border_color border_colors[] = {
796       [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
797       [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
798       [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
799       [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
800       [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
801       [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
802    };
803 
804    device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
805                                                     sizeof(border_colors), 64,
806                                                     border_colors);
807 }
808 
809 VkResult
810 anv_device_submit_simple_batch(struct anv_device *device,
811                                struct anv_batch *batch)
812 {
813    struct drm_i915_gem_execbuffer2 execbuf;
814    struct drm_i915_gem_exec_object2 exec2_objects[1];
815    struct anv_bo bo, *exec_bos[1];
816    VkResult result = VK_SUCCESS;
817    uint32_t size;
818    int64_t timeout;
819    int ret;
820 
821    /* Kernel driver requires 8 byte aligned batch length */
822    size = align_u32(batch->next - batch->start, 8);
823    result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
824    if (result != VK_SUCCESS)
825       return result;
826 
827    memcpy(bo.map, batch->start, size);
828    if (!device->info.has_llc)
829       anv_clflush_range(bo.map, size);
830 
831    exec_bos[0] = &bo;
832    exec2_objects[0].handle = bo.gem_handle;
833    exec2_objects[0].relocation_count = 0;
834    exec2_objects[0].relocs_ptr = 0;
835    exec2_objects[0].alignment = 0;
836    exec2_objects[0].offset = bo.offset;
837    exec2_objects[0].flags = 0;
838    exec2_objects[0].rsvd1 = 0;
839    exec2_objects[0].rsvd2 = 0;
840 
841    execbuf.buffers_ptr = (uintptr_t) exec2_objects;
842    execbuf.buffer_count = 1;
843    execbuf.batch_start_offset = 0;
844    execbuf.batch_len = size;
845    execbuf.cliprects_ptr = 0;
846    execbuf.num_cliprects = 0;
847    execbuf.DR1 = 0;
848    execbuf.DR4 = 0;
849 
850    execbuf.flags =
851       I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
852    execbuf.rsvd1 = device->context_id;
853    execbuf.rsvd2 = 0;
854 
855    result = anv_device_execbuf(device, &execbuf, exec_bos);
856    if (result != VK_SUCCESS)
857       goto fail;
858 
859    timeout = INT64_MAX;
860    ret = anv_gem_wait(device, bo.gem_handle, &timeout);
861    if (ret != 0) {
862       /* We don't know the real error. */
863       result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
864       goto fail;
865    }
866 
867  fail:
868    anv_bo_pool_free(&device->batch_bo_pool, &bo);
869 
870    return result;
871 }
872 
873 VkResult anv_CreateDevice(
874     VkPhysicalDevice                            physicalDevice,
875     const VkDeviceCreateInfo*                   pCreateInfo,
876     const VkAllocationCallbacks*                pAllocator,
877     VkDevice*                                   pDevice)
878 {
879    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
880    VkResult result;
881    struct anv_device *device;
882 
883    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
884 
885    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
886       bool found = false;
887       for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
888          if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
889                     device_extensions[j].extensionName) == 0) {
890             found = true;
891             break;
892          }
893       }
894       if (!found)
895          return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
896    }
897 
898    device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
899                        sizeof(*device), 8,
900                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
901    if (!device)
902       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
903 
904    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
905    device->instance = physical_device->instance;
906    device->chipset_id = physical_device->chipset_id;
907 
908    if (pAllocator)
909       device->alloc = *pAllocator;
910    else
911       device->alloc = physical_device->instance->alloc;
912 
913    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
914    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
915    if (device->fd == -1) {
916       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
917       goto fail_device;
918    }
919 
920    device->context_id = anv_gem_create_context(device);
921    if (device->context_id == -1) {
922       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
923       goto fail_fd;
924    }
925 
926    device->info = physical_device->info;
927    device->isl_dev = physical_device->isl_dev;
928 
929    /* On Broadwell and later, we can use batch chaining to more efficiently
930     * implement growing command buffers.  Prior to Haswell, the kernel
931     * command parser gets in the way and we have to fall back to growing
932     * the batch.
933     */
934    device->can_chain_batches = device->info.gen >= 8;
935 
936    device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
937       pCreateInfo->pEnabledFeatures->robustBufferAccess;
938 
939    pthread_mutex_init(&device->mutex, NULL);
940 
941    pthread_condattr_t condattr;
942    pthread_condattr_init(&condattr);
943    pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
944    pthread_cond_init(&device->queue_submit, &condattr);
945    pthread_condattr_destroy(&condattr);
946 
947    anv_bo_pool_init(&device->batch_bo_pool, device);
948 
949    anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
950 
951    anv_state_pool_init(&device->dynamic_state_pool,
952                        &device->dynamic_state_block_pool);
953 
954    anv_block_pool_init(&device->instruction_block_pool, device, 1024 * 1024);
955    anv_state_pool_init(&device->instruction_state_pool,
956                        &device->instruction_block_pool);
957 
958    anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
959 
960    anv_state_pool_init(&device->surface_state_pool,
961                        &device->surface_state_block_pool);
962 
963    anv_bo_init_new(&device->workaround_bo, device, 1024);
964 
965    anv_scratch_pool_init(device, &device->scratch_pool);
966 
967    anv_queue_init(device, &device->queue);
968 
969    switch (device->info.gen) {
970    case 7:
971       if (!device->info.is_haswell)
972          result = gen7_init_device_state(device);
973       else
974          result = gen75_init_device_state(device);
975       break;
976    case 8:
977       result = gen8_init_device_state(device);
978       break;
979    case 9:
980       result = gen9_init_device_state(device);
981       break;
982    default:
983       /* Shouldn't get here as we don't create physical devices for any other
984        * gens. */
985       unreachable("unhandled gen");
986    }
987    if (result != VK_SUCCESS)
988       goto fail_fd;
989 
990    anv_device_init_blorp(device);
991 
992    anv_device_init_border_colors(device);
993 
994    *pDevice = anv_device_to_handle(device);
995 
996    return VK_SUCCESS;
997 
998  fail_fd:
999    close(device->fd);
1000  fail_device:
1001    vk_free(&device->alloc, device);
1002 
1003    return result;
1004 }
1005 
1006 void anv_DestroyDevice(
1007     VkDevice                                    _device,
1008     const VkAllocationCallbacks*                pAllocator)
1009 {
1010    ANV_FROM_HANDLE(anv_device, device, _device);
1011 
1012    if (!device)
1013       return;
1014 
1015    anv_device_finish_blorp(device);
1016 
1017    anv_queue_finish(&device->queue);
1018 
1019 #ifdef HAVE_VALGRIND
1020    /* We only need to free these to prevent valgrind errors.  The backing
1021     * BO will go away in a couple of lines so we don't actually leak.
1022     */
1023    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
1024 #endif
1025 
1026    anv_scratch_pool_finish(device, &device->scratch_pool);
1027 
1028    anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
1029    anv_gem_close(device, device->workaround_bo.gem_handle);
1030 
1031    anv_state_pool_finish(&device->surface_state_pool);
1032    anv_block_pool_finish(&device->surface_state_block_pool);
1033    anv_state_pool_finish(&device->instruction_state_pool);
1034    anv_block_pool_finish(&device->instruction_block_pool);
1035    anv_state_pool_finish(&device->dynamic_state_pool);
1036    anv_block_pool_finish(&device->dynamic_state_block_pool);
1037 
1038    anv_bo_pool_finish(&device->batch_bo_pool);
1039 
1040    pthread_cond_destroy(&device->queue_submit);
1041    pthread_mutex_destroy(&device->mutex);
1042 
1043    anv_gem_destroy_context(device, device->context_id);
1044 
1045    close(device->fd);
1046 
1047    vk_free(&device->alloc, device);
1048 }
1049 
1050 VkResult anv_EnumerateInstanceExtensionProperties(
1051     const char*                                 pLayerName,
1052     uint32_t*                                   pPropertyCount,
1053     VkExtensionProperties*                      pProperties)
1054 {
1055    if (pProperties == NULL) {
1056       *pPropertyCount = ARRAY_SIZE(global_extensions);
1057       return VK_SUCCESS;
1058    }
1059 
1060    *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
1061    typed_memcpy(pProperties, global_extensions, *pPropertyCount);
1062 
1063    if (*pPropertyCount < ARRAY_SIZE(global_extensions))
1064       return VK_INCOMPLETE;
1065 
1066    return VK_SUCCESS;
1067 }
1068 
1069 VkResult anv_EnumerateDeviceExtensionProperties(
1070     VkPhysicalDevice                            physicalDevice,
1071     const char*                                 pLayerName,
1072     uint32_t*                                   pPropertyCount,
1073     VkExtensionProperties*                      pProperties)
1074 {
1075    if (pProperties == NULL) {
1076       *pPropertyCount = ARRAY_SIZE(device_extensions);
1077       return VK_SUCCESS;
1078    }
1079 
1080    *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
1081    typed_memcpy(pProperties, device_extensions, *pPropertyCount);
1082 
1083    if (*pPropertyCount < ARRAY_SIZE(device_extensions))
1084       return VK_INCOMPLETE;
1085 
1086    return VK_SUCCESS;
1087 }
1088 
1089 VkResult anv_EnumerateInstanceLayerProperties(
1090     uint32_t*                                   pPropertyCount,
1091     VkLayerProperties*                          pProperties)
1092 {
1093    if (pProperties == NULL) {
1094       *pPropertyCount = 0;
1095       return VK_SUCCESS;
1096    }
1097 
1098    /* None supported at this time */
1099    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1100 }
1101 
1102 VkResult anv_EnumerateDeviceLayerProperties(
1103     VkPhysicalDevice                            physicalDevice,
1104     uint32_t*                                   pPropertyCount,
1105     VkLayerProperties*                          pProperties)
1106 {
1107    if (pProperties == NULL) {
1108       *pPropertyCount = 0;
1109       return VK_SUCCESS;
1110    }
1111 
1112    /* None supported at this time */
1113    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
1114 }
1115 
1116 void anv_GetDeviceQueue(
1117     VkDevice                                    _device,
1118     uint32_t                                    queueNodeIndex,
1119     uint32_t                                    queueIndex,
1120     VkQueue*                                    pQueue)
1121 {
1122    ANV_FROM_HANDLE(anv_device, device, _device);
1123 
1124    assert(queueIndex == 0);
1125 
1126    *pQueue = anv_queue_to_handle(&device->queue);
1127 }
1128 
1129 VkResult
1130 anv_device_execbuf(struct anv_device *device,
1131                    struct drm_i915_gem_execbuffer2 *execbuf,
1132                    struct anv_bo **execbuf_bos)
1133 {
1134    int ret = anv_gem_execbuffer(device, execbuf);
1135    if (ret != 0) {
1136       /* We don't know the real error. */
1137       return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
1138    }
1139 
1140    struct drm_i915_gem_exec_object2 *objects =
1141       (void *)(uintptr_t)execbuf->buffers_ptr;
1142    for (uint32_t k = 0; k < execbuf->buffer_count; k++)
1143       execbuf_bos[k]->offset = objects[k].offset;
1144 
1145    return VK_SUCCESS;
1146 }
1147 
1148 VkResult anv_QueueSubmit(
1149     VkQueue                                     _queue,
1150     uint32_t                                    submitCount,
1151     const VkSubmitInfo*                         pSubmits,
1152     VkFence                                     _fence)
1153 {
1154    ANV_FROM_HANDLE(anv_queue, queue, _queue);
1155    ANV_FROM_HANDLE(anv_fence, fence, _fence);
1156    struct anv_device *device = queue->device;
1157    VkResult result = VK_SUCCESS;
1158 
1159    /* We lock around QueueSubmit for three main reasons:
1160     *
1161     *  1) When a block pool is resized, we create a new gem handle with a
1162     *     different size and, in the case of surface states, possibly a
1163     *     different center offset but we re-use the same anv_bo struct when
1164     *     we do so.  If this happens in the middle of setting up an execbuf,
1165     *     we could end up with our list of BOs out of sync with our list of
1166     *     gem handles.
1167     *
1168     *  2) The algorithm we use for building the list of unique buffers isn't
1169     *     thread-safe.  While the client is supposed to synchronize around
1170     *     QueueSubmit, this would be extremely difficult to debug if it ever
1171     *     came up in the wild due to a broken app.  It's better to play it
1172     *     safe and just lock around QueueSubmit.
1173     *
1174     *  3) The anv_cmd_buffer_execbuf function may perform relocations in
1175     *     userspace.  Because the surface state buffer is shared between
1176     *     batches, we can't afford to have that happen from multiple threads
1177     *     at the same time.  Even though the user is supposed to ensure this
1178     *     doesn't happen, we play it safe as in (2) above.
1179     *
1180     * Since the only other operations that take the device lock, such as
1181     * block pool resizes, happen only rarely, this lock will almost never be
1182     * contended, so taking it isn't really an expensive operation in this case.
1183     */
1184    pthread_mutex_lock(&device->mutex);
1185 
1186    for (uint32_t i = 0; i < submitCount; i++) {
1187       for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
1188          ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
1189                          pSubmits[i].pCommandBuffers[j]);
1190          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1191 
1192          result = anv_cmd_buffer_execbuf(device, cmd_buffer);
1193          if (result != VK_SUCCESS)
1194             goto out;
1195       }
1196    }
1197 
1198    if (fence) {
1199       struct anv_bo *fence_bo = &fence->bo;
1200       result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
1201       if (result != VK_SUCCESS)
1202          goto out;
1203 
1204       /* Update the fence and wake up any waiters */
1205       assert(fence->state == ANV_FENCE_STATE_RESET);
1206       fence->state = ANV_FENCE_STATE_SUBMITTED;
1207       pthread_cond_broadcast(&device->queue_submit);
1208    }
1209 
1210 out:
1211    pthread_mutex_unlock(&device->mutex);
1212 
1213    return result;
1214 }
1215 
1216 VkResult anv_QueueWaitIdle(
1217     VkQueue                                     _queue)
1218 {
1219    ANV_FROM_HANDLE(anv_queue, queue, _queue);
1220 
1221    return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
1222 }
1223 
1224 VkResult anv_DeviceWaitIdle(
1225     VkDevice                                    _device)
1226 {
1227    ANV_FROM_HANDLE(anv_device, device, _device);
1228    struct anv_batch batch;
1229 
1230    uint32_t cmds[8];
1231    batch.start = batch.next = cmds;
1232    batch.end = (void *) cmds + sizeof(cmds);
1233 
1234    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1235    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
1236 
1237    return anv_device_submit_simple_batch(device, &batch);
1238 }
1239 
1240 VkResult
1241 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
1242 {
1243    uint32_t gem_handle = anv_gem_create(device, size);
1244    if (!gem_handle)
1245       return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
1246 
1247    anv_bo_init(bo, gem_handle, size);
1248 
1249    return VK_SUCCESS;
1250 }
1251 
1252 VkResult anv_AllocateMemory(
1253     VkDevice                                    _device,
1254     const VkMemoryAllocateInfo*                 pAllocateInfo,
1255     const VkAllocationCallbacks*                pAllocator,
1256     VkDeviceMemory*                             pMem)
1257 {
1258    ANV_FROM_HANDLE(anv_device, device, _device);
1259    struct anv_device_memory *mem;
1260    VkResult result;
1261 
1262    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
1263 
1264    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
1265    assert(pAllocateInfo->allocationSize > 0);
1266 
1267    /* We support exactly one memory heap. */
1268    assert(pAllocateInfo->memoryTypeIndex == 0 ||
1269           (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
1270 
1271    /* FINISHME: Fail if allocation request exceeds heap size. */
1272 
1273    mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
1274                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1275    if (mem == NULL)
1276       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1277 
1278    /* The kernel is going to give us whole pages anyway */
1279    uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
1280 
1281    result = anv_bo_init_new(&mem->bo, device, alloc_size);
1282    if (result != VK_SUCCESS)
1283       goto fail;
1284 
1285    mem->type_index = pAllocateInfo->memoryTypeIndex;
1286 
1287    mem->map = NULL;
1288    mem->map_size = 0;
1289 
1290    *pMem = anv_device_memory_to_handle(mem);
1291 
1292    return VK_SUCCESS;
1293 
1294  fail:
1295    vk_free2(&device->alloc, pAllocator, mem);
1296 
1297    return result;
1298 }
1299 
1300 void anv_FreeMemory(
1301     VkDevice                                    _device,
1302     VkDeviceMemory                              _mem,
1303     const VkAllocationCallbacks*                pAllocator)
1304 {
1305    ANV_FROM_HANDLE(anv_device, device, _device);
1306    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
1307 
1308    if (mem == NULL)
1309       return;
1310 
1311    if (mem->map)
1312       anv_UnmapMemory(_device, _mem);
1313 
1314    if (mem->bo.map)
1315       anv_gem_munmap(mem->bo.map, mem->bo.size);
1316 
1317    if (mem->bo.gem_handle != 0)
1318       anv_gem_close(device, mem->bo.gem_handle);
1319 
1320    vk_free2(&device->alloc, pAllocator, mem);
1321 }
1322 
1323 VkResult anv_MapMemory(
1324     VkDevice                                    _device,
1325     VkDeviceMemory                              _memory,
1326     VkDeviceSize                                offset,
1327     VkDeviceSize                                size,
1328     VkMemoryMapFlags                            flags,
1329     void**                                      ppData)
1330 {
1331    ANV_FROM_HANDLE(anv_device, device, _device);
1332    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1333 
1334    if (mem == NULL) {
1335       *ppData = NULL;
1336       return VK_SUCCESS;
1337    }
1338 
1339    if (size == VK_WHOLE_SIZE)
1340       size = mem->bo.size - offset;
1341 
1342    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
1343     *
1344     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
1345     *    (this is enforced by the assert below)
1346     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
1347     *    equal to the size of the memory minus offset
1348     */
1349    assert(size > 0);
1350    assert(offset + size <= mem->bo.size);
1351 
1352    /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
1353     * takes a VkDeviceMemory pointer, it seems like only one map of the memory
1354     * at a time is valid. We could just mmap up front and return an offset
1355     * pointer here, but that may exhaust virtual memory on 32 bit
1356     * userspace. */
1357 
1358    uint32_t gem_flags = 0;
1359    if (!device->info.has_llc && mem->type_index == 0)
1360       gem_flags |= I915_MMAP_WC;
1361 
1362    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
1363    uint64_t map_offset = offset & ~4095ull;
1364    assert(offset >= map_offset);
1365    uint64_t map_size = (offset + size) - map_offset;
1366 
1367    /* Let's map whole pages */
1368    map_size = align_u64(map_size, 4096);
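   /* Worked example: offset = 5000 and size = 100 give map_offset = 4096 and
    * map_size = 1004, rounded up to 4096; the pointer returned below is then
    * mem->map + 904.
    */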
1369 
1370    void *map = anv_gem_mmap(device, mem->bo.gem_handle,
1371                             map_offset, map_size, gem_flags);
1372    if (map == MAP_FAILED)
1373       return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
1374 
1375    mem->map = map;
1376    mem->map_size = map_size;
1377 
1378    *ppData = mem->map + (offset - map_offset);
1379 
1380    return VK_SUCCESS;
1381 }
1382 
1383 void anv_UnmapMemory(
1384     VkDevice                                    _device,
1385     VkDeviceMemory                              _memory)
1386 {
1387    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1388 
1389    if (mem == NULL)
1390       return;
1391 
1392    anv_gem_munmap(mem->map, mem->map_size);
1393 
1394    mem->map = NULL;
1395    mem->map_size = 0;
1396 }
1397 
1398 static void
1399 clflush_mapped_ranges(struct anv_device         *device,
1400                       uint32_t                   count,
1401                       const VkMappedMemoryRange *ranges)
1402 {
1403    for (uint32_t i = 0; i < count; i++) {
1404       ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
1405       void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
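      /* Rounding the start down with ~CACHELINE_MASK ensures the first
       * partially-covered cache line gets flushed as well.
       */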
1406       void *end;
1407 
1408       if (ranges[i].offset + ranges[i].size > mem->map_size)
1409          end = mem->map + mem->map_size;
1410       else
1411          end = mem->map + ranges[i].offset + ranges[i].size;
1412 
1413       while (p < end) {
1414          __builtin_ia32_clflush(p);
1415          p += CACHELINE_SIZE;
1416       }
1417    }
1418 }
1419 
1420 VkResult anv_FlushMappedMemoryRanges(
1421     VkDevice                                    _device,
1422     uint32_t                                    memoryRangeCount,
1423     const VkMappedMemoryRange*                  pMemoryRanges)
1424 {
1425    ANV_FROM_HANDLE(anv_device, device, _device);
1426 
1427    if (device->info.has_llc)
1428       return VK_SUCCESS;
1429 
1430    /* Make sure the writes we're flushing have landed. */
1431    __builtin_ia32_mfence();
1432 
1433    clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1434 
1435    return VK_SUCCESS;
1436 }
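
/* Typical usage of the flush path on non-coherent (non-LLC) memory, as a
 * rough sketch; the names dev, mem, data and size are illustrative and not
 * from this file:
 *
 *    void *ptr;
 *    vkMapMemory(dev, mem, 0, size, 0, &ptr);
 *    memcpy(ptr, data, size);
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = mem, .offset = 0, .size = size,
 *    };
 *    vkFlushMappedMemoryRanges(dev, 1, &range);
 *
 * On LLC platforms the flush is a no-op above because CPU and GPU caches are
 * coherent.
 */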
1437 
1438 VkResult anv_InvalidateMappedMemoryRanges(
1439     VkDevice                                    _device,
1440     uint32_t                                    memoryRangeCount,
1441     const VkMappedMemoryRange*                  pMemoryRanges)
1442 {
1443    ANV_FROM_HANDLE(anv_device, device, _device);
1444 
1445    if (device->info.has_llc)
1446       return VK_SUCCESS;
1447 
1448    clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
1449 
1450    /* Make sure no reads get moved up above the invalidate. */
1451    __builtin_ia32_mfence();
1452 
1453    return VK_SUCCESS;
1454 }
1455 
1456 void anv_GetBufferMemoryRequirements(
1457     VkDevice                                    _device,
1458     VkBuffer                                    _buffer,
1459     VkMemoryRequirements*                       pMemoryRequirements)
1460 {
1461    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1462    ANV_FROM_HANDLE(anv_device, device, _device);
1463 
1464    /* The Vulkan spec (git aaed022) says:
1465     *
1466     *    memoryTypeBits is a bitfield and contains one bit set for every
1467     *    supported memory type for the resource. The bit `1<<i` is set if and
1468     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1469     *    structure for the physical device is supported.
1470     *
1471     * We support exactly one memory type on LLC, two on non-LLC.
1472     */
1473    pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
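   /* That is, only bit 0 (value 1) is set on LLC platforms, while bits 0 and 1
    * (value 3) are set on non-LLC, matching the two memory types advertised
    * there.
    */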
1474 
1475    pMemoryRequirements->size = buffer->size;
1476    pMemoryRequirements->alignment = 16;
1477 }
1478 
1479 void anv_GetImageMemoryRequirements(
1480     VkDevice                                    _device,
1481     VkImage                                     _image,
1482     VkMemoryRequirements*                       pMemoryRequirements)
1483 {
1484    ANV_FROM_HANDLE(anv_image, image, _image);
1485    ANV_FROM_HANDLE(anv_device, device, _device);
1486 
1487    /* The Vulkan spec (git aaed022) says:
1488     *
1489     *    memoryTypeBits is a bitfield and contains one bit set for every
1490     *    supported memory type for the resource. The bit `1<<i` is set if and
1491     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
1492     *    structure for the physical device is supported.
1493     *
1494     * We support exactly one memory type on LLC, two on non-LLC.
1495     */
1496    pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
1497 
1498    pMemoryRequirements->size = image->size;
1499    pMemoryRequirements->alignment = image->alignment;
1500 }
1501 
1502 void anv_GetImageSparseMemoryRequirements(
1503     VkDevice                                    device,
1504     VkImage                                     image,
1505     uint32_t*                                   pSparseMemoryRequirementCount,
1506     VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
1507 {
1508    stub();
1509 }
1510 
1511 void anv_GetDeviceMemoryCommitment(
1512     VkDevice                                    device,
1513     VkDeviceMemory                              memory,
1514     VkDeviceSize*                               pCommittedMemoryInBytes)
1515 {
1516    *pCommittedMemoryInBytes = 0;
1517 }
1518 
1519 VkResult anv_BindBufferMemory(
1520     VkDevice                                    device,
1521     VkBuffer                                    _buffer,
1522     VkDeviceMemory                              _memory,
1523     VkDeviceSize                                memoryOffset)
1524 {
1525    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
1526    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1527 
1528    if (mem) {
1529       buffer->bo = &mem->bo;
1530       buffer->offset = memoryOffset;
1531    } else {
1532       buffer->bo = NULL;
1533       buffer->offset = 0;
1534    }
1535 
1536    return VK_SUCCESS;
1537 }
1538 
1539 VkResult anv_QueueBindSparse(
1540     VkQueue                                     queue,
1541     uint32_t                                    bindInfoCount,
1542     const VkBindSparseInfo*                     pBindInfo,
1543     VkFence                                     fence)
1544 {
1545    stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
1546 }
1547 
1548 VkResult anv_CreateFence(
1549     VkDevice                                    _device,
1550     const VkFenceCreateInfo*                    pCreateInfo,
1551     const VkAllocationCallbacks*                pAllocator,
1552     VkFence*                                    pFence)
1553 {
1554    ANV_FROM_HANDLE(anv_device, device, _device);
1555    struct anv_bo fence_bo;
1556    struct anv_fence *fence;
1557    struct anv_batch batch;
1558    VkResult result;
1559 
1560    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1561 
1562    result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
1563    if (result != VK_SUCCESS)
1564       return result;
1565 
1566    /* Fences are small.  Just store the CPU data structure in the BO. */
1567    fence = fence_bo.map;
1568    fence->bo = fence_bo;
1569 
1570    /* Place the batch after the CPU data but on its own cache line. */
1571    const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
1572    batch.next = batch.start = fence->bo.map + batch_offset;
1573    batch.end = fence->bo.map + fence->bo.size;
1574    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
1575    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
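   /* Resulting BO layout (sketch): bytes [0, sizeof(*fence)) hold the CPU-side
    * struct anv_fence, and the tiny batch (MI_BATCH_BUFFER_END + MI_NOOP)
    * starts at the next cache-line boundary.
    */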
1576 
1577    if (!device->info.has_llc) {
1578       assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
1579       assert(batch.next - batch.start <= CACHELINE_SIZE);
1580       __builtin_ia32_mfence();
1581       __builtin_ia32_clflush(batch.start);
1582    }
1583 
1584    fence->exec2_objects[0].handle = fence->bo.gem_handle;
1585    fence->exec2_objects[0].relocation_count = 0;
1586    fence->exec2_objects[0].relocs_ptr = 0;
1587    fence->exec2_objects[0].alignment = 0;
1588    fence->exec2_objects[0].offset = fence->bo.offset;
1589    fence->exec2_objects[0].flags = 0;
1590    fence->exec2_objects[0].rsvd1 = 0;
1591    fence->exec2_objects[0].rsvd2 = 0;
1592 
1593    fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1594    fence->execbuf.buffer_count = 1;
1595    fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
1596    fence->execbuf.batch_len = batch.next - batch.start;
1597    fence->execbuf.cliprects_ptr = 0;
1598    fence->execbuf.num_cliprects = 0;
1599    fence->execbuf.DR1 = 0;
1600    fence->execbuf.DR4 = 0;
1601 
1602    fence->execbuf.flags =
1603       I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1604    fence->execbuf.rsvd1 = device->context_id;
1605    fence->execbuf.rsvd2 = 0;
1606 
1607    if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
1608       fence->state = ANV_FENCE_STATE_SIGNALED;
1609    } else {
1610       fence->state = ANV_FENCE_STATE_RESET;
1611    }
1612 
1613    *pFence = anv_fence_to_handle(fence);
1614 
1615    return VK_SUCCESS;
1616 }
1617 
1618 void anv_DestroyFence(
1619     VkDevice                                    _device,
1620     VkFence                                     _fence,
1621     const VkAllocationCallbacks*                pAllocator)
1622 {
1623    ANV_FROM_HANDLE(anv_device, device, _device);
1624    ANV_FROM_HANDLE(anv_fence, fence, _fence);
1625 
1626    if (!fence)
1627       return;
1628 
1629    assert(fence->bo.map == fence);
1630    anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
1631 }
1632 
1633 VkResult anv_ResetFences(
1634     VkDevice                                    _device,
1635     uint32_t                                    fenceCount,
1636     const VkFence*                              pFences)
1637 {
1638    for (uint32_t i = 0; i < fenceCount; i++) {
1639       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1640       fence->state = ANV_FENCE_STATE_RESET;
1641    }
1642 
1643    return VK_SUCCESS;
1644 }
1645 
1646 VkResult anv_GetFenceStatus(
1647     VkDevice                                    _device,
1648     VkFence                                     _fence)
1649 {
1650    ANV_FROM_HANDLE(anv_device, device, _device);
1651    ANV_FROM_HANDLE(anv_fence, fence, _fence);
1652    int64_t t = 0;
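   /* A GEM wait with a zero timeout is effectively a non-blocking poll: it
    * returns 0 if the BO is idle and -1 with errno set (ETIME) otherwise.
    */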
1653    int ret;
1654 
1655    switch (fence->state) {
1656    case ANV_FENCE_STATE_RESET:
1657       /* If it hasn't even been sent off to the GPU yet, it's not ready */
1658       return VK_NOT_READY;
1659 
1660    case ANV_FENCE_STATE_SIGNALED:
1661       /* It's been signaled, return success */
1662       return VK_SUCCESS;
1663 
1664    case ANV_FENCE_STATE_SUBMITTED:
1665       /* It's been submitted to the GPU but we don't know if it's done yet. */
1666       ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1667       if (ret == 0) {
1668          fence->state = ANV_FENCE_STATE_SIGNALED;
1669          return VK_SUCCESS;
1670       } else {
1671          return VK_NOT_READY;
1672       }
1673    default:
1674       unreachable("Invalid fence status");
1675    }
1676 }
1677 
1678 #define NSEC_PER_SEC 1000000000
1679 #define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
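/* For example, INT_TYPE_MAX applied to an int64_t yields INT64_MAX, and
 * applied to a 32-bit tv_sec it yields INT32_MAX.
 */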
1680 
1681 VkResult anv_WaitForFences(
1682     VkDevice                                    _device,
1683     uint32_t                                    fenceCount,
1684     const VkFence*                              pFences,
1685     VkBool32                                    waitAll,
1686     uint64_t                                    _timeout)
1687 {
1688    ANV_FROM_HANDLE(anv_device, device, _device);
1689    int ret;
1690 
1691    /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
1692     * to block indefinitely for timeouts <= 0.  Unfortunately, this was broken
1693     * for a couple of kernel releases.  Since there's no way to know
1694     * whether or not the kernel we're using is one of the broken ones, the
1695     * best we can do is to clamp the timeout to INT64_MAX.  This limits the
1696     * maximum timeout from 584 years to 292 years - likely not a big deal.
1697     */
1698    int64_t timeout = MIN2(_timeout, INT64_MAX);
1699 
1700    uint32_t pending_fences = fenceCount;
1701    while (pending_fences) {
1702       pending_fences = 0;
1703       bool signaled_fences = false;
1704       for (uint32_t i = 0; i < fenceCount; i++) {
1705          ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1706          switch (fence->state) {
1707          case ANV_FENCE_STATE_RESET:
1708             /* This fence hasn't been submitted yet, we'll catch it the next
1709              * time around.  Yes, this may mean we dead-loop but, short of
1710              * lots of locking and a condition variable, there's not much that
1711              * we can do about that.
1712              */
1713             pending_fences++;
1714             continue;
1715 
1716          case ANV_FENCE_STATE_SIGNALED:
1717             /* This fence is not pending.  If waitAll isn't set, we can return
1718              * early.  Otherwise, we have to keep going.
1719              */
1720             if (!waitAll)
1721                return VK_SUCCESS;
1722             continue;
1723 
1724          case ANV_FENCE_STATE_SUBMITTED:
1725             /* These are the fences we really care about.  Go ahead and wait
1726              * on them until we hit a timeout.
1727              */
1728             ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
1729             if (ret == -1 && errno == ETIME) {
1730                return VK_TIMEOUT;
1731             } else if (ret == -1) {
1732                /* We don't know the real error. */
1733                return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
1734             } else {
1735                fence->state = ANV_FENCE_STATE_SIGNALED;
1736                signaled_fences = true;
1737                if (!waitAll)
1738                   return VK_SUCCESS;
1739                continue;
1740             }
1741          }
1742       }
1743 
1744       if (pending_fences && !signaled_fences) {
1745          /* If we've hit this then someone decided to vkWaitForFences before
1746           * they've actually submitted any of them to a queue.  This is a
1747           * fairly pessimal case, so it's ok to lock here and use a standard
1748           * pthreads condition variable.
1749           */
1750          pthread_mutex_lock(&device->mutex);
1751 
1752          /* It's possible that some of the fences have changed state since the
1753           * last time we checked.  Now that we have the lock, check for
1754           * pending fences again and don't wait if it's changed.
1755           */
1756          uint32_t now_pending_fences = 0;
1757          for (uint32_t i = 0; i < fenceCount; i++) {
1758             ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
1759             if (fence->state == ANV_FENCE_STATE_RESET)
1760                now_pending_fences++;
1761          }
1762          assert(now_pending_fences <= pending_fences);
1763 
1764          if (now_pending_fences == pending_fences) {
1765             struct timespec before;
1766             clock_gettime(CLOCK_MONOTONIC, &before);
1767 
1768             uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
1769             uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
1770                                (timeout / NSEC_PER_SEC);
1771             abs_nsec %= NSEC_PER_SEC;
1772 
1773             /* Avoid roll-over in tv_sec on 32-bit systems if the user
1774              * provided timeout is UINT64_MAX
1775              */
1776             struct timespec abstime;
1777             abstime.tv_nsec = abs_nsec;
1778             abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
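            /* E.g. with _timeout == UINT64_MAX the clamped timeout is
             * INT64_MAX ns, so abs_sec would overflow a 32-bit tv_sec;
             * INT_TYPE_MAX caps it at INT32_MAX (early 2038) instead.
             */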
1779 
1780             ret = pthread_cond_timedwait(&device->queue_submit,
1781                                          &device->mutex, &abstime);
1782             assert(ret != EINVAL);
1783 
1784             struct timespec after;
1785             clock_gettime(CLOCK_MONOTONIC, &after);
1786             uint64_t time_elapsed =
1787                ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
1788                ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
1789 
1790             if (time_elapsed >= timeout) {
1791                pthread_mutex_unlock(&device->mutex);
1792                return VK_TIMEOUT;
1793             }
1794 
1795             timeout -= time_elapsed;
1796          }
1797 
1798          pthread_mutex_unlock(&device->mutex);
1799       }
1800    }
1801 
1802    return VK_SUCCESS;
1803 }
1804 
1805 // Queue semaphore functions
1806 
1807 VkResult anv_CreateSemaphore(
1808     VkDevice                                    device,
1809     const VkSemaphoreCreateInfo*                pCreateInfo,
1810     const VkAllocationCallbacks*                pAllocator,
1811     VkSemaphore*                                pSemaphore)
1812 {
1813    /* The DRM execbuffer ioctl always executes in-order, even between different
1814     * rings. As such, there's nothing to do for the user space semaphore.
1815     */
1816 
1817    *pSemaphore = (VkSemaphore)1;
1818 
1819    return VK_SUCCESS;
1820 }
1821 
1822 void anv_DestroySemaphore(
1823     VkDevice                                    device,
1824     VkSemaphore                                 semaphore,
1825     const VkAllocationCallbacks*                pAllocator)
1826 {
1827 }
1828 
1829 // Event functions
1830 
1831 VkResult anv_CreateEvent(
1832     VkDevice                                    _device,
1833     const VkEventCreateInfo*                    pCreateInfo,
1834     const VkAllocationCallbacks*                pAllocator,
1835     VkEvent*                                    pEvent)
1836 {
1837    ANV_FROM_HANDLE(anv_device, device, _device);
1838    struct anv_state state;
1839    struct anv_event *event;
1840 
1841    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
1842 
1843    state = anv_state_pool_alloc(&device->dynamic_state_pool,
1844                                 sizeof(*event), 8);
1845    event = state.map;
1846    event->state = state;
1847    event->semaphore = VK_EVENT_RESET;
1848 
1849    if (!device->info.has_llc) {
1850       /* Make sure the writes we're flushing have landed. */
1851       __builtin_ia32_mfence();
1852       __builtin_ia32_clflush(event);
1853    }
1854 
1855    *pEvent = anv_event_to_handle(event);
1856 
1857    return VK_SUCCESS;
1858 }
1859 
1860 void anv_DestroyEvent(
1861     VkDevice                                    _device,
1862     VkEvent                                     _event,
1863     const VkAllocationCallbacks*                pAllocator)
1864 {
1865    ANV_FROM_HANDLE(anv_device, device, _device);
1866    ANV_FROM_HANDLE(anv_event, event, _event);
1867 
1868    if (!event)
1869       return;
1870 
1871    anv_state_pool_free(&device->dynamic_state_pool, event->state);
1872 }
1873 
1874 VkResult anv_GetEventStatus(
1875     VkDevice                                    _device,
1876     VkEvent                                     _event)
1877 {
1878    ANV_FROM_HANDLE(anv_device, device, _device);
1879    ANV_FROM_HANDLE(anv_event, event, _event);
1880 
1881    if (!device->info.has_llc) {
1882       /* Invalidate read cache before reading event written by GPU. */
1883       __builtin_ia32_clflush(event);
1884       __builtin_ia32_mfence();
1885 
1886    }
1887 
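   /* VK_EVENT_SET and VK_EVENT_RESET are themselves VkResult success codes,
    * which is why the stored value can be returned directly here.
    */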
1888    return event->semaphore;
1889 }
1890 
1891 VkResult anv_SetEvent(
1892     VkDevice                                    _device,
1893     VkEvent                                     _event)
1894 {
1895    ANV_FROM_HANDLE(anv_device, device, _device);
1896    ANV_FROM_HANDLE(anv_event, event, _event);
1897 
1898    event->semaphore = VK_EVENT_SET;
1899 
1900    if (!device->info.has_llc) {
1901       /* Make sure the writes we're flushing have landed. */
1902       __builtin_ia32_mfence();
1903       __builtin_ia32_clflush(event);
1904    }
1905 
1906    return VK_SUCCESS;
1907 }
1908 
1909 VkResult anv_ResetEvent(
1910     VkDevice                                    _device,
1911     VkEvent                                     _event)
1912 {
1913    ANV_FROM_HANDLE(anv_device, device, _device);
1914    ANV_FROM_HANDLE(anv_event, event, _event);
1915 
1916    event->semaphore = VK_EVENT_RESET;
1917 
1918    if (!device->info.has_llc) {
1919       /* Make sure the writes we're flushing have landed. */
1920       __builtin_ia32_mfence();
1921       __builtin_ia32_clflush(event);
1922    }
1923 
1924    return VK_SUCCESS;
1925 }
1926 
1927 // Buffer functions
1928 
1929 VkResult anv_CreateBuffer(
1930     VkDevice                                    _device,
1931     const VkBufferCreateInfo*                   pCreateInfo,
1932     const VkAllocationCallbacks*                pAllocator,
1933     VkBuffer*                                   pBuffer)
1934 {
1935    ANV_FROM_HANDLE(anv_device, device, _device);
1936    struct anv_buffer *buffer;
1937 
1938    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1939 
1940    buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
1941                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
1942    if (buffer == NULL)
1943       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1944 
1945    buffer->size = pCreateInfo->size;
1946    buffer->usage = pCreateInfo->usage;
1947    buffer->bo = NULL;
1948    buffer->offset = 0;
1949 
1950    *pBuffer = anv_buffer_to_handle(buffer);
1951 
1952    return VK_SUCCESS;
1953 }
1954 
1955 void anv_DestroyBuffer(
1956     VkDevice                                    _device,
1957     VkBuffer                                    _buffer,
1958     const VkAllocationCallbacks*                pAllocator)
1959 {
1960    ANV_FROM_HANDLE(anv_device, device, _device);
1961    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
1962 
1963    if (!buffer)
1964       return;
1965 
1966    vk_free2(&device->alloc, pAllocator, buffer);
1967 }
1968 
1969 void
1970 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
1971                               enum isl_format format,
1972                               uint32_t offset, uint32_t range, uint32_t stride)
1973 {
1974    isl_buffer_fill_state(&device->isl_dev, state.map,
1975                          .address = offset,
1976                          .mocs = device->default_mocs,
1977                          .size = range,
1978                          .format = format,
1979                          .stride = stride);
1980 
1981    if (!device->info.has_llc)
1982       anv_state_clflush(state);
1983 }
1984 
1985 void anv_DestroySampler(
1986     VkDevice                                    _device,
1987     VkSampler                                   _sampler,
1988     const VkAllocationCallbacks*                pAllocator)
1989 {
1990    ANV_FROM_HANDLE(anv_device, device, _device);
1991    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
1992 
1993    if (!sampler)
1994       return;
1995 
1996    vk_free2(&device->alloc, pAllocator, sampler);
1997 }
1998 
1999 VkResult anv_CreateFramebuffer(
2000     VkDevice                                    _device,
2001     const VkFramebufferCreateInfo*              pCreateInfo,
2002     const VkAllocationCallbacks*                pAllocator,
2003     VkFramebuffer*                              pFramebuffer)
2004 {
2005    ANV_FROM_HANDLE(anv_device, device, _device);
2006    struct anv_framebuffer *framebuffer;
2007 
2008    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2009 
2010    size_t size = sizeof(*framebuffer) +
2011                  sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
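   /* The framebuffer is allocated as one block: the struct itself followed by
    * an array of attachmentCount image-view pointers, filled in below.
    */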
2012    framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
2013                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
2014    if (framebuffer == NULL)
2015       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2016 
2017    framebuffer->attachment_count = pCreateInfo->attachmentCount;
2018    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2019       VkImageView _iview = pCreateInfo->pAttachments[i];
2020       framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
2021    }
2022 
2023    framebuffer->width = pCreateInfo->width;
2024    framebuffer->height = pCreateInfo->height;
2025    framebuffer->layers = pCreateInfo->layers;
2026 
2027    *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
2028 
2029    return VK_SUCCESS;
2030 }
2031 
2032 void anv_DestroyFramebuffer(
2033     VkDevice                                    _device,
2034     VkFramebuffer                               _fb,
2035     const VkAllocationCallbacks*                pAllocator)
2036 {
2037    ANV_FROM_HANDLE(anv_device, device, _device);
2038    ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
2039 
2040    if (!fb)
2041       return;
2042 
2043    vk_free2(&device->alloc, pAllocator, fb);
2044 }
2045 
2046 /* vk_icd.h does not declare this function, so we declare it here to
2047  * suppress -Wmissing-prototypes.
2048  */
2049 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2050 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
2051 
2052 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2053 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
2054 {
2055    /* For the full details on loader interface versioning, see
2056     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2057     * What follows is a condensed summary, to help you navigate the large and
2058     * confusing official doc.
2059     *
2060     *   - Loader interface v0 is incompatible with later versions. We don't
2061     *     support it.
2062     *
2063     *   - In loader interface v1:
2064     *       - The first ICD entrypoint called by the loader is
2065     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2066     *         entrypoint.
2067     *       - The ICD must statically expose no other Vulkan symbol unless it is
2068     *         linked with -Bsymbolic.
2069     *       - Each dispatchable Vulkan handle created by the ICD must be
2070     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
2071     *         ICD must initialize VK_LOADER_DATA.loaderMagic to ICD_LOADER_MAGIC.
2072     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2073     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
2074     *         such loader-managed surfaces.
2075     *
2076     *    - Loader interface v2 differs from v1 in:
2077     *       - The first ICD entrypoint called by the loader is
2078     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2079     *         statically expose this entrypoint.
2080     *
2081     *    - Loader interface v3 differs from v2 in:
2082     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2083     *          vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
2084     *          because the loader no longer does so.
2085     */
2086    *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
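   /* For example, a loader advertising version 4 or higher gets 3 back, while
    * a loader advertising version 1 leaves *pSupportedVersion at 1; it is then
    * up to the loader to decide whether that version is acceptable.
    */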
2087    return VK_SUCCESS;
2088 }
2089