/external/mesa3d/src/gallium/drivers/vc4/ |
D | vc4_context.c |
     43  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_flush() local
     45  hash_table_foreach(vc4->jobs, entry) {   in vc4_flush()
     47  vc4_job_submit(vc4, job);   in vc4_flush()
     55  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_pipe_flush() local
     65  drmSyncobjExportSyncFile(vc4->fd, vc4->job_syncobj,   in vc4_pipe_flush()
     69  struct vc4_fence *f = vc4_fence_create(vc4->screen,   in vc4_pipe_flush()
     70  vc4->last_emit_seqno,   in vc4_pipe_flush()
     91  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_set_debug_callback() local
     94  vc4->debug = *cb;   in vc4_set_debug_callback()
     96  memset(&vc4->debug, 0, sizeof(vc4->debug));   in vc4_set_debug_callback()
  [all …]
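The vc4_flush() hits above show the pattern the driver uses at flush time: walk the context's job hash table and submit every pending job. A minimal sketch of that loop, assuming the struct vc4_context / struct vc4_job definitions and the hash_table_foreach() / vc4_job_submit() helpers visible in these excerpts (the helper name flush_all_jobs is illustrative):

    /* Submit every render job the context has accumulated.  Mirrors the
     * hash_table_foreach() / vc4_job_submit() lines matched above; each
     * hash-table entry's data pointer is the job itself. */
    static void
    flush_all_jobs(struct vc4_context *vc4)
    {
            hash_table_foreach(vc4->jobs, entry) {
                    struct vc4_job *job = entry->data;

                    vc4_job_submit(vc4, job);
            }
    }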
|
D | vc4_draw.c |
     76  vc4_start_draw(struct vc4_context *vc4)   in vc4_start_draw() argument
     78  struct vc4_job *job = vc4->job;   in vc4_start_draw()
    109  job->draw_width = vc4->framebuffer.width;   in vc4_start_draw()
    110  job->draw_height = vc4->framebuffer.height;   in vc4_start_draw()
    117  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_predraw_check_textures() local
    128  vc4_flush_jobs_writing_resource(vc4, view->texture);   in vc4_predraw_check_textures()
    133  vc4_emit_gl_shader_state(struct vc4_context *vc4,   in vc4_emit_gl_shader_state() argument
    137  struct vc4_job *job = vc4->job;   in vc4_emit_gl_shader_state()
    139  struct vc4_vertex_stateobj *vtx = vc4->vtx;   in vc4_emit_gl_shader_state()
    141  struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;   in vc4_emit_gl_shader_state()
  [all …]
|
D | vc4_job.c |
     26  * Functions for submitting VC4 render jobs to the kernel.
     35  vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)   in vc4_job_free() argument
     42  _mesa_hash_table_remove_key(vc4->jobs, &job->key);   in vc4_job_free()
     45  _mesa_hash_table_remove_key(vc4->write_jobs,   in vc4_job_free()
     50  _mesa_hash_table_remove_key(vc4->write_jobs,   in vc4_job_free()
     55  _mesa_hash_table_remove_key(vc4->write_jobs,   in vc4_job_free()
     60  _mesa_hash_table_remove_key(vc4->write_jobs,   in vc4_job_free()
     68  if (vc4->job == job)   in vc4_job_free()
     69  vc4->job = NULL;   in vc4_job_free()
     75  vc4_job_create(struct vc4_context *vc4)   in vc4_job_create() argument
  [all …]
|
D | vc4_emit.c |
     29  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_emit_state() local
     30  struct vc4_job *job = vc4->job;   in vc4_emit_state()
     32  if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |   in vc4_emit_state()
     34  float *vpscale = vc4->viewport.scale;   in vc4_emit_state()
     35  float *vptranslate = vc4->viewport.translate;   in vc4_emit_state()
     50  if (!vc4->rasterizer->base.scissor) {   in vc4_emit_state()
     56  minx = MAX2(vp_minx, vc4->scissor.minx);   in vc4_emit_state()
     57  miny = MAX2(vp_miny, vc4->scissor.miny);   in vc4_emit_state()
     58  maxx = MAX2(MIN2(vp_maxx, vc4->scissor.maxx), minx);   in vc4_emit_state()
     59  maxy = MAX2(MIN2(vp_maxy, vc4->scissor.maxy), miny);   in vc4_emit_state()
  [all …]
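The vc4_emit_state() hits show the scissor rectangle being clamped against the viewport-derived bounds with MAX2/MIN2 so that the maximum edge never drops below the minimum. A small, self-contained sketch of the same clamping in plain C (the struct and function names here are illustrative, not the driver's):

    struct rect { int minx, miny, maxx, maxy; };

    /* Intersect a scissor rect with viewport-derived bounds, keeping
     * maxx/maxy from ever falling below minx/miny, as the MAX2/MIN2
     * chain above does. */
    static struct rect
    clamp_scissor(struct rect vp, struct rect sc)
    {
            struct rect out;

            out.minx = sc.minx > vp.minx ? sc.minx : vp.minx;
            out.miny = sc.miny > vp.miny ? sc.miny : vp.miny;
            out.maxx = sc.maxx < vp.maxx ? sc.maxx : vp.maxx;
            out.maxy = sc.maxy < vp.maxy ? sc.maxy : vp.maxy;
            if (out.maxx < out.minx)
                    out.maxx = out.minx;
            if (out.maxy < out.miny)
                    out.maxy = out.miny;
            return out;
    }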
|
D | vc4_register_allocate.c |
    109  vc4_alloc_reg_set(struct vc4_context *vc4)   in vc4_alloc_reg_set() argument
    115  if (vc4->regs)   in vc4_alloc_reg_set()
    118  vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);   in vc4_alloc_reg_set()
    125  vc4->reg_class_any[i] = ra_alloc_reg_class(vc4->regs);   in vc4_alloc_reg_set()
    126  vc4->reg_class_a_or_b[i] = ra_alloc_reg_class(vc4->regs);   in vc4_alloc_reg_set()
    127  vc4->reg_class_a_or_b_or_acc[i] = ra_alloc_reg_class(vc4->regs);   in vc4_alloc_reg_set()
    128  vc4->reg_class_r4_or_a[i] = ra_alloc_reg_class(vc4->regs);   in vc4_alloc_reg_set()
    129  vc4->reg_class_a[i] = ra_alloc_reg_class(vc4->regs);   in vc4_alloc_reg_set()
    131  vc4->reg_class_r0_r3 = ra_alloc_reg_class(vc4->regs);   in vc4_alloc_reg_set()
    135  ra_class_add_reg(vc4->regs, vc4->reg_class_r0_r3, i);   in vc4_alloc_reg_set()
  [all …]
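vc4_alloc_reg_set() builds the register-allocator data once per context: allocate the register set, carve out register classes, then add physical registers to each class. A minimal sketch of that flow using only the ra_* calls already visible above; which registers land in which class, and the loop bounds, are illustrative here rather than the driver's actual assignments:

    /* Lazily build the RA register set, mirroring the ra_alloc_reg_set() /
     * ra_alloc_reg_class() / ra_class_add_reg() sequence matched above. */
    static void
    build_reg_set(struct vc4_context *vc4)
    {
            if (vc4->regs)
                    return;

            vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);

            vc4->reg_class_r0_r3 = ra_alloc_reg_class(vc4->regs);
            for (int i = 0; i < 4; i++)
                    ra_class_add_reg(vc4->regs, vc4->reg_class_r0_r3, i);
    }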
|
D | vc4_blit.c |
     54  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_tile_blit() local
    130  vc4_flush_jobs_reading_resource(vc4, info->src.resource);   in vc4_tile_blit()
    132  struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL);   in vc4_tile_blit()
    157  vc4_job_submit(vc4, job);   in vc4_tile_blit()
    166  vc4_blitter_save(struct vc4_context *vc4)   in vc4_blitter_save() argument
    168  util_blitter_save_fragment_constant_buffer_slot(vc4->blitter,   in vc4_blitter_save()
    169  vc4->constbuf[PIPE_SHADER_FRAGMENT].cb);   in vc4_blitter_save()
    170  util_blitter_save_vertex_buffer_slot(vc4->blitter, vc4->vertexbuf.vb);   in vc4_blitter_save()
    171  util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx);   in vc4_blitter_save()
    172  util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.bind_vs);   in vc4_blitter_save()
  [all …]
|
D | vc4_state.c |
     54  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_set_blend_color() local
     55  vc4->blend_color.f = *blend_color;   in vc4_set_blend_color()
     57  vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);   in vc4_set_blend_color()
     58  vc4->dirty |= VC4_DIRTY_BLEND_COLOR;   in vc4_set_blend_color()
     65  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_set_stencil_ref() local
     66  vc4->stencil_ref = *stencil_ref;   in vc4_set_stencil_ref()
     67  vc4->dirty |= VC4_DIRTY_STENCIL_REF;   in vc4_set_stencil_ref()
     74  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_set_clip_state() local
     75  vc4->clip = *clip;   in vc4_set_clip_state()
     76  vc4->dirty |= VC4_DIRTY_CLIP;   in vc4_set_clip_state()
  [all …]
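vc4_set_blend_color() stores the blend color twice: as floats and as 8-bit unorm bytes via float_to_ubyte(). A tiny sketch of what that per-channel conversion amounts to; the clamp-then-round-to-nearest behavior is an assumption here, since Mesa's exact rounding rule is not shown in the excerpt:

    #include <math.h>
    #include <stdint.h>

    /* Convert one [0,1] float channel to an 8-bit unorm value, the job
     * float_to_ubyte() does for vc4->blend_color.ub[] above. */
    static uint8_t
    channel_to_ubyte(float f)
    {
            if (f <= 0.0f)
                    return 0;
            if (f >= 1.0f)
                    return 255;
            return (uint8_t)lrintf(f * 255.0f);
    }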
|
D | vc4_fence.c |
    110  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_fence_create_fd() local
    114  *fence = vc4_fence_create(vc4->screen, vc4->last_emit_seqno,   in vc4_fence_create_fd()
    122  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_fence_server_sync() local
    126  sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);   in vc4_fence_server_sync()
    138  vc4_fence_context_init(struct vc4_context *vc4)   in vc4_fence_context_init() argument
    140  vc4->base.create_fence_fd = vc4_fence_create_fd;   in vc4_fence_context_init()
    141  vc4->base.fence_server_sync = vc4_fence_server_sync;   in vc4_fence_context_init()
    142  vc4->in_fence_fd = -1;   in vc4_fence_context_init()
    147  if (vc4->screen->has_syncobj) {   in vc4_fence_context_init()
    148  return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,   in vc4_fence_context_init()
  [all …]
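vc4_fence_context_init() installs the fence hooks and, when the kernel supports sync objects, creates the job syncobj in an already-signaled state. A minimal sketch of that syncobj creation using the libdrm call visible above (the wrapper name is illustrative):

    #include <stdint.h>
    #include <xf86drm.h>

    /* Create a DRM sync object that starts out signaled, as the
     * has_syncobj path above does; returns 0 on success. */
    static int
    create_signaled_syncobj(int drm_fd, uint32_t *handle_out)
    {
            return drmSyncobjCreate(drm_fd, DRM_SYNCOBJ_CREATE_SIGNALED,
                                    handle_out);
    }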
|
D | vc4_program.c |
   1087  /* This should always be lowered to ALU operations for VC4. */   in ntq_emit_alu()
   1731  "vc4 doesn't support indirect inputs");   in ntq_emit_load_input()
   1827  "vc4 doesn't support indirect outputs");   in ntq_emit_intrinsic()
   1903  if (!c->vc4->screen->has_control_flow) {   in ntq_emit_if()
   2058  if (!c->vc4->screen->has_control_flow) {   in ntq_emit_loop()
   2219  vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,   in vc4_shader_ntq() argument
   2224  c->vc4 = vc4;   in vc4_shader_ntq()
   2414  vc4_generate_code(vc4, c);   in vc4_shader_ntq()
   2436  struct vc4_context *vc4 = vc4_context(pctx);   in vc4_shader_state_create() local
   2441  so->program_id = vc4->next_uncompiled_program_id++;   in vc4_shader_state_create()
  [all …]
|
D | vc4_uniforms.c |
    191  vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,   in vc4_write_uniforms() argument
    196  struct vc4_job *job = vc4->job;   in vc4_write_uniforms()
    219  cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);   in vc4_write_uniforms()
    222  cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);   in vc4_write_uniforms()
    226  cl_aligned_f(&uniforms, vc4->viewport.translate[2]);   in vc4_write_uniforms()
    229  cl_aligned_f(&uniforms, vc4->viewport.scale[2]);   in vc4_write_uniforms()
    234  vc4->clip.ucp[data / 4][data % 4]);   in vc4_write_uniforms()
    260  u_upload_data(vc4->uploader, 0,   in vc4_write_uniforms()
    306  CLAMP(vc4->blend_color.f.color[uinfo->contents[i] -   in vc4_write_uniforms()
    313  vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format);   in vc4_write_uniforms()
  [all …]
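The * 16.0f on the X/Y viewport-scale uniforms above exists because the hardware consumes screen coordinates in 12.4 fixed point, so one unit of screen space becomes 16 in the value the shader produces. A tiny sketch of that conversion, assuming the usual vc4 12.4 fixed-point convention for screen coordinates (the helper name is illustrative):

    #include <math.h>
    #include <stdint.h>

    /* Turn a float screen coordinate into 12.4 fixed point; multiplying
     * the viewport scale by 16.0f (as above) bakes this factor into the
     * coordinates the vertex shader emits. */
    static int16_t
    to_12_4_fixed(float coord)
    {
            return (int16_t)lrintf(coord * 16.0f);
    }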
|
D | vc4_context.h |
    142  * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
    195  /* Hash table key for vc4->jobs */
    433  if (unlikely(vc4->debug.debug_message)) \
    434  pipe_debug_message(&vc4->debug, PERF_INFO, __VA_ARGS__); \
    483  void vc4_write_uniforms(struct vc4_context *vc4,
    489  int vc4_job_init(struct vc4_context *vc4);
    490  int vc4_fence_context_init(struct vc4_context *vc4);
    491  struct vc4_job *vc4_get_job(struct vc4_context *vc4,
    494  struct vc4_job *vc4_get_job_for_fbo(struct vc4_context *vc4);
    496  void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job);
  [all …]
|
/external/mesa3d/docs/drivers/ |
D | vc4.rst |
      1  VC4   title
      4  Mesa's ``vc4`` graphics driver supports multiple implementations of
     11  This Mesa driver talks directly to the `vc4
     12  <https://www.kernel.org/doc/html/latest/gpu/vc4.html>`__ kernel DRM
     21  The vc4 driver is a nearly conformant GLES2 driver, and the hardware
     32  GLES2.0, and vc4, don't have ``GL_UNSIGNED_INT`` index buffers. To support
     33  them in vc4, we create a shadow copy of your index buffer with the
     46  The VC4 hardware has no support for occlusion queries. GL 2.0
     56  VC4 doesn't support reducing triangles/quads/polygons to lines and
     67  VC4 rendering bugs should go to Mesa's gitlab `issues
  [all …]
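The index-buffer note above (rst lines 32-33) is about shadowing: since neither GLES 2.0 nor the vc4 hardware accepts GL_UNSIGNED_INT index buffers, the driver keeps a 16-bit copy of the application's index buffer. A minimal sketch of that narrowing, assuming every index actually fits in 16 bits (the helper name is illustrative, not the driver's):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Narrow a 32-bit index buffer into a 16-bit shadow copy that the
     * hardware can consume. */
    static void
    shadow_indices_u32_to_u16(uint16_t *dst, const uint32_t *src, size_t count)
    {
            for (size_t i = 0; i < count; i++) {
                    assert(src[i] <= UINT16_MAX); /* assumes no index needs 32 bits */
                    dst[i] = (uint16_t)src[i];
            }
    }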
|
/external/igt-gpu-tools/tests/vc4_ci/ |
D | README |
      1  This directory contains test lists to be used for vc4's DRM support. The files
     14  Changing the test lists should only happen with approval from the vc4
     18  vc4.testlist
     22  restriction for the vc4 DRM driver, combining generic DRM and KMS tests.
     25  vc4-chamelium.testlist
     28  This test list is meant to test the vc4 driver using Google's Chamelium
     33  vc4-chamelium-fast.testlist
|
/external/XNNPACK/src/f32-vscaleextexp/gen/ |
D | avx512f-p5-scalef-x176.c |   (all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x176)
     36  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
     97  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
     98  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
     99  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    100  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    101  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    102  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    103  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    104  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    105  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
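This kernel and the similar generated kernels that follow all hit on `vc4` because it is the degree-4 coefficient of the degree-5 polynomial these exp-based kernels evaluate in Horner form; the matched FMA is the first step, c5*t + c4. A sketch of that evaluation for one __m512 vector, assuming AVX-512F; only vc4's value is taken from the excerpts, the other coefficients are placeholders, and each kernel's final reconstruction/scaling step is omitted:

    #include <immintrin.h>

    /* Horner evaluation of p(t) = c1 + t*(c2 + t*(c3 + t*(c4 + t*c5))),
     * the polynomial core of the p5 exp kernels.  Only vc4 is copied
     * from the kernels above; the other coefficients are placeholders. */
    static __m512
    eval_exp_poly(__m512 vt)
    {
            const __m512 vc5 = _mm512_set1_ps(1.0f); /* placeholder */
            const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);
            const __m512 vc3 = _mm512_set1_ps(1.0f); /* placeholder */
            const __m512 vc2 = _mm512_set1_ps(1.0f); /* placeholder */
            const __m512 vc1 = _mm512_set1_ps(1.0f); /* placeholder */

            __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4); /* the line matched above */
            vp = _mm512_fmadd_ps(vp, vt, vc3);
            vp = _mm512_fmadd_ps(vp, vt, vc2);
            vp = _mm512_fmadd_ps(vp, vt, vc1);
            return vp;
    }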
|
D | avx512f-p5-scalef-x192.c |   (all matches in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192)
     36  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    101  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    102  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    103  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    104  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    105  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    106  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    107  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    108  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    109  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c |   (all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192)
     35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    114  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    115  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    116  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    117  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    118  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    119  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    120  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    121  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    122  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
D | avx512f-p5-scalef-x192-acc2.c |   (all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2)
     35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    115  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    116  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    117  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    118  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    119  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    120  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    121  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    122  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    123  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
D | avx512f-p5-scalef-x192-acc6.c |   (all matches in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6)
     35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    119  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    120  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    121  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    122  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    123  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    124  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    125  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    126  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    127  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
/external/XNNPACK/src/f32-raddexpminusmax/gen/ |
D | avx512f-p5-scalef-x192-acc6.c |   (all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6)
     34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    118  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    119  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    120  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    121  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    122  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    123  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    124  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    125  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    126  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
D | avx512f-p5-scalef-x192-acc3.c |   (all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc3)
     34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    115  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    116  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    117  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    118  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    119  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    120  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    121  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    122  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    123  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
D | avx512f-p5-scalef-x192.c |   (all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192)
     34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    113  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    114  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    115  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    116  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    117  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    118  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    119  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    120  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    121  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
D | avx512f-p5-scalef-x192-acc2.c |   (all matches in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2)
     34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    114  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    115  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    116  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    117  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    118  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    119  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    120  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    121  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    122  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
/external/XNNPACK/src/f32-vscaleexpminusmax/gen/ |
D | avx512f-p5-scalef-x192.c |   (all matches in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192)
     35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    114  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    115  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    116  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    117  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    118  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    119  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    120  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    121  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    122  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
/external/XNNPACK/src/f32-raddextexp/gen/ |
D | avx512f-p5-scalef-x192-acc3.c |   (all matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192_acc3)
     35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    105  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    106  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    107  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    108  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    109  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    110  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    111  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    112  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    113  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|
D | avx512f-p5-scalef-x192.c |   (all matches in xnn_f32_raddextexp_ukernel__avx512f_p5_scalef_x192)
     35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);   local
    101  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);
    102  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);
    103  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);
    104  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);
    105  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);
    106  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);
    107  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);
    108  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);
    109  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);
  [all …]
|