• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 2012-2015 LunarG, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Chia-I Wu <olv@lunarg.com>
26  */
27 
28 #include "ilo_debug.h"
29 #include "ilo_vma.h"
30 #include "ilo_state_sol.h"
31 
32 static bool
sol_stream_validate_gen7(const struct ilo_dev * dev,const struct ilo_state_sol_stream_info * stream)33 sol_stream_validate_gen7(const struct ilo_dev *dev,
34                          const struct ilo_state_sol_stream_info *stream)
35 {
36    uint8_t i;
37 
38    ILO_DEV_ASSERT(dev, 7, 8);
39 
40    assert(stream->vue_read_base + stream->vue_read_count <=
41          stream->cv_vue_attr_count);
42 
43    /*
44     * From the Ivy Bridge PRM, volume 2 part 1, page 200:
45     *
46     *     "(Stream 0 Vertex Read Offset)
47     *      Format: U1 count of 256-bit units
48     *
49     *      Specifies amount of data to skip over before reading back Stream 0
50     *      vertex data. Must be zero if the GS is enabled and the Output
51     *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
52     *      unit)."
53     *
54     *     "(Stream 0 Vertex Read Length)
55     *      Format: U5-1 count of 256-bit units
56     *
57     *      Specifies amount of vertex data to read back for Stream 0 vertices,
58     *      starting at the Stream 0 Vertex Read Offset location. Maximum
59     *      readback is 17 256-bit units (34 128-bit vertex attributes). Read
60     *      data past the end of the valid vertex data has undefined contents,
61     *      and therefore shouldn't be used to source stream out data.  Must be
62     *      zero (i.e., read length = 256b) if the GS is enabled and the Output
63     *      Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
64     *      unit)."
65     */
66    assert(stream->vue_read_base == 0 || stream->vue_read_base == 2);
67    assert(stream->vue_read_count <= 34);
68 
69    assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT);
70 
71    for (i = 0; i < stream->decl_count; i++) {
72       const struct ilo_state_sol_decl_info *decl = &stream->decls[i];
73 
74       assert(decl->is_hole || decl->attr < stream->vue_read_count);
75 
76       /*
77        * From the Ivy Bridge PRM, volume 2 part 1, page 205:
78        *
79        *     "There is only enough internal storage for the 128-bit vertex
80        *      header and 32 128-bit vertex attributes."
81        */
82       assert(decl->attr < 33);
83 
84       assert(decl->component_base < 4 &&
85              decl->component_base + decl->component_count <= 4);
86       assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
87    }
88 
89    return true;
90 }
91 
92 static bool
sol_validate_gen7(const struct ilo_dev * dev,const struct ilo_state_sol_info * info)93 sol_validate_gen7(const struct ilo_dev *dev,
94                   const struct ilo_state_sol_info *info)
95 {
96    uint8_t i;
97 
98    ILO_DEV_ASSERT(dev, 7, 8);
99 
100    /*
101     * From the Ivy Bridge PRM, volume 2 part 1, page 198:
102     *
103     *     "This bit (Render Stream Select) is used even if SO Function Enable
104     *      is DISABLED."
105     *
106     * From the Haswell PRM, volume 2b, page 796:
107     *
108     *     "SO Function Enable must also be ENABLED in order for thiis field
109     *      (Render Stream Select) to select a stream for rendering. When SO
110     *      Function Enable is DISABLED and Rendering Disable is cleared (i.e.,
111     *      rendering is enabled), StreamID is ignored downstream of the SO
112     *      stage, allowing any stream to be rendered."
113     *
114     * We want Gen7 behavior, but we have to require users to follow Gen7.5
115     * behavior: info->sol_enable must be set for info->render_stream to work.
116     */
117 
118    for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
119       if (!sol_stream_validate_gen7(dev, &info->streams[i]))
120          return false;
121    }
122 
123    /*
124     * From the Ivy Bridge PRM, volume 2 part 1, page 208:
125     *
126     *     "(Surface Pitch)
127     *      [0,2048]  Must be 0 or a multiple of 4 Bytes."
128     */
129    for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) {
130       assert(info->buffer_strides[i] <= 2048 &&
131              info->buffer_strides[i] % 4 == 0);
132    }
133 
134    return true;
135 }
136 
137 static bool
sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol * sol,const struct ilo_dev * dev,const struct ilo_state_sol_info * info)138 sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol,
139                                const struct ilo_dev *dev,
140                                const struct ilo_state_sol_info *info)
141 {
142    struct {
143       uint8_t offset;
144       uint8_t len;
145    } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT];
146    uint8_t i;
147    uint32_t dw1, dw2;
148 
149    ILO_DEV_ASSERT(dev, 7, 8);
150 
151    if (!sol_validate_gen7(dev, info))
152       return false;
153 
154    for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
155       const struct ilo_state_sol_stream_info *stream = &info->streams[i];
156 
157       vue_read[i].offset = stream->vue_read_base / 2;
158       /*
159        * In pairs minus 1.  URB entries are aligned to 512-bits.  There is no
160        * need to worry about reading past entries.
161        */
162       vue_read[i].len = (stream->vue_read_count + 1) / 2;
163       if (vue_read[i].len)
164          vue_read[i].len--;
165    }
166 
167    dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
168          info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT;
169 
170    if (info->sol_enable)
171       dw1 |= GEN7_SO_DW1_SO_ENABLE;
172 
173    if (info->render_disable)
174       dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
175 
176    if (info->stats_enable)
177       dw1 |= GEN7_SO_DW1_STATISTICS;
178 
179    if (ilo_dev_gen(dev) < ILO_GEN(8)) {
180       const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 |
181                                      ((bool) info->buffer_strides[2]) << 2 |
182                                      ((bool) info->buffer_strides[1]) << 1 |
183                                      ((bool) info->buffer_strides[0]);
184       dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
185    }
186 
187    dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
188          vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
189          vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
190          vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
191          vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
192          vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
193          vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
194          vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
195 
196    STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2);
197    sol->streamout[0] = dw1;
198    sol->streamout[1] = dw2;
199 
200    memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides));
201 
202    return true;
203 }
204 
205 static bool
sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol * sol,const struct ilo_dev * dev,const struct ilo_state_sol_info * info,uint8_t max_decl_count)206 sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol,
207                                   const struct ilo_dev *dev,
208                                   const struct ilo_state_sol_info *info,
209                                   uint8_t max_decl_count)
210 {
211    uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT];
212    uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT];
213    uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT];
214    uint32_t dw1, dw2;
215    uint8_t i, j;
216 
217    ILO_DEV_ASSERT(dev, 7, 8);
218 
219    memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count);
220 
221    for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
222       const struct ilo_state_sol_stream_info *stream = &info->streams[i];
223 
224       assert(stream->decl_count <= max_decl_count);
225       decl_counts[i] = stream->decl_count;
226       buffer_selects[i] = 0;
227 
228       for (j = 0; j < stream->decl_count; j++) {
229          const struct ilo_state_sol_decl_info *decl = &stream->decls[j];
230          const uint8_t mask = ((1 << decl->component_count) - 1) <<
231             decl->component_base;
232          uint16_t val;
233 
234          val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
235                mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
236 
237          if (decl->is_hole)
238             val |= GEN7_SO_DECL_HOLE_FLAG;
239          else
240             val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT;
241 
242          decl_list[j] |= (uint64_t) val << (16 * i);
243          buffer_selects[i] |= 1 << decl->buffer;
244       }
245    }
246 
247    dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
248          buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
249          buffer_selects[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
250          buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
251    dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
252          decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
253          decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
254          decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
255 
256    STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2);
257    sol->so_decl[0] = dw1;
258    sol->so_decl[1] = dw2;
259 
260    STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2);
261    memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count);
262    sol->decl_count = max_decl_count;
263 
264    return true;
265 }
266 
267 static bool
sol_buffer_validate_gen7(const struct ilo_dev * dev,const struct ilo_state_sol_buffer_info * info)268 sol_buffer_validate_gen7(const struct ilo_dev *dev,
269                          const struct ilo_state_sol_buffer_info *info)
270 {
271    ILO_DEV_ASSERT(dev, 7, 8);
272 
273    /*
274     * From the Ivy Bridge PRM, volume 2 part 1, page 208:
275     *
276     *     "(Surface Base Address) This field specifies the starting DWord
277     *      address..."
278     */
279    assert(info->offset % 4 == 0);
280 
281    if (info->vma) {
282       assert(info->vma->vm_alignment % 4 == 0);
283       assert(info->size && info->offset + info->size <= info->vma->vm_size);
284    }
285 
286    /* Gen8+ only */
287    if (info->write_offset_load || info->write_offset_save) {
288       assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
289       assert(info->write_offset_offset + sizeof(uint32_t) <=
290             info->write_offset_vma->vm_size);
291    }
292 
293    /*
294     * From the Broadwell PRM, volume 2b, page 206:
295     *
296     *     "This field (Stream Offset) specifies the Offset in stream output
297     *      buffer to start at, or whether to append to the end of an existing
298     *      buffer. The Offset must be DWORD aligned."
299     */
300    if (info->write_offset_imm_enable) {
301       assert(info->write_offset_load);
302       assert(info->write_offset_imm % 4 == 0);
303    }
304 
305    return true;
306 }
307 
308 static uint32_t
sol_buffer_get_gen6_size(const struct ilo_dev * dev,const struct ilo_state_sol_buffer_info * info)309 sol_buffer_get_gen6_size(const struct ilo_dev *dev,
310                          const struct ilo_state_sol_buffer_info *info)
311 {
312    ILO_DEV_ASSERT(dev, 6, 8);
313 
314    /*
315     * From the Ivy Bridge PRM, volume 2 part 1, page 208:
316     *
317     *     "(Surface End Address) This field specifies the ending DWord
318     *      address..."
319     */
320    return (info->vma) ? info->size & ~3 : 0;
321 }
322 
323 static bool
sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer * sb,const struct ilo_dev * dev,const struct ilo_state_sol_buffer_info * info)324 sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
325                                       const struct ilo_dev *dev,
326                                       const struct ilo_state_sol_buffer_info *info)
327 {
328    const uint32_t size = sol_buffer_get_gen6_size(dev, info);
329 
330    ILO_DEV_ASSERT(dev, 7, 7.5);
331 
332    if (!sol_buffer_validate_gen7(dev, info))
333       return false;
334 
335    STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2);
336    sb->so_buf[0] = info->offset;
337    sb->so_buf[1] = (size) ? info->offset + size : 0;
338 
339    return true;
340 }
341 
342 static bool
sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer * sb,const struct ilo_dev * dev,const struct ilo_state_sol_buffer_info * info)343 sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
344                                       const struct ilo_dev *dev,
345                                       const struct ilo_state_sol_buffer_info *info)
346 {
347    const uint32_t size = sol_buffer_get_gen6_size(dev, info);
348    uint32_t dw1;
349 
350    ILO_DEV_ASSERT(dev, 8, 8);
351 
352    if (!sol_buffer_validate_gen7(dev, info))
353       return false;
354 
355    dw1 = 0;
356 
357    if (info->vma)
358       dw1 |= GEN8_SO_BUF_DW1_ENABLE;
359    if (info->write_offset_load)
360       dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
361    if (info->write_offset_save)
362       dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE;
363 
364    STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4);
365    sb->so_buf[0] = dw1;
366    sb->so_buf[1] = info->offset;
367 
368    /*
369     * From the Broadwell PRM, volume 2b, page 205:
370     *
371     *     "This field (Surface Size) specifies the size of buffer in number
372     *      DWords minus 1 of the buffer in Graphics Memory."
373     */
374    sb->so_buf[2] = (size) ? size / 4 - 1 : 0;
375 
376    /* load from imm or sb->write_offset_bo */
377    sb->so_buf[3] = (info->write_offset_imm_enable) ?
378       info->write_offset_imm : ~0u;
379 
380    return true;
381 }
382 
383 bool
ilo_state_sol_init(struct ilo_state_sol * sol,const struct ilo_dev * dev,const struct ilo_state_sol_info * info)384 ilo_state_sol_init(struct ilo_state_sol *sol,
385                    const struct ilo_dev *dev,
386                    const struct ilo_state_sol_info *info)
387 {
388    bool ret = true;
389 
390    assert(ilo_is_zeroed(sol, sizeof(*sol)));
391    assert(ilo_is_zeroed(info->data, info->data_size));
392 
393    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
394       uint8_t max_decl_count, i;
395 
396       max_decl_count = info->streams[0].decl_count;
397       for (i = 1; i < ARRAY_SIZE(info->streams); i++) {
398          if (max_decl_count < info->streams[i].decl_count)
399             max_decl_count = info->streams[i].decl_count;
400       }
401 
402       assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size);
403       sol->decl = (uint32_t (*)[2]) info->data;
404 
405       ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info);
406       ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count);
407    }
408 
409    assert(ret);
410 
411    return ret;
412 }
413 
414 bool
ilo_state_sol_init_disabled(struct ilo_state_sol * sol,const struct ilo_dev * dev,bool render_disable)415 ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
416                             const struct ilo_dev *dev,
417                             bool render_disable)
418 {
419    struct ilo_state_sol_info info;
420 
421    memset(&info, 0, sizeof(info));
422    info.render_disable = render_disable;
423 
424    return ilo_state_sol_init(sol, dev, &info);
425 }
426 
/**
 * Return the size required for a stream-output buffer of \p size bytes, and
 * report the required alignment.
 *
 * \param dev        unused
 * \param size       requested size
 * \param alignment  receives the required alignment (4); may be NULL when
 *                   the caller does not need it
 *
 * \return \p size unchanged, as no padding is added
 */
uint32_t
ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
                          uint32_t *alignment)
{
   (void) dev;

   /* DWord aligned without padding */
   if (alignment)
      *alignment = 4;

   return size;
}
435 
436 bool
ilo_state_sol_buffer_init(struct ilo_state_sol_buffer * sb,const struct ilo_dev * dev,const struct ilo_state_sol_buffer_info * info)437 ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
438                           const struct ilo_dev *dev,
439                           const struct ilo_state_sol_buffer_info *info)
440 {
441    bool ret = true;
442 
443    assert(ilo_is_zeroed(sb, sizeof(*sb)));
444 
445    if (ilo_dev_gen(dev) >= ILO_GEN(8))
446       ret &= sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info);
447    else
448       ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
449 
450    sb->vma = info->vma;
451    sb->write_offset_vma = info->write_offset_vma;
452 
453    assert(ret);
454 
455    return ret;
456 }
457 
458 bool
ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer * sb,const struct ilo_dev * dev)459 ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
460                                    const struct ilo_dev *dev)
461 {
462    struct ilo_state_sol_buffer_info info;
463 
464    memset(&info, 0, sizeof(info));
465 
466    return ilo_state_sol_buffer_init(sb, dev, &info);
467 }
468