Lines Matching full:heads
35 f"heads ({num_attention_heads})"
69 batch, query_sequence, key_sequence, heads, features = dims()
70 heads.size = self.num_attention_heads
76 # The group (heads, features) splits apart a single positional dimension
77 # into two dimensions. Since heads.size*features.size == q.size(2)
78 # and we specified heads.size, features.size is inferred here.
79 q = q[batch, query_sequence, [heads, features]]
80 k = k[batch, key_sequence, [heads, features]]
81 v = v[batch, key_sequence, [heads, features]]
87 key_past = past_key_value[0][batch, heads, key_sequence, features]
88 value_past = past_key_value[1][batch, heads, key_sequence, features]
157 …# working in reverse to with_dims, the (heads, features) group flattens the dimensions into a single dimension
158 return context_layer.order(batch, query_sequence, [heads, features])
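
The f-string fragment at line 35 is presumably the tail of a standard divisibility check; the surrounding lines are not in the match set, so the reconstruction below is an assumption rather than the file's exact wording:

    def check_heads(hidden_size: int, num_attention_heads: int) -> None:
        # Hypothetical wrapper; only the second f-string line appears in the match above.
        if hidden_size % num_attention_heads != 0:
            raise ValueError(
                f"The hidden size ({hidden_size}) is not a multiple of the number of attention "
                f"heads ({num_attention_heads})"
            )

The constraint is what makes the split below well-defined: features.size can only be inferred when the hidden dimension divides evenly by heads.size.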
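For the split and flatten themselves, here is a minimal, self-contained sketch of the round trip shown at lines 79-81 and 158. It assumes the functorch.dim package; the tensor name and sizes are invented for illustration.

    import torch
    from functorch.dim import dims

    hidden_states = torch.randn(2, 8, 64)   # (batch, sequence, hidden_size)
    batch, sequence, heads, features = dims(4)
    heads.size = 4                           # mirrors heads.size = self.num_attention_heads

    # Indexing with the list [heads, features] splits the last positional
    # dimension (64) into two named dims; features.size is inferred as 64 // 4 = 16.
    split = hidden_states[batch, sequence, [heads, features]]

    # order() is the inverse: the [heads, features] group is flattened back
    # into a single positional dimension of size heads.size * features.size.
    flat = split.order(batch, sequence, [heads, features])
    assert flat.shape == hidden_states.shape  # (2, 8, 64)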