/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_
#define TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

#if defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)
#include "tensorflow/core/kernels/eigen_contraction_kernel.h"

namespace Eigen {
namespace internal {
// Pack a block of the right input matrix (in our case it is always a
// "virtual matrix" constructed from extracted image patches) into a
// contiguous block in column-major storage order. Knowing the properties of
// the original patch op, we can do this more efficiently than the default
// gemm_pack_colmajor_block.
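//
// Conceptually, for a `rows x cols` sub-block of the patch matrix, the packed
// output is laid out column by column:
//
//   block[c * rows + r] == rhs(r, c)   for r in [0, rows), c in [0, cols)
//
// which is exactly what the scalar fallback in operator() below does; the
// standard-patch paths perform the same copy with packet loads where possible.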
template <typename NewDimension, Index Rows, Index Cols, typename ArgType,
          typename Device, typename Scalar, typename StorageIndex,
          typename nocontract_t, typename contract_t, int packet_size,
          bool inner_dim_contiguous, bool inner_dim_reordered, int Alignment>
struct gemm_pack_colmajor_block<
    Scalar, StorageIndex,
    TensorContractionSubMapper<
        Scalar, StorageIndex, Rhs,
        TensorEvaluator<
            const TensorReshapingOp<
                NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >,
            Device>,
        nocontract_t, contract_t, packet_size, inner_dim_contiguous,
        inner_dim_reordered, Alignment>,
    ColMajor> {
  typedef TensorContractionSubMapper<
      Scalar, StorageIndex, Rhs,
      TensorEvaluator<
          const TensorReshapingOp<
              NewDimension, const TensorImagePatchOp<Rows, Cols, ArgType> >,
          Device>,
      nocontract_t, contract_t, packet_size, inner_dim_contiguous,
      inner_dim_reordered, Alignment>
      SubMapper;

  typedef SubMapper DataMapper;
  typedef typename packet_traits<Scalar>::type Packet;

  EIGEN_DONT_INLINE
  void operator()(Scalar* block, const DataMapper rhs, StorageIndex rows,
                  StorageIndex cols) {
    const bool standard_patches = !rhs.nonStandardPatches();

    if (standard_patches && (rhs.patchDepth() % packet_size == 0)) {
      // A single packet always belongs to a single patch (row, col).
      packStandardPatches</*patch_depth_is_multiple_of_packet_size*/ true>(
          block, rhs, rows, cols);

    } else if (standard_patches) {
      // A single packet can span multiple patch rows or columns.
      packStandardPatches</*patch_depth_is_multiple_of_packet_size*/ false>(
          block, rhs, rows, cols);

    } else {
      // With non-standard patches we don't do any vectorized loads.
      // TODO(ezhulenev): It doesn't look like we should completely give up on
      // packets. Make this code path faster!
      for (StorageIndex col = 0; col < cols; ++col) {
        SubMapper lm = rhs.getLinearMapper(0, col);
        for (StorageIndex i = 0; i < rows; ++i) {
          *block = lm(i);
          ++block;
        }
      }
    }
  }

 private:
  // Pack standard image patches:
  //
  // - patch_depth_is_multiple_of_packet_size=true: The depth dimension size is
  //   guaranteed to be a multiple of the packet size, so we can skip all
  //   non-vectorized loads and checks.
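  //
  // - patch_depth_is_multiple_of_packet_size=false: The depth dimension size
  //   is not necessarily a multiple of the packet size, so the bulk of each
  //   depth run is packed with packet loads and the remaining tail with
  //   scalar loads.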
  template <bool patch_depth_is_multiple_of_packet_size>
  EIGEN_ALWAYS_INLINE void packStandardPatches(Scalar* block,
                                               const DataMapper rhs,
                                               StorageIndex rows,
                                               StorageIndex cols) {
    eigen_assert(!rhs.nonStandardPatches());

    // Use the name `peeled_k` for the number of vectorizable rows, matching
    // the other gemm_pack_rhs implementations.
    const StorageIndex peeled_k = (rows / packet_size) * packet_size;

    const StorageIndex start_col = rhs.colOffset();
    const StorageIndex max_col = rhs.maxCol(peeled_k);

    for (StorageIndex col = 0; col < cols; ++col) {
      SubMapper lm = rhs.getLinearMapper(0, col);

      StorageIndex k = 0;
      for (Index c = start_col; c < max_col; ++c) {
        eigen_assert(k <= peeled_k);

        const StorageIndex start_row = (c == start_col) ? rhs.rowOffset() : 0;
        const StorageIndex max_row = rhs.maxRow(peeled_k, c);
        const bool pad_col = lm.padCol(c);

        // We can squeeze reads for all rows in the [start_row, max_row) range.
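        // (Squeezed reads are safe here because neither the column nor any row
        // in the range is padded, so the underlying input values for these
        // rows are contiguous and a packet load may cross row boundaries,
        // covering up to (max_row - start_row) * patchDepth() values.)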
        if (!pad_col && !lm.padAnyRow(start_row, max_row - 1)) {
          const StorageIndex start_depth =
              (c == start_col) ? rhs.depthOffset() : 0;

          const StorageIndex max_depth =
              std::min<StorageIndex>(start_depth + (peeled_k - k),
                                     (max_row - start_row) * rhs.patchDepth());

          const StorageIndex base_idx = lm.baseIndex(start_row, c);

          if (patch_depth_is_multiple_of_packet_size) {
            // If the patch depth is a multiple of the packet size, it's
            // guaranteed that we can process all values in the depth dimension
            // with packets.
            eigen_assert((max_depth - start_depth) % packet_size == 0);
            StorageIndex d = start_depth;

            for (; d < max_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              internal::pstoreu(block, rhs.packetNoPadding(d, base_idx));
              block += packet_size;
              k += packet_size;
            }

          } else {
            StorageIndex d = start_depth;
            const StorageIndex vectorized_depth = max_depth - packet_size;

            for (; d <= vectorized_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              internal::pstoreu(block, rhs.packetNoPadding(d, base_idx));
              block += packet_size;
              k += packet_size;
            }
            for (; d < max_depth; d++) {
              eigen_assert(k < peeled_k);
              *block = rhs.coeffNoPadding(d, base_idx);
              ++block;
              ++k;
            }
          }

          // Go to the next column.
          continue;
        }

        // If we are not allowed to squeeze reads along the `row` and `depth`
        // dimensions, we must process rows one by one.
        for (StorageIndex r = start_row; r < max_row; ++r) {
          eigen_assert(k <= peeled_k);

          const StorageIndex start_depth =
              ((c == start_col) && (r == start_row)) ? rhs.depthOffset() : 0;
          const StorageIndex max_depth =
              rhs.maxDepth(peeled_k - k, start_depth);

          const bool pad = pad_col || lm.padRow(r);
          const StorageIndex base_idx = lm.baseIndex(r, c);

          if (patch_depth_is_multiple_of_packet_size) {
            // If the patch depth is a multiple of the packet size, it's
            // guaranteed that we can process all values in the depth dimension
            // with packets.
            eigen_assert((max_depth - start_depth) % packet_size == 0);
            StorageIndex d = start_depth;

            for (; d < max_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              const Packet p = pad ? pset1<Packet>(Scalar(0))
                                   : rhs.packetNoPadding(d, base_idx);
              internal::pstoreu(block, p);
              block += packet_size;
              k += packet_size;
            }

          } else {
            const StorageIndex max_vectorized_depth = max_depth - packet_size;
            StorageIndex d = start_depth;
            for (; d < max_vectorized_depth; d += packet_size) {
              eigen_assert(k < peeled_k);
              const Packet p = pad ? pset1<Packet>(Scalar(0))
                                   : rhs.packetNoPadding(d, base_idx);
              internal::pstoreu(block, p);
              block += packet_size;
              k += packet_size;
            }
            for (; d < max_depth; d++) {
              eigen_assert(k < peeled_k);
              *block = pad ? Scalar(0) : rhs.coeffNoPadding(d, base_idx);
              ++block;
              ++k;
            }
          }
        }
      }

      // The loop above should fill peeled_k elements.
      eigen_assert(peeled_k == k);
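      // At most packet_size - 1 rows can remain at this point, since peeled_k
      // is `rows` rounded down to a multiple of packet_size.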

      // Fill remaining elements using loadCoeffStandard.
      for (; k < rows; ++k) {
        *block = lm.loadCoeffStandard(k);
        ++block;
      }
    }
  }
};
}  // end namespace internal
}  // end namespace Eigen
#endif  // defined(TENSORFLOW_USE_CUSTOM_CONTRACTION_KERNEL)

// Note: the following header is used in both TF and TFLite. In particular, it
// is used for the float TFLite Conv2D kernel.
#include "tensorflow/core/kernels/eigen_spatial_convolutions-inl.h"
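
// Illustrative usage sketch (approximate; see eigen_spatial_convolutions-inl.h
// for the authoritative SpatialConvolution signature and the exact dimension
// ordering, which depends on the tensor layout):
//
//   // ColMajor layout: input is (channels, height, width, batch) and the
//   // kernel is (filters, channels, kernel_height, kernel_width).
//   Eigen::Tensor<float, 4> input(3, 32, 32, 8);
//   Eigen::Tensor<float, 4> kernel(16, 3, 3, 3);
//   Eigen::Tensor<float, 4> output =
//       Eigen::SpatialConvolution(input, kernel, /*row_stride=*/1,
//                                 /*col_stride=*/1, Eigen::PADDING_SAME);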

#endif  // TENSORFLOW_CORE_KERNELS_EIGEN_SPATIAL_CONVOLUTIONS_H_