1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ 17 #define TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ 18 19 #include <math.h> 20 21 #include "tensorflow/tools/android/test/jni/object_tracking/geom.h" 22 23 namespace tf_tracking { 24 25 // Arbitrary keypoint type ids for labeling the origin of tracked keypoints. 26 enum KeypointType { 27 KEYPOINT_TYPE_DEFAULT = 0, 28 KEYPOINT_TYPE_FAST = 1, 29 KEYPOINT_TYPE_INTEREST = 2 30 }; 31 32 // Struct that can be used to more richly store the results of a detection 33 // than a single number, while still maintaining comparability. 
struct MatchScore {
  // Wraps a raw score value.
  explicit MatchScore(double val) : value(val) {}
  // Default-constructed scores start at zero.
  MatchScore() : value(0.0) {}

  // The underlying score; every comparison operator below orders on it.
  double value;

  // Adds rhs to this score in place and returns *this.
  MatchScore& operator+=(const MatchScore& rhs) {
    value += rhs.value;
    return *this;
  }

  // Returns the sum of both scores as a new object. Neither operand is
  // modified, so this also works when the left-hand side is const (for
  // example one of the static const MatchScore thresholds in this header).
  // Use operator+= when in-place accumulation is wanted.
  MatchScore operator+(const MatchScore& rhs) const {
    return MatchScore(value + rhs.value);
  }

  // Streams only the numeric value, with the stream's current formatting.
  friend std::ostream& operator<<(std::ostream& stream,
                                  const MatchScore& detection) {
    stream << detection.value;
    return stream;
  }
};

// Relational operators: a MatchScore orders exactly like its `value`.
inline bool operator< (const MatchScore& cC1, const MatchScore& cC2) {
  return cC1.value < cC2.value;
}
inline bool operator> (const MatchScore& cC1, const MatchScore& cC2) {
  return cC1.value > cC2.value;
}
inline bool operator>= (const MatchScore& cC1, const MatchScore& cC2) {
  return cC1.value >= cC2.value;
}
inline bool operator<= (const MatchScore& cC1, const MatchScore& cC2) {
  return cC1.value <= cC2.value;
}

// Fixed seed used for all random number generators.
static constexpr int kRandomNumberSeed = 11111;

// TODO(andrewharp): Move as many of these settings as possible into a settings
// object which can be passed in from Java at runtime.

// Whether or not to use ESM instead of LK flow.
static constexpr bool kUseEsm = false;

// This constant gets added to the diagonal of the Hessian
// before solving for translation in 2dof ESM.
// It ensures better behavior especially in the absence of
// strong texture.
static constexpr int kEsmRegularizer = 20;

// Do we want to brightness-normalize each keypoint patch when we compute
// its flow using ESM?
static constexpr bool kDoBrightnessNormalize = true;

// Whether or not to use fixed-point interpolated pixel lookups in optical flow.
#define USE_FIXED_POINT_FLOW 1

// Whether to normalize keypoint windows for intensity in LK optical flow.
// This is a define for now because it helps keep the code streamlined.
#define NORMALIZE 1

// Number of keypoints to store per frame.
static constexpr int kMaxKeypoints = 76;

// Keypoint detection.
// NOTE(review): by its name this looks like the capacity for temporary
// keypoints during detection -- confirm against the usage.
static constexpr int kMaxTempKeypoints = 1024;

// Count of floats written per keypoint when keypoints are exported to an
// array.
static constexpr int kKeypointStep = 7;

// How many frame deltas the circular queue retains.
static constexpr int kNumFrames = 512;

// Tracking iterations performed for each keypoint at each pyramid level.
static constexpr int kNumIterations = 3;

// Each bin of the previous cache level is split into this many bins per side.
// Larger values cost performance through additional cache misses, but make
// cache hits more locally relevant.
static constexpr int kCacheBranchFactor = 2;

// How many levels the cache holds.
// Every cache level is a square grid of bins whose side length is
// branch_factor^(level - 1).
//
// May exceed kNumPyramidLevels. A value of 0 disables caching.
static constexpr int kNumCacheLevels = 3;

// Level at which the cache pyramid is cut off and a matrix transform is used
// instead, provided such a matrix has been given to the cache.
static constexpr int kCacheCutoff = 1;

// NOTE(review): undocumented in the original; by its name, the number of
// pyramid levels. It is the exclusive upper bound quoted for
// kMinNumPyramidLevelsToUseForAdjustment.
static constexpr int kNumPyramidLevels = 4;

// The minimum number of keypoints needed in an object's area.
// NOTE(review): the identifier says "Max" while this description says
// "minimum" -- confirm which of the two matches the usage.
static constexpr int kMaxKeypointsForObject = 16;

// Fewest pyramid levels to run after a cached value has been fetched.
// These levels fine-tune the cached value, which comes from the center of the
// corresponding top cache level box.
// Valid range: [0, kNumPyramidLevels).
static constexpr int kMinNumPyramidLevelsToUseForAdjustment = 1;

// Size of the window integrated over when finding the local image derivative.
static constexpr int kFlowIntegrationWindowSize = 3;

// Total area of integration windows.
137 static const int kFlowArraySize = 138 (2 * kFlowIntegrationWindowSize + 1) * (2 * kFlowIntegrationWindowSize + 1); 139 140 // Error that's considered good enough to early abort tracking. 141 static const float kTrackingAbortThreshold = 0.03f; 142 143 // Maximum number of deviations a keypoint-correspondence delta can be from the 144 // weighted average before being thrown out for region-based queries. 145 static const float kNumDeviations = 2.0f; 146 147 // The length of the allowed delta between the forward and the backward 148 // flow deltas in terms of the length of the forward flow vector. 149 static const float kMaxForwardBackwardErrorAllowed = 0.5f; 150 151 // Threshold for pixels to be considered different. 152 static const int kFastDiffAmount = 10; 153 154 // How far from edge of frame to stop looking for FAST keypoints. 155 static const int kFastBorderBuffer = 10; 156 157 // Determines if non-detected arbitrary keypoints should be added to regions. 158 // This will help if no keypoints have been detected in the region yet. 159 static const bool kAddArbitraryKeypoints = true; 160 161 // How many arbitrary keypoints to add along each axis as candidates for each 162 // region? 163 static const int kNumToAddAsCandidates = 1; 164 165 // In terms of region dimensions, how closely can we place keypoints 166 // next to each other? 167 static const float kClosestPercent = 0.6f; 168 169 // How many FAST qualifying pixels must be connected to a pixel for it to be 170 // considered a candidate keypoint for Harris filtering. 171 static const int kMinNumConnectedForFastKeypoint = 8; 172 173 // Size of the window to integrate over for Harris filtering. 174 // Compare to kFlowIntegrationWindowSize. 175 static const int kHarrisWindowSize = 2; 176 177 178 // DETECTOR PARAMETERS 179 180 // Before relocalizing, make sure the new proposed position is better than 181 // the existing position by a small amount to prevent thrashing. 
182 static const MatchScore kMatchScoreBuffer(0.01f); 183 184 // Minimum score a tracked object can have and still be considered a match. 185 // TODO(andrewharp): Make this a per detector thing. 186 static const MatchScore kMinimumMatchScore(0.5f); 187 188 static const float kMinimumCorrelationForTracking = 0.4f; 189 190 static const MatchScore kMatchScoreForImmediateTermination(0.0f); 191 192 // Run the detector every N frames. 193 static const int kDetectEveryNFrames = 4; 194 195 // How many features does each feature_set contain? 196 static const int kFeaturesPerFeatureSet = 10; 197 198 // The number of FeatureSets managed by the object detector. 199 // More FeatureSets can increase recall at the cost of performance. 200 static const int kNumFeatureSets = 7; 201 202 // How many FeatureSets must respond affirmatively for a candidate descriptor 203 // and position to be given more thorough attention? 204 static const int kNumFeatureSetsForCandidate = 2; 205 206 // How large the thumbnails used for correlation validation are. Used for both 207 // width and height. 208 static const int kNormalizedThumbnailSize = 11; 209 210 // The area of intersection divided by union for the bounding boxes that tells 211 // if this tracking has slipped enough to invalidate all unlocked examples. 212 static const float kPositionOverlapThreshold = 0.6f; 213 214 // The number of detection failures allowed before an object goes invisible. 215 // Tracking will still occur, so if it is actually still being tracked and 216 // comes back into a detectable position, it's likely to be found. 217 static const int kMaxNumDetectionFailures = 4; 218 219 220 // Minimum square size to scan with sliding window. 221 static const float kScanMinSquareSize = 16.0f; 222 223 // Minimum square size to scan with sliding window. 224 static const float kScanMaxSquareSize = 64.0f; 225 226 // Scale difference for consecutive scans of the sliding window. 
// Left as plain `const`: sqrtf() is not a constant expression in standard
// C++, so this value is computed by a call at initialization.
static const float kScanScaleFactor = sqrtf(2.0f);

// Sliding-window step size.
static constexpr int kScanStepSize = 10;


// Packing tightness of the descriptor boxes for confirmed exemplars.
static const float kLockedScaleFactor = 1.0f / sqrtf(2.0f);

// Packing tightness of the descriptor boxes for unconfirmed exemplars.
static constexpr float kUnlockedScaleFactor = 0.5f;

// Packing tightness of the boxes that are scanned centered at the last known
// position.
static const float kLastKnownPositionScaleFactor = 1.0f / sqrtf(2.0f);

// Lower and upper bound on how close a new object example must be to the
// existing object examples for detection to be valid.
static constexpr float kMinCorrelationForNewExample = 0.75f;
static constexpr float kMaxCorrelationForNewExample = 0.99f;


// Safe tries an exemplar gets after being created; missed detections only
// count against it once these are used up.
static constexpr int kFreeTries = 5;

// A false positive is worth this many missed detections.
254 static const int kFalsePositivePenalty = 5; 255 256 struct ObjectDetectorConfig { 257 const Size image_size; 258 ObjectDetectorConfigObjectDetectorConfig259 explicit ObjectDetectorConfig(const Size& image_size) 260 : image_size(image_size) {} 261 virtual ~ObjectDetectorConfig() = default; 262 }; 263 264 struct KeypointDetectorConfig { 265 const Size image_size; 266 267 bool detect_skin; 268 KeypointDetectorConfigKeypointDetectorConfig269 explicit KeypointDetectorConfig(const Size& image_size) 270 : image_size(image_size), 271 detect_skin(false) {} 272 }; 273 274 275 struct OpticalFlowConfig { 276 const Size image_size; 277 OpticalFlowConfigOpticalFlowConfig278 explicit OpticalFlowConfig(const Size& image_size) 279 : image_size(image_size) {} 280 }; 281 282 struct TrackerConfig { 283 const Size image_size; 284 KeypointDetectorConfig keypoint_detector_config; 285 OpticalFlowConfig flow_config; 286 bool always_track; 287 288 float object_box_scale_factor_for_features; 289 TrackerConfigTrackerConfig290 explicit TrackerConfig(const Size& image_size) 291 : image_size(image_size), 292 keypoint_detector_config(image_size), 293 flow_config(image_size), 294 always_track(false), 295 object_box_scale_factor_for_features(1.0f) {} 296 }; 297 298 } // namespace tf_tracking 299 300 #endif // TENSORFLOW_EXAMPLES_ANDROID_JNI_OBJECT_TRACKING_CONFIG_H_ 301