/*
 *
 * select3obj.cpp With a calibration chessboard on a table, mark an object in a 3D box and
 * track that object in all subsequent frames as long as the camera can see
 * the chessboard. Also segments the object using the box projection. This
 * program is useful for collecting large datasets of many views of an object
 * on a table.
 *
 */
10
11 #include "opencv2/core.hpp"
12 #include <opencv2/core/utility.hpp>
13 #include "opencv2/imgproc.hpp"
14 #include "opencv2/calib3d.hpp"
15 #include "opencv2/imgcodecs.hpp"
16 #include "opencv2/videoio.hpp"
17 #include "opencv2/highgui.hpp"
18
19 #include <ctype.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22
23 using namespace std;
24 using namespace cv;
25
// Long usage text printed when the program is started with too few arguments:
// purpose of the tool, command-line options and the interactive key bindings.
const char* helphelp =
"\nThis program's purpose is to collect data sets of an object and its segmentation mask.\n"
"\n"
"It shows how to use a calibrated camera together with a calibration pattern to\n"
"compute the homography of the plane the calibration pattern is on. It also shows grabCut\n"
"segmentation etc.\n"
"\n"
"select3dobj -w <board_width> -h <board_height> [-s <square_size>]\n"
" -i <camera_intrinsics_filename> -o <output_prefix> [video_filename/cameraId]\n"
"\n"
" -w <board_width> Number of chessboard corners wide\n"
" -h <board_height> Number of chessboard corners high\n"
" [-s <square_size>] Optional measure of chessboard squares in meters\n"
" -i <camera_intrinsics_filename> Camera matrix .yml file from calibration.cpp\n"
" -o <output_prefix> Prefix the output segmentation images with this\n"
" [video_filename/cameraId] If present, read from that video file or that ID\n"
"\n"
"Using a camera's intrinsics (from calibrating a camera -- see calibration.cpp) and an\n"
"image of the object sitting on a planar surface with a calibration pattern of\n"
"(board_width x board_height) on the surface, we draw a 3D box around the object. From\n"
"then on, we can move a camera and as long as it sees the chessboard calibration pattern,\n"
"it will store a mask of where the object is. We get successive images using <output_prefix>\n"
"of the segmentation mask containing the object. This makes creating training sets easy.\n"
"It is best if the chessboard is odd x even in dimensions to avoid ambiguous poses.\n"
"\n"
"The actions one can use while the program is running are:\n"
"\n"
" Select object as 3D box with the mouse.\n"
" First draw one line on the plane to outline the projection of that object on the plane\n"
" Then extend that line into a box to encompass the projection of that object onto the plane\n"
" Then use the mouse again to extend the box upwards from the plane to encase the object.\n"
" Then use the following commands\n"
" ESC - Reset the selection\n"
" SPACE - Skip the frame; move to the next frame (not in video mode)\n"
" ENTER - Confirm the selection. Grab next object in video mode.\n"
" q - Exit the program\n"
"\n\n";
63
64 // static void help()
65 // {
66 // puts(helphelp);
67 // }
68
69
70 struct MouseEvent
71 {
MouseEventMouseEvent72 MouseEvent() { event = -1; buttonState = 0; }
73 Point pt;
74 int event;
75 int buttonState;
76 };
77
onMouse(int event,int x,int y,int flags,void * userdata)78 static void onMouse(int event, int x, int y, int flags, void* userdata)
79 {
80 MouseEvent* data = (MouseEvent*)userdata;
81 data->event = event;
82 data->pt = Point(x,y);
83 data->buttonState = flags;
84 }
85
readCameraMatrix(const string & filename,Mat & cameraMatrix,Mat & distCoeffs,Size & calibratedImageSize)86 static bool readCameraMatrix(const string& filename,
87 Mat& cameraMatrix, Mat& distCoeffs,
88 Size& calibratedImageSize )
89 {
90 FileStorage fs(filename, FileStorage::READ);
91 fs["image_width"] >> calibratedImageSize.width;
92 fs["image_height"] >> calibratedImageSize.height;
93 fs["distortion_coefficients"] >> distCoeffs;
94 fs["camera_matrix"] >> cameraMatrix;
95
96 if( distCoeffs.type() != CV_64F )
97 distCoeffs = Mat_<double>(distCoeffs);
98 if( cameraMatrix.type() != CV_64F )
99 cameraMatrix = Mat_<double>(cameraMatrix);
100
101 return true;
102 }
103
calcChessboardCorners(Size boardSize,float squareSize,vector<Point3f> & corners)104 static void calcChessboardCorners(Size boardSize, float squareSize, vector<Point3f>& corners)
105 {
106 corners.resize(0);
107
108 for( int i = 0; i < boardSize.height; i++ )
109 for( int j = 0; j < boardSize.width; j++ )
110 corners.push_back(Point3f(float(j*squareSize),
111 float(i*squareSize), 0));
112 }
113
114
image2plane(Point2f imgpt,const Mat & R,const Mat & tvec,const Mat & cameraMatrix,double Z)115 static Point3f image2plane(Point2f imgpt, const Mat& R, const Mat& tvec,
116 const Mat& cameraMatrix, double Z)
117 {
118 Mat R1 = R.clone();
119 R1.col(2) = R1.col(2)*Z + tvec;
120 Mat_<double> v = (cameraMatrix*R1).inv()*(Mat_<double>(3,1) << imgpt.x, imgpt.y, 1);
121 double iw = fabs(v(2,0)) > DBL_EPSILON ? 1./v(2,0) : 0;
122 return Point3f((float)(v(0,0)*iw), (float)(v(1,0)*iw), (float)Z);
123 }
124
125
extract3DBox(const Mat & frame,Mat & shownFrame,Mat & selectedObjFrame,const Mat & cameraMatrix,const Mat & rvec,const Mat & tvec,const vector<Point3f> & box,int nobjpt,bool runExtraSegmentation)126 static Rect extract3DBox(const Mat& frame, Mat& shownFrame, Mat& selectedObjFrame,
127 const Mat& cameraMatrix, const Mat& rvec, const Mat& tvec,
128 const vector<Point3f>& box, int nobjpt, bool runExtraSegmentation)
129 {
130 selectedObjFrame = Mat::zeros(frame.size(), frame.type());
131 if( nobjpt == 0 )
132 return Rect();
133 vector<Point3f> objpt;
134 vector<Point2f> imgpt;
135
136 objpt.push_back(box[0]);
137 if( nobjpt > 1 )
138 objpt.push_back(box[1]);
139 if( nobjpt > 2 )
140 {
141 objpt.push_back(box[2]);
142 objpt.push_back(objpt[2] - objpt[1] + objpt[0]);
143 }
144 if( nobjpt > 3 )
145 for( int i = 0; i < 4; i++ )
146 objpt.push_back(Point3f(objpt[i].x, objpt[i].y, box[3].z));
147
148 projectPoints(Mat(objpt), rvec, tvec, cameraMatrix, Mat(), imgpt);
149
150 if( !shownFrame.empty() )
151 {
152 if( nobjpt == 1 )
153 circle(shownFrame, imgpt[0], 3, Scalar(0,255,0), -1, LINE_AA);
154 else if( nobjpt == 2 )
155 {
156 circle(shownFrame, imgpt[0], 3, Scalar(0,255,0), -1, LINE_AA);
157 circle(shownFrame, imgpt[1], 3, Scalar(0,255,0), -1, LINE_AA);
158 line(shownFrame, imgpt[0], imgpt[1], Scalar(0,255,0), 3, LINE_AA);
159 }
160 else if( nobjpt == 3 )
161 for( int i = 0; i < 4; i++ )
162 {
163 circle(shownFrame, imgpt[i], 3, Scalar(0,255,0), -1, LINE_AA);
164 line(shownFrame, imgpt[i], imgpt[(i+1)%4], Scalar(0,255,0), 3, LINE_AA);
165 }
166 else
167 for( int i = 0; i < 8; i++ )
168 {
169 circle(shownFrame, imgpt[i], 3, Scalar(0,255,0), -1, LINE_AA);
170 line(shownFrame, imgpt[i], imgpt[(i+1)%4 + (i/4)*4], Scalar(0,255,0), 3, LINE_AA);
171 line(shownFrame, imgpt[i], imgpt[i%4], Scalar(0,255,0), 3, LINE_AA);
172 }
173 }
174
175 if( nobjpt <= 2 )
176 return Rect();
177 vector<Point> hull;
178 convexHull(Mat_<Point>(Mat(imgpt)), hull);
179 Mat selectedObjMask = Mat::zeros(frame.size(), CV_8U);
180 fillConvexPoly(selectedObjMask, &hull[0], (int)hull.size(), Scalar::all(255), 8, 0);
181 Rect roi = boundingRect(Mat(hull)) & Rect(Point(), frame.size());
182
183 if( runExtraSegmentation )
184 {
185 selectedObjMask = Scalar::all(GC_BGD);
186 fillConvexPoly(selectedObjMask, &hull[0], (int)hull.size(), Scalar::all(GC_PR_FGD), 8, 0);
187 Mat bgdModel, fgdModel;
188 grabCut(frame, selectedObjMask, roi, bgdModel, fgdModel,
189 3, GC_INIT_WITH_RECT + GC_INIT_WITH_MASK);
190 bitwise_and(selectedObjMask, Scalar::all(1), selectedObjMask);
191 }
192
193 frame.copyTo(selectedObjFrame, selectedObjMask);
194 return roi;
195 }
196
197
select3DBox(const string & windowname,const string & selWinName,const Mat & frame,const Mat & cameraMatrix,const Mat & rvec,const Mat & tvec,vector<Point3f> & box)198 static int select3DBox(const string& windowname, const string& selWinName, const Mat& frame,
199 const Mat& cameraMatrix, const Mat& rvec, const Mat& tvec,
200 vector<Point3f>& box)
201 {
202 const float eps = 1e-3f;
203 MouseEvent mouse;
204
205 setMouseCallback(windowname, onMouse, &mouse);
206 vector<Point3f> tempobj(8);
207 vector<Point2f> imgpt(4), tempimg(8);
208 vector<Point> temphull;
209 int nobjpt = 0;
210 Mat R, selectedObjMask, selectedObjFrame, shownFrame;
211 Rodrigues(rvec, R);
212 box.resize(4);
213
214 for(;;)
215 {
216 float Z = 0.f;
217 bool dragging = (mouse.buttonState & EVENT_FLAG_LBUTTON) != 0;
218 int npt = nobjpt;
219
220 if( (mouse.event == EVENT_LBUTTONDOWN ||
221 mouse.event == EVENT_LBUTTONUP ||
222 dragging) && nobjpt < 4 )
223 {
224 Point2f m = mouse.pt;
225
226 if( nobjpt < 2 )
227 imgpt[npt] = m;
228 else
229 {
230 tempobj.resize(1);
231 int nearestIdx = npt-1;
232 if( nobjpt == 3 )
233 {
234 nearestIdx = 0;
235 for( int i = 1; i < npt; i++ )
236 if( norm(m - imgpt[i]) < norm(m - imgpt[nearestIdx]) )
237 nearestIdx = i;
238 }
239
240 if( npt == 2 )
241 {
242 float dx = box[1].x - box[0].x, dy = box[1].y - box[0].y;
243 float len = 1.f/std::sqrt(dx*dx+dy*dy);
244 tempobj[0] = Point3f(dy*len + box[nearestIdx].x,
245 -dx*len + box[nearestIdx].y, 0.f);
246 }
247 else
248 tempobj[0] = Point3f(box[nearestIdx].x, box[nearestIdx].y, 1.f);
249
250 projectPoints(Mat(tempobj), rvec, tvec, cameraMatrix, Mat(), tempimg);
251
252 Point2f a = imgpt[nearestIdx], b = tempimg[0], d1 = b - a, d2 = m - a;
253 float n1 = (float)norm(d1), n2 = (float)norm(d2);
254 if( n1*n2 < eps )
255 imgpt[npt] = a;
256 else
257 {
258 Z = d1.dot(d2)/(n1*n1);
259 imgpt[npt] = d1*Z + a;
260 }
261 }
262 box[npt] = image2plane(imgpt[npt], R, tvec, cameraMatrix, npt<3 ? 0 : Z);
263
264 if( (npt == 0 && mouse.event == EVENT_LBUTTONDOWN) ||
265 (npt > 0 && norm(box[npt] - box[npt-1]) > eps &&
266 mouse.event == EVENT_LBUTTONUP) )
267 {
268 nobjpt++;
269 if( nobjpt < 4 )
270 {
271 imgpt[nobjpt] = imgpt[nobjpt-1];
272 box[nobjpt] = box[nobjpt-1];
273 }
274 }
275
276 // reset the event
277 mouse.event = -1;
278 //mouse.buttonState = 0;
279 npt++;
280 }
281
282 frame.copyTo(shownFrame);
283 extract3DBox(frame, shownFrame, selectedObjFrame,
284 cameraMatrix, rvec, tvec, box, npt, false);
285 imshow(windowname, shownFrame);
286 imshow(selWinName, selectedObjFrame);
287
288 int c = waitKey(30);
289 if( (c & 255) == 27 )
290 {
291 nobjpt = 0;
292 }
293 if( c == 'q' || c == 'Q' || c == ' ' )
294 {
295 box.clear();
296 return c == ' ' ? -1 : -100;
297 }
298 if( (c == '\r' || c == '\n') && nobjpt == 4 && box[3].z != 0 )
299 return 1;
300 }
301 }
302
303
readModelViews(const string & filename,vector<Point3f> & box,vector<string> & imagelist,vector<Rect> & roiList,vector<Vec6f> & poseList)304 static bool readModelViews( const string& filename, vector<Point3f>& box,
305 vector<string>& imagelist,
306 vector<Rect>& roiList, vector<Vec6f>& poseList )
307 {
308 imagelist.resize(0);
309 roiList.resize(0);
310 poseList.resize(0);
311 box.resize(0);
312
313 FileStorage fs(filename, FileStorage::READ);
314 if( !fs.isOpened() )
315 return false;
316 fs["box"] >> box;
317
318 FileNode all = fs["views"];
319 if( all.type() != FileNode::SEQ )
320 return false;
321 FileNodeIterator it = all.begin(), it_end = all.end();
322
323 for(; it != it_end; ++it)
324 {
325 FileNode n = *it;
326 imagelist.push_back((string)n["image"]);
327 FileNode nr = n["rect"];
328 roiList.push_back(Rect((int)nr[0], (int)nr[1], (int)nr[2], (int)nr[3]));
329 FileNode np = n["pose"];
330 poseList.push_back(Vec6f((float)np[0], (float)np[1], (float)np[2],
331 (float)np[3], (float)np[4], (float)np[5]));
332 }
333
334 return true;
335 }
336
337
writeModelViews(const string & filename,const vector<Point3f> & box,const vector<string> & imagelist,const vector<Rect> & roiList,const vector<Vec6f> & poseList)338 static bool writeModelViews(const string& filename, const vector<Point3f>& box,
339 const vector<string>& imagelist,
340 const vector<Rect>& roiList,
341 const vector<Vec6f>& poseList)
342 {
343 FileStorage fs(filename, FileStorage::WRITE);
344 if( !fs.isOpened() )
345 return false;
346
347 fs << "box" << "[:";
348 fs << box << "]" << "views" << "[";
349
350 size_t i, nviews = imagelist.size();
351
352 CV_Assert( nviews == roiList.size() && nviews == poseList.size() );
353
354 for( i = 0; i < nviews; i++ )
355 {
356 Rect r = roiList[i];
357 Vec6f p = poseList[i];
358
359 fs << "{" << "image" << imagelist[i] <<
360 "roi" << "[:" << r.x << r.y << r.width << r.height << "]" <<
361 "pose" << "[:" << p[0] << p[1] << p[2] << p[3] << p[4] << p[5] << "]" << "}";
362 }
363 fs << "]";
364
365 return true;
366 }
367
368
readStringList(const string & filename,vector<string> & l)369 static bool readStringList( const string& filename, vector<string>& l )
370 {
371 l.resize(0);
372 FileStorage fs(filename, FileStorage::READ);
373 if( !fs.isOpened() )
374 return false;
375 FileNode n = fs.getFirstTopLevelNode();
376 if( n.type() != FileNode::SEQ )
377 return false;
378 FileNodeIterator it = n.begin(), it_end = n.end();
379 for( ; it != it_end; ++it )
380 l.push_back((string)*it);
381 return true;
382 }
383
384
main(int argc,char ** argv)385 int main(int argc, char** argv)
386 {
387 const char* help = "Usage: select3dobj -w <board_width> -h <board_height> [-s <square_size>]\n"
388 "\t-i <intrinsics_filename> -o <output_prefix> [video_filename/cameraId]\n";
389 const char* screen_help =
390 "Actions: \n"
391 "\tSelect object as 3D box with the mouse. That's it\n"
392 "\tESC - Reset the selection\n"
393 "\tSPACE - Skip the frame; move to the next frame (not in video mode)\n"
394 "\tENTER - Confirm the selection. Grab next object in video mode.\n"
395 "\tq - Exit the program\n";
396
397 if(argc < 5)
398 {
399 puts(helphelp);
400 puts(help);
401 return 0;
402 }
403 const char* intrinsicsFilename = 0;
404 const char* outprefix = 0;
405 const char* inputName = 0;
406 int cameraId = 0;
407 Size boardSize;
408 double squareSize = 1;
409 vector<string> imageList;
410
411 for( int i = 1; i < argc; i++ )
412 {
413 if( strcmp(argv[i], "-i") == 0 )
414 intrinsicsFilename = argv[++i];
415 else if( strcmp(argv[i], "-o") == 0 )
416 outprefix = argv[++i];
417 else if( strcmp(argv[i], "-w") == 0 )
418 {
419 if(sscanf(argv[++i], "%d", &boardSize.width) != 1 || boardSize.width <= 0)
420 {
421 printf("Incorrect -w parameter (must be a positive integer)\n");
422 puts(help);
423 return 0;
424 }
425 }
426 else if( strcmp(argv[i], "-h") == 0 )
427 {
428 if(sscanf(argv[++i], "%d", &boardSize.height) != 1 || boardSize.height <= 0)
429 {
430 printf("Incorrect -h parameter (must be a positive integer)\n");
431 puts(help);
432 return 0;
433 }
434 }
435 else if( strcmp(argv[i], "-s") == 0 )
436 {
437 if(sscanf(argv[++i], "%lf", &squareSize) != 1 || squareSize <= 0)
438 {
439 printf("Incorrect -w parameter (must be a positive real number)\n");
440 puts(help);
441 return 0;
442 }
443 }
444 else if( argv[i][0] != '-' )
445 {
446 if( isdigit(argv[i][0]))
447 sscanf(argv[i], "%d", &cameraId);
448 else
449 inputName = argv[i];
450 }
451 else
452 {
453 printf("Incorrect option\n");
454 puts(help);
455 return 0;
456 }
457 }
458
459 if( !intrinsicsFilename || !outprefix ||
460 boardSize.width <= 0 || boardSize.height <= 0 )
461 {
462 printf("Some of the required parameters are missing\n");
463 puts(help);
464 return 0;
465 }
466
467 Mat cameraMatrix, distCoeffs;
468 Size calibratedImageSize;
469 readCameraMatrix(intrinsicsFilename, cameraMatrix, distCoeffs, calibratedImageSize );
470
471 VideoCapture capture;
472 if( inputName )
473 {
474 if( !readStringList(inputName, imageList) &&
475 !capture.open(inputName))
476 {
477 fprintf( stderr, "The input file could not be opened\n" );
478 return -1;
479 }
480 }
481 else
482 capture.open(cameraId);
483
484 if( !capture.isOpened() && imageList.empty() )
485 return fprintf( stderr, "Could not initialize video capture\n" ), -2;
486
487 const char* outbarename = 0;
488 {
489 outbarename = strrchr(outprefix, '/');
490 const char* tmp = strrchr(outprefix, '\\');
491 char cmd[1000];
492 sprintf(cmd, "mkdir %s", outprefix);
493 if( tmp && tmp > outbarename )
494 outbarename = tmp;
495 if( outbarename )
496 {
497 cmd[6 + outbarename - outprefix] = '\0';
498 int result = system(cmd);
499 CV_Assert(result == 0);
500 outbarename++;
501 }
502 else
503 outbarename = outprefix;
504 }
505
506 Mat frame, shownFrame, selectedObjFrame, mapxy;
507
508 namedWindow("View", 1);
509 namedWindow("Selected Object", 1);
510 setMouseCallback("View", onMouse, 0);
511 bool boardFound = false;
512
513 string indexFilename = format("%s_index.yml", outprefix);
514
515 vector<string> capturedImgList;
516 vector<Rect> roiList;
517 vector<Vec6f> poseList;
518 vector<Point3f> box, boardPoints;
519
520 readModelViews(indexFilename, box, capturedImgList, roiList, poseList);
521 calcChessboardCorners(boardSize, (float)squareSize, boardPoints);
522 int frameIdx = 0;
523 bool grabNext = !imageList.empty();
524
525 puts(screen_help);
526
527 for(int i = 0;;i++)
528 {
529 Mat frame0;
530 if( !imageList.empty() )
531 {
532 if( i < (int)imageList.size() )
533 frame0 = imread(string(imageList[i]), 1);
534 }
535 else
536 capture >> frame0;
537 if( frame0.empty() )
538 break;
539 if( frame.empty() )
540 {
541 if( frame0.size() != calibratedImageSize )
542 {
543 double sx = (double)frame0.cols/calibratedImageSize.width;
544 double sy = (double)frame0.rows/calibratedImageSize.height;
545
546 // adjust the camera matrix for the new resolution
547 cameraMatrix.at<double>(0,0) *= sx;
548 cameraMatrix.at<double>(0,2) *= sx;
549 cameraMatrix.at<double>(1,1) *= sy;
550 cameraMatrix.at<double>(1,2) *= sy;
551 }
552 Mat dummy;
553 initUndistortRectifyMap(cameraMatrix, distCoeffs, Mat(),
554 cameraMatrix, frame0.size(),
555 CV_32FC2, mapxy, dummy );
556 distCoeffs = Mat::zeros(5, 1, CV_64F);
557 }
558 remap(frame0, frame, mapxy, Mat(), INTER_LINEAR);
559 vector<Point2f> foundBoardCorners;
560 boardFound = findChessboardCorners(frame, boardSize, foundBoardCorners);
561
562 Mat rvec, tvec;
563 if( boardFound )
564 solvePnP(Mat(boardPoints), Mat(foundBoardCorners), cameraMatrix,
565 distCoeffs, rvec, tvec, false);
566
567 frame.copyTo(shownFrame);
568 drawChessboardCorners(shownFrame, boardSize, Mat(foundBoardCorners), boardFound);
569 selectedObjFrame = Mat::zeros(frame.size(), frame.type());
570
571 if( boardFound && grabNext )
572 {
573 if( box.empty() )
574 {
575 int code = select3DBox("View", "Selected Object", frame,
576 cameraMatrix, rvec, tvec, box);
577 if( code == -100 )
578 break;
579 }
580
581 if( !box.empty() )
582 {
583 Rect r = extract3DBox(frame, shownFrame, selectedObjFrame,
584 cameraMatrix, rvec, tvec, box, 4, true);
585 if( r.area() )
586 {
587 const int maxFrameIdx = 10000;
588 char path[1000];
589 for(;frameIdx < maxFrameIdx;frameIdx++)
590 {
591 sprintf(path, "%s%04d.jpg", outprefix, frameIdx);
592 FILE* f = fopen(path, "rb");
593 if( !f )
594 break;
595 fclose(f);
596 }
597 if( frameIdx == maxFrameIdx )
598 {
599 printf("Can not save the image as %s<...>.jpg", outprefix);
600 break;
601 }
602 imwrite(path, selectedObjFrame(r));
603
604 capturedImgList.push_back(string(path));
605 roiList.push_back(r);
606
607 float p[6];
608 Mat RV(3, 1, CV_32F, p), TV(3, 1, CV_32F, p+3);
609 rvec.convertTo(RV, RV.type());
610 tvec.convertTo(TV, TV.type());
611 poseList.push_back(Vec6f(p[0], p[1], p[2], p[3], p[4], p[5]));
612 }
613 }
614 grabNext = !imageList.empty();
615 }
616
617 imshow("View", shownFrame);
618 imshow("Selected Object", selectedObjFrame);
619 int c = waitKey(imageList.empty() && !box.empty() ? 30 : 300);
620 if( c == 'q' || c == 'Q' )
621 break;
622 if( c == '\r' || c == '\n' )
623 grabNext = true;
624 }
625
626 writeModelViews(indexFilename, box, capturedImgList, roiList, poseList);
627 return 0;
628 }
629