1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // =============================================================================
15
16 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
17
18 #include <errno.h>
19 #include <fcntl.h>
20 #include <stdlib.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #include <sys/wait.h>
24 #include <unistd.h>
25
26 #include <vector>
27
28 #include "tensorflow/core/lib/io/path.h"
29 #include "tensorflow/core/lib/strings/numbers.h"
30 #include "tensorflow/core/lib/strings/str_util.h"
31 #include "tensorflow/core/platform/byte_order.h"
32 #include "tensorflow/core/platform/env.h"
33
34 using tensorflow::strings::StrCat;
35
36 namespace tensorflow {
37 namespace ffmpeg {
38 namespace {
39
40 const char kFfmpegExecutable[] = "ffmpeg";
41 const int32 kDefaultProbeSize = 5000000; // 5MB
42
FfmpegAudioCommandLine(const string & input_filename,const string & output_filename,const string & input_format_id,int32 samples_per_second,int32 channel_count,const string & stream)43 std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
44 const string& output_filename,
45 const string& input_format_id,
46 int32 samples_per_second,
47 int32 channel_count,
48 const string& stream) {
49 std::vector<string> command({
50 "-nostats", // No additional progress display.
51 "-nostdin", // No interactive commands accepted.
52 "-f", input_format_id, // eg: "mp3"
53 "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
54 "-loglevel", "error", // Print errors only.
55 "-hide_banner", // Skip printing build options, version, etc.
56 "-map_metadata", "-1", // Copy global metadata from input to output.
57 "-vn", // No video recording.
58 "-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second),
59 // Output set (in several ways) to signed 16-bit little-endian ints.
60 "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
61 "-sn", // No subtitle recording.
62 "-y" // Overwrite output file.
63 });
64 if (!stream.empty()) {
65 command.emplace_back("-map");
66 command.emplace_back(StrCat("0:", stream));
67 }
68 command.emplace_back(StrCat(output_filename));
69
70 return command;
71 }
72
FfmpegVideoCommandLine(const string & input_filename,const string & output_filename)73 std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
74 const string& output_filename) {
75 return {"-nostats", // No additional progress display.
76 "-nostdin", // No interactive commands accepted.
77 "-i", input_filename, "-f", "image2pipe", "-probesize",
78 StrCat(kDefaultProbeSize), "-loglevel",
79 // Info is needed to get the information about stream, etc.
80 // It is generated to a separate file, not stdout/stderr.
81 "info",
82 "-hide_banner", // Skip printing build options, version, etc.
83 "-vcodec", "rawvideo", "-pix_fmt", "rgb24",
84 "-y", // Overwrite output file.
85 StrCat(output_filename)};
86 }
87
88 // Is a named binary installed and executable by the current process?
89 // Note that this is harder than it seems like it should be...
IsBinaryInstalled(const string & binary_name)90 bool IsBinaryInstalled(const string& binary_name) {
91 string path = ::getenv("PATH");
92 for (const string& dir : str_util::Split(path, ':')) {
93 const string binary_path = io::JoinPath(dir, binary_name);
94 char absolute_path[PATH_MAX + 1];
95 if (::realpath(binary_path.c_str(), absolute_path) == nullptr) {
96 continue;
97 }
98 struct stat statinfo;
99 int result = ::stat(absolute_path, &statinfo);
100 if (result < 0) {
101 continue;
102 }
103 if (!S_ISREG(statinfo.st_mode)) {
104 continue;
105 }
106
107 // Is the current user able to execute the file?
108 if (statinfo.st_uid == ::geteuid() && statinfo.st_mode & S_IXUSR) {
109 return true;
110 }
111 // Is the current group able to execute the file?
112 if (statinfo.st_uid == ::getegid() && statinfo.st_mode & S_IXGRP) {
113 return true;
114 }
115 // Is anyone able to execute the file?
116 if (statinfo.st_mode & S_IXOTH) {
117 return true;
118 }
119 }
120 return false;
121 }
122
ExecuteFfmpeg(const std::vector<string> & args)123 [[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) {
124 std::vector<char*> args_chars;
125 std::transform(args.begin(), args.end(), std::back_inserter(args_chars),
126 [](const string& s) { return const_cast<char*>(s.c_str()); });
127 args_chars.push_back(nullptr);
128 ::execvp(kFfmpegExecutable, args_chars.data());
129 // exec only returns on error.
130 const int error = errno;
131 LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error);
132 ::_exit(error);
133 }
134
135 // Reads a PCM file using signed little endian 16-bit encoding (s16le).
ReadPcmFile(const string & filename)136 std::vector<float> ReadPcmFile(const string& filename) {
137 string raw_data;
138 TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &raw_data))
139 << "Could not read FFmpeg output file: " << filename;
140
141 std::vector<float> samples;
142 const int32 sample_count = raw_data.size() / sizeof(int16);
143 samples.reserve(sample_count);
144
145 for (int32 i = 0; i < sample_count; ++i) {
146 // Most of this is jumping through hoops in the standard to convert some
147 // bits into the right format. I hope that an optimizing compiler will
148 // remove almost all of this code.
149 char raw[2] = {raw_data[i * 2], raw_data[i * 2 + 1]};
150 if (!port::kLittleEndian) {
151 std::swap(raw[0], raw[1]);
152 }
153 int16 host_order;
154 ::memcpy(&host_order, raw, sizeof(host_order));
155 const double normalized =
156 static_cast<double>(host_order) / std::numeric_limits<int16>::max();
157 samples.push_back(normalized);
158 }
159 return samples;
160 }
161
162 template <typename UInt>
LittleEndianData(UInt data)163 string LittleEndianData(UInt data) {
164 static_assert(std::is_unsigned<UInt>::value, "UInt must be unsigned");
165 string str;
166 for (size_t i = 0; i < sizeof(UInt); ++i) {
167 const unsigned char bits = static_cast<unsigned char>(data & 0xFFU);
168 char ch;
169 ::memcpy(&ch, &bits, sizeof(bits));
170 str.push_back(ch);
171 data >>= 8;
172 }
173 return str;
174 }
175
LittleEndianDataInt(uint32 data)176 string LittleEndianDataInt(uint32 data) {
177 return LittleEndianData<uint32>(data);
178 }
179
LittleEndianDataShort(uint16 data)180 string LittleEndianDataShort(uint16 data) {
181 return LittleEndianData<uint16>(data);
182 }
183
WavHeader(int32 samples_per_second,int32 channel_count,const std::vector<float> & samples)184 string WavHeader(int32 samples_per_second, int32 channel_count,
185 const std::vector<float>& samples) {
186 string header = "RIFF";
187 header += LittleEndianDataInt(36U + samples.size() * sizeof(int16));
188 header += "WAVEfmt ";
189 header += LittleEndianDataInt(16);
190 header += LittleEndianDataShort(1);
191 header += LittleEndianDataShort(channel_count);
192 header += LittleEndianDataInt(samples_per_second);
193 header +=
194 LittleEndianDataInt(samples_per_second * channel_count * sizeof(int16));
195 header += LittleEndianDataShort(channel_count * sizeof(int16));
196 header += LittleEndianDataShort(16);
197 header += "data";
198 header += LittleEndianDataInt(samples.size() * sizeof(int16));
199 CHECK_EQ(header.size(), 44);
200 return header;
201 }
202
203 // Creates the contents of a .wav file using pcm_s16le format (signed 16 bit
204 // little endian integers).
BuildWavFile(int32 samples_per_second,int32 channel_count,const std::vector<float> & samples)205 string BuildWavFile(int32 samples_per_second, int32 channel_count,
206 const std::vector<float>& samples) {
207 string data = WavHeader(samples_per_second, channel_count, samples);
208 data.reserve(data.size() + samples.size() * sizeof(int16));
209 for (float value : samples) {
210 const int16 quantized =
211 static_cast<int16>(value * std::numeric_limits<int16>::max());
212 char raw[2];
213 ::memcpy(raw, &quantized, sizeof(int16));
214 if (!port::kLittleEndian) {
215 std::swap(raw[0], raw[1]);
216 }
217 data.push_back(raw[0]);
218 data.push_back(raw[1]);
219 }
220 return data;
221 }
222
ReadInfoFile(const string & filename,uint32 * width,uint32 * height,uint32 * frames)223 Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
224 uint32* frames) {
225 string data;
226 TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data))
227 << "Could not read FFmpeg file: " << filename;
228 bool in_output = false;
229 bool in_mapping = false;
230 uint32 frames_value = 0;
231 uint32 height_value = 0;
232 uint32 width_value = 0;
233 for (const string& line : str_util::Split(data, '\n')) {
234 // Output starts with the first line of `Output #..`.
235 // Further processing output region starts next line so we could continue
236 // the loop.
237 if (!in_output && line.find("Output #") == 0) {
238 in_output = true;
239 in_mapping = false;
240 continue;
241 }
242 // Stream mapping starts with the first line of `Stream mapping`, it also
243 // signals the end of Output section.
244 // Further processing of stream mapping region starts next line so we could
245 // continue the loop.
246 if (!in_mapping && line.find("Stream mapping:") == 0) {
247 in_output = false;
248 in_mapping = true;
249 continue;
250 }
251 if (in_output) {
252 // We only look for the first stream in output `Stream #0`.
253 // Once processed we will not further process output section.
254 if (line.find(" Stream #") == 0) {
255 size_t p = line.find(", rgb24, ", 24);
256 if (p != std::string::npos) {
257 string rgb24 = line.substr(p + 9, line.find(" ", p + 9));
258 rgb24 = rgb24.substr(0, rgb24.find(","));
259 // Strip anything after " ", in case the format is
260 // `640x360 [SAR 1:1 DAR 16:9]`
261 rgb24 = rgb24.substr(0, rgb24.find(" "));
262 string rgb24_width = rgb24.substr(0, rgb24.find("x"));
263 string rgb24_height = rgb24.substr(rgb24_width.length() + 1);
264 if (strings::safe_strtou32(rgb24_width, &width_value) &&
265 strings::safe_strtou32(rgb24_height, &height_value)) {
266 in_output = false;
267 }
268 }
269 }
270 continue;
271 }
272 if (in_mapping) {
273 // We only look for the first stream mapping to have the number of the
274 // frames.
275 // Once processed we will not further process stream mapping section.
276 if (line.find("frame=") == 0) {
277 // The format might be `frame= 166 ` or `frame=12488 `
278 string number = line.substr(6);
279 number = number.substr(number.find_first_not_of(" "));
280 number = number.substr(0, number.find(" "));
281 if (strings::safe_strtou32(number, &frames_value)) {
282 in_mapping = false;
283 }
284 }
285 continue;
286 }
287 }
288 if (frames_value == 0 || height_value == 0 || width_value == 0) {
289 return errors::Unknown("Not enough video info returned by FFmpeg [",
290 frames_value, ", ", height_value, ", ", width_value,
291 ", 3]");
292 }
293 *width = width_value;
294 *height = height_value;
295 *frames = frames_value;
296 return Status::OK();
297 }
298
299 } // namespace
300
~FileDeleter()301 FileDeleter::~FileDeleter() {
302 Env& env = *Env::Default();
303 env.DeleteFile(filename_).IgnoreError();
304 }
305
WriteFile(const string & filename,StringPiece contents)306 Status WriteFile(const string& filename, StringPiece contents) {
307 Env& env = *Env::Default();
308 std::unique_ptr<WritableFile> file;
309 TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
310 TF_RETURN_IF_ERROR(file->Append(contents));
311 TF_RETURN_IF_ERROR(file->Close());
312 return Status::OK();
313 }
314
ReadAudioFile(const string & filename,const string & audio_format_id,int32 samples_per_second,int32 channel_count,const string & stream,std::vector<float> * output_samples)315 Status ReadAudioFile(const string& filename, const string& audio_format_id,
316 int32 samples_per_second, int32 channel_count,
317 const string& stream, std::vector<float>* output_samples) {
318 // Create an argument list.
319 string output_filename = io::GetTempFilename("raw");
320 const std::vector<string> args =
321 FfmpegAudioCommandLine(filename, output_filename, audio_format_id,
322 samples_per_second, channel_count, stream);
323 // Unfortunately, it's impossible to differentiate an exec failure due to the
324 // binary being missing and an error from the binary's execution. Therefore,
325 // check to see if the binary *should* be available. If not, return an error
326 // that will be converted into a helpful error message by the TensorFlow op.
327 if (!IsBinaryInstalled(kFfmpegExecutable)) {
328 return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
329 }
330
331 // Execute ffmpeg and report errors.
332 pid_t child_pid = ::fork();
333 if (child_pid < 0) {
334 return Status(error::Code::UNKNOWN,
335 StrCat("fork failed: ", strerror(errno)));
336 }
337 if (child_pid == 0) {
338 ExecuteFfmpeg(args);
339 } else {
340 int status_code;
341 ::waitpid(child_pid, &status_code, 0);
342 if (status_code) {
343 return Status(error::Code::UNKNOWN,
344 StrCat("FFmpeg execution failed: ", status_code));
345 }
346 *output_samples = ReadPcmFile(output_filename);
347 TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
348 << output_filename;
349 return Status::OK();
350 }
351 }
352
CreateAudioFile(const string & audio_format_id,int32 bits_per_second,int32 samples_per_second,int32 channel_count,const std::vector<float> & samples,string * output_data)353 Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
354 int32 samples_per_second, int32 channel_count,
355 const std::vector<float>& samples, string* output_data) {
356 if (audio_format_id != "wav") {
357 return Status(error::Code::INVALID_ARGUMENT,
358 "CreateAudioFile only supports the 'wav' audio format.");
359 }
360 *output_data = BuildWavFile(samples_per_second, channel_count, samples);
361 return Status::OK();
362 }
363
ReadVideoFile(const string & filename,std::vector<uint8> * output_data,uint32 * width,uint32 * height,uint32 * frames)364 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
365 uint32* width, uint32* height, uint32* frames) {
366 if (!IsBinaryInstalled(kFfmpegExecutable)) {
367 return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
368 }
369
370 string output_filename = io::GetTempFilename("raw");
371 string stderr_filename = io::GetTempFilename("err");
372
373 // Create an argument list.
374 const std::vector<string> args =
375 FfmpegVideoCommandLine(filename, output_filename);
376 // Execute ffmpeg and report errors.
377 pid_t child_pid = ::fork();
378 if (child_pid < 0) {
379 return Status(error::Code::UNKNOWN,
380 StrCat("fork failed: ", strerror(errno)));
381 }
382 if (child_pid == 0) {
383 const int fd =
384 open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600);
385 if (fd < 0) {
386 const int error = errno;
387 LOG(ERROR) << "FFmpeg stderr file could not be created: "
388 << strerror(error);
389 ::_exit(error);
390 }
391 close(STDERR_FILENO);
392 dup2(fd, STDERR_FILENO);
393 ExecuteFfmpeg(args);
394 } else {
395 int status_code;
396 if (::waitpid(child_pid, &status_code, 0) < 0) {
397 return Status(error::Code::UNKNOWN,
398 StrCat("waitpid failed: ", strerror(errno)));
399 }
400 if (status_code) {
401 return Status(error::Code::UNKNOWN,
402 StrCat("FFmpeg execution failed: ", status_code));
403 }
404
405 TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames))
406 << "Could not read FFmpeg stderr file: " << stderr_filename;
407
408 string raw_data;
409 TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data))
410 << "Could not read FFmpeg output file: " << output_filename;
411 output_data->resize(raw_data.size());
412 std::copy_n(raw_data.data(), raw_data.size(), output_data->begin());
413
414 TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
415 << output_filename;
416 TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename))
417 << stderr_filename;
418 return Status::OK();
419 }
420 }
421 } // namespace ffmpeg
422 } // namespace tensorflow
423