• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 // =============================================================================
15 
16 #include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
17 
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <algorithm>
#include <cmath>
#include <limits>
#include <vector>
27 
28 #include "tensorflow/core/lib/io/path.h"
29 #include "tensorflow/core/lib/strings/numbers.h"
30 #include "tensorflow/core/lib/strings/str_util.h"
31 #include "tensorflow/core/platform/byte_order.h"
32 #include "tensorflow/core/platform/env.h"
33 
34 using tensorflow::strings::StrCat;
35 
36 namespace tensorflow {
37 namespace ffmpeg {
38 namespace {
39 
40 const char kFfmpegExecutable[] = "ffmpeg";
41 const int32 kDefaultProbeSize = 5000000;  // 5MB
42 
FfmpegAudioCommandLine(const string & input_filename,const string & output_filename,const string & input_format_id,int32 samples_per_second,int32 channel_count,const string & stream)43 std::vector<string> FfmpegAudioCommandLine(const string& input_filename,
44                                            const string& output_filename,
45                                            const string& input_format_id,
46                                            int32 samples_per_second,
47                                            int32 channel_count,
48                                            const string& stream) {
49   std::vector<string> command({
50       "-nostats",             // No additional progress display.
51       "-nostdin",             // No interactive commands accepted.
52       "-f", input_format_id,  // eg: "mp3"
53       "-probesize", StrCat(kDefaultProbeSize), "-i", input_filename,
54       "-loglevel", "error",   // Print errors only.
55       "-hide_banner",         // Skip printing build options, version, etc.
56       "-map_metadata", "-1",  // Copy global metadata from input to output.
57       "-vn",                  // No video recording.
58       "-ac:a:0", StrCat(channel_count), "-ar:a:0", StrCat(samples_per_second),
59       // Output set (in several ways) to signed 16-bit little-endian ints.
60       "-codec:a:0", "pcm_s16le", "-sample_fmt", "s16", "-f", "s16le",
61       "-sn",  // No subtitle recording.
62       "-y"    // Overwrite output file.
63   });
64   if (!stream.empty()) {
65     command.emplace_back("-map");
66     command.emplace_back(StrCat("0:", stream));
67   }
68   command.emplace_back(StrCat(output_filename));
69 
70   return command;
71 }
72 
FfmpegVideoCommandLine(const string & input_filename,const string & output_filename)73 std::vector<string> FfmpegVideoCommandLine(const string& input_filename,
74                                            const string& output_filename) {
75   return {"-nostats",  // No additional progress display.
76           "-nostdin",  // No interactive commands accepted.
77           "-i", input_filename, "-f", "image2pipe", "-probesize",
78           StrCat(kDefaultProbeSize), "-loglevel",
79           // Info is needed to get the information about stream, etc.
80           // It is generated to a separate file, not stdout/stderr.
81           "info",
82           "-hide_banner",  // Skip printing build options, version, etc.
83           "-vcodec", "rawvideo", "-pix_fmt", "rgb24",
84           "-y",  // Overwrite output file.
85           StrCat(output_filename)};
86 }
87 
88 // Is a named binary installed and executable by the current process?
89 // Note that this is harder than it seems like it should be...
IsBinaryInstalled(const string & binary_name)90 bool IsBinaryInstalled(const string& binary_name) {
91   string path = ::getenv("PATH");
92   for (const string& dir : str_util::Split(path, ':')) {
93     const string binary_path = io::JoinPath(dir, binary_name);
94     char absolute_path[PATH_MAX + 1];
95     if (::realpath(binary_path.c_str(), absolute_path) == nullptr) {
96       continue;
97     }
98     struct stat statinfo;
99     int result = ::stat(absolute_path, &statinfo);
100     if (result < 0) {
101       continue;
102     }
103     if (!S_ISREG(statinfo.st_mode)) {
104       continue;
105     }
106 
107     // Is the current user able to execute the file?
108     if (statinfo.st_uid == ::geteuid() && statinfo.st_mode & S_IXUSR) {
109       return true;
110     }
111     // Is the current group able to execute the file?
112     if (statinfo.st_uid == ::getegid() && statinfo.st_mode & S_IXGRP) {
113       return true;
114     }
115     // Is anyone able to execute the file?
116     if (statinfo.st_mode & S_IXOTH) {
117       return true;
118     }
119   }
120   return false;
121 }
122 
ExecuteFfmpeg(const std::vector<string> & args)123 [[noreturn]] int ExecuteFfmpeg(const std::vector<string>& args) {
124   std::vector<char*> args_chars;
125   std::transform(args.begin(), args.end(), std::back_inserter(args_chars),
126                  [](const string& s) { return const_cast<char*>(s.c_str()); });
127   args_chars.push_back(nullptr);
128   ::execvp(kFfmpegExecutable, args_chars.data());
129   // exec only returns on error.
130   const int error = errno;
131   LOG(ERROR) << "FFmpeg could not be executed: " << strerror(error);
132   ::_exit(error);
133 }
134 
135 // Reads a PCM file using signed little endian 16-bit encoding (s16le).
ReadPcmFile(const string & filename)136 std::vector<float> ReadPcmFile(const string& filename) {
137   string raw_data;
138   TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &raw_data))
139       << "Could not read FFmpeg output file: " << filename;
140 
141   std::vector<float> samples;
142   const int32 sample_count = raw_data.size() / sizeof(int16);
143   samples.reserve(sample_count);
144 
145   for (int32 i = 0; i < sample_count; ++i) {
146     // Most of this is jumping through hoops in the standard to convert some
147     // bits into the right format. I hope that an optimizing compiler will
148     // remove almost all of this code.
149     char raw[2] = {raw_data[i * 2], raw_data[i * 2 + 1]};
150     if (!port::kLittleEndian) {
151       std::swap(raw[0], raw[1]);
152     }
153     int16 host_order;
154     ::memcpy(&host_order, raw, sizeof(host_order));
155     const double normalized =
156         static_cast<double>(host_order) / std::numeric_limits<int16>::max();
157     samples.push_back(normalized);
158   }
159   return samples;
160 }
161 
162 template <typename UInt>
LittleEndianData(UInt data)163 string LittleEndianData(UInt data) {
164   static_assert(std::is_unsigned<UInt>::value, "UInt must be unsigned");
165   string str;
166   for (size_t i = 0; i < sizeof(UInt); ++i) {
167     const unsigned char bits = static_cast<unsigned char>(data & 0xFFU);
168     char ch;
169     ::memcpy(&ch, &bits, sizeof(bits));
170     str.push_back(ch);
171     data >>= 8;
172   }
173   return str;
174 }
175 
LittleEndianDataInt(uint32 data)176 string LittleEndianDataInt(uint32 data) {
177   return LittleEndianData<uint32>(data);
178 }
179 
LittleEndianDataShort(uint16 data)180 string LittleEndianDataShort(uint16 data) {
181   return LittleEndianData<uint16>(data);
182 }
183 
WavHeader(int32 samples_per_second,int32 channel_count,const std::vector<float> & samples)184 string WavHeader(int32 samples_per_second, int32 channel_count,
185                  const std::vector<float>& samples) {
186   string header = "RIFF";
187   header += LittleEndianDataInt(36U + samples.size() * sizeof(int16));
188   header += "WAVEfmt ";
189   header += LittleEndianDataInt(16);
190   header += LittleEndianDataShort(1);
191   header += LittleEndianDataShort(channel_count);
192   header += LittleEndianDataInt(samples_per_second);
193   header +=
194       LittleEndianDataInt(samples_per_second * channel_count * sizeof(int16));
195   header += LittleEndianDataShort(channel_count * sizeof(int16));
196   header += LittleEndianDataShort(16);
197   header += "data";
198   header += LittleEndianDataInt(samples.size() * sizeof(int16));
199   CHECK_EQ(header.size(), 44);
200   return header;
201 }
202 
203 // Creates the contents of a .wav file using pcm_s16le format (signed 16 bit
204 // little endian integers).
BuildWavFile(int32 samples_per_second,int32 channel_count,const std::vector<float> & samples)205 string BuildWavFile(int32 samples_per_second, int32 channel_count,
206                     const std::vector<float>& samples) {
207   string data = WavHeader(samples_per_second, channel_count, samples);
208   data.reserve(data.size() + samples.size() * sizeof(int16));
209   for (float value : samples) {
210     const int16 quantized =
211         static_cast<int16>(value * std::numeric_limits<int16>::max());
212     char raw[2];
213     ::memcpy(raw, &quantized, sizeof(int16));
214     if (!port::kLittleEndian) {
215       std::swap(raw[0], raw[1]);
216     }
217     data.push_back(raw[0]);
218     data.push_back(raw[1]);
219   }
220   return data;
221 }
222 
ReadInfoFile(const string & filename,uint32 * width,uint32 * height,uint32 * frames)223 Status ReadInfoFile(const string& filename, uint32* width, uint32* height,
224                     uint32* frames) {
225   string data;
226   TF_QCHECK_OK(ReadFileToString(Env::Default(), filename, &data))
227       << "Could not read FFmpeg file: " << filename;
228   bool in_output = false;
229   bool in_mapping = false;
230   uint32 frames_value = 0;
231   uint32 height_value = 0;
232   uint32 width_value = 0;
233   for (const string& line : str_util::Split(data, '\n')) {
234     // Output starts with the first line of `Output #..`.
235     // Further processing output region starts next line so we could continue
236     // the loop.
237     if (!in_output && line.find("Output #") == 0) {
238       in_output = true;
239       in_mapping = false;
240       continue;
241     }
242     // Stream mapping starts with the first line of `Stream mapping`, it also
243     // signals the end of Output section.
244     // Further processing of stream mapping region starts next line so we could
245     // continue the loop.
246     if (!in_mapping && line.find("Stream mapping:") == 0) {
247       in_output = false;
248       in_mapping = true;
249       continue;
250     }
251     if (in_output) {
252       // We only look for the first stream in output `Stream #0`.
253       // Once processed we will not further process output section.
254       if (line.find("    Stream #") == 0) {
255         size_t p = line.find(", rgb24, ", 24);
256         if (p != std::string::npos) {
257           string rgb24 = line.substr(p + 9, line.find(" ", p + 9));
258           rgb24 = rgb24.substr(0, rgb24.find(","));
259           // Strip anything after " ", in case the format is
260           // `640x360 [SAR 1:1 DAR 16:9]`
261           rgb24 = rgb24.substr(0, rgb24.find(" "));
262           string rgb24_width = rgb24.substr(0, rgb24.find("x"));
263           string rgb24_height = rgb24.substr(rgb24_width.length() + 1);
264           if (strings::safe_strtou32(rgb24_width, &width_value) &&
265               strings::safe_strtou32(rgb24_height, &height_value)) {
266             in_output = false;
267           }
268         }
269       }
270       continue;
271     }
272     if (in_mapping) {
273       // We only look for the first stream mapping to have the number of the
274       // frames.
275       // Once processed we will not further process stream mapping section.
276       if (line.find("frame=") == 0) {
277         // The format might be `frame=  166 ` or `frame=12488 `
278         string number = line.substr(6);
279         number = number.substr(number.find_first_not_of(" "));
280         number = number.substr(0, number.find(" "));
281         if (strings::safe_strtou32(number, &frames_value)) {
282           in_mapping = false;
283         }
284       }
285       continue;
286     }
287   }
288   if (frames_value == 0 || height_value == 0 || width_value == 0) {
289     return errors::Unknown("Not enough video info returned by FFmpeg [",
290                            frames_value, ", ", height_value, ", ", width_value,
291                            ", 3]");
292   }
293   *width = width_value;
294   *height = height_value;
295   *frames = frames_value;
296   return Status::OK();
297 }
298 
299 }  // namespace
300 
~FileDeleter()301 FileDeleter::~FileDeleter() {
302   Env& env = *Env::Default();
303   env.DeleteFile(filename_).IgnoreError();
304 }
305 
WriteFile(const string & filename,StringPiece contents)306 Status WriteFile(const string& filename, StringPiece contents) {
307   Env& env = *Env::Default();
308   std::unique_ptr<WritableFile> file;
309   TF_RETURN_IF_ERROR(env.NewWritableFile(filename, &file));
310   TF_RETURN_IF_ERROR(file->Append(contents));
311   TF_RETURN_IF_ERROR(file->Close());
312   return Status::OK();
313 }
314 
ReadAudioFile(const string & filename,const string & audio_format_id,int32 samples_per_second,int32 channel_count,const string & stream,std::vector<float> * output_samples)315 Status ReadAudioFile(const string& filename, const string& audio_format_id,
316                      int32 samples_per_second, int32 channel_count,
317                      const string& stream, std::vector<float>* output_samples) {
318   // Create an argument list.
319   string output_filename = io::GetTempFilename("raw");
320   const std::vector<string> args =
321       FfmpegAudioCommandLine(filename, output_filename, audio_format_id,
322                              samples_per_second, channel_count, stream);
323   // Unfortunately, it's impossible to differentiate an exec failure due to the
324   // binary being missing and an error from the binary's execution. Therefore,
325   // check to see if the binary *should* be available. If not, return an error
326   // that will be converted into a helpful error message by the TensorFlow op.
327   if (!IsBinaryInstalled(kFfmpegExecutable)) {
328     return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
329   }
330 
331   // Execute ffmpeg and report errors.
332   pid_t child_pid = ::fork();
333   if (child_pid < 0) {
334     return Status(error::Code::UNKNOWN,
335                   StrCat("fork failed: ", strerror(errno)));
336   }
337   if (child_pid == 0) {
338     ExecuteFfmpeg(args);
339   } else {
340     int status_code;
341     ::waitpid(child_pid, &status_code, 0);
342     if (status_code) {
343       return Status(error::Code::UNKNOWN,
344                     StrCat("FFmpeg execution failed: ", status_code));
345     }
346     *output_samples = ReadPcmFile(output_filename);
347     TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
348         << output_filename;
349     return Status::OK();
350   }
351 }
352 
CreateAudioFile(const string & audio_format_id,int32 bits_per_second,int32 samples_per_second,int32 channel_count,const std::vector<float> & samples,string * output_data)353 Status CreateAudioFile(const string& audio_format_id, int32 bits_per_second,
354                        int32 samples_per_second, int32 channel_count,
355                        const std::vector<float>& samples, string* output_data) {
356   if (audio_format_id != "wav") {
357     return Status(error::Code::INVALID_ARGUMENT,
358                   "CreateAudioFile only supports the 'wav' audio format.");
359   }
360   *output_data = BuildWavFile(samples_per_second, channel_count, samples);
361   return Status::OK();
362 }
363 
ReadVideoFile(const string & filename,std::vector<uint8> * output_data,uint32 * width,uint32 * height,uint32 * frames)364 Status ReadVideoFile(const string& filename, std::vector<uint8>* output_data,
365                      uint32* width, uint32* height, uint32* frames) {
366   if (!IsBinaryInstalled(kFfmpegExecutable)) {
367     return Status(error::Code::NOT_FOUND, StrCat("FFmpeg could not be found."));
368   }
369 
370   string output_filename = io::GetTempFilename("raw");
371   string stderr_filename = io::GetTempFilename("err");
372 
373   // Create an argument list.
374   const std::vector<string> args =
375       FfmpegVideoCommandLine(filename, output_filename);
376   // Execute ffmpeg and report errors.
377   pid_t child_pid = ::fork();
378   if (child_pid < 0) {
379     return Status(error::Code::UNKNOWN,
380                   StrCat("fork failed: ", strerror(errno)));
381   }
382   if (child_pid == 0) {
383     const int fd =
384         open(stderr_filename.c_str(), O_RDWR | O_CREAT | O_APPEND, 0600);
385     if (fd < 0) {
386       const int error = errno;
387       LOG(ERROR) << "FFmpeg stderr file could not be created: "
388                  << strerror(error);
389       ::_exit(error);
390     }
391     close(STDERR_FILENO);
392     dup2(fd, STDERR_FILENO);
393     ExecuteFfmpeg(args);
394   } else {
395     int status_code;
396     if (::waitpid(child_pid, &status_code, 0) < 0) {
397       return Status(error::Code::UNKNOWN,
398                     StrCat("waitpid failed: ", strerror(errno)));
399     }
400     if (status_code) {
401       return Status(error::Code::UNKNOWN,
402                     StrCat("FFmpeg execution failed: ", status_code));
403     }
404 
405     TF_QCHECK_OK(ReadInfoFile(stderr_filename, width, height, frames))
406         << "Could not read FFmpeg stderr file: " << stderr_filename;
407 
408     string raw_data;
409     TF_QCHECK_OK(ReadFileToString(Env::Default(), output_filename, &raw_data))
410         << "Could not read FFmpeg output file: " << output_filename;
411     output_data->resize(raw_data.size());
412     std::copy_n(raw_data.data(), raw_data.size(), output_data->begin());
413 
414     TF_QCHECK_OK(Env::Default()->DeleteFile(output_filename))
415         << output_filename;
416     TF_QCHECK_OK(Env::Default()->DeleteFile(stderr_filename))
417         << stderr_filename;
418     return Status::OK();
419   }
420 }
421 }  // namespace ffmpeg
422 }  // namespace tensorflow
423