//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements utilities for interfacing with tensorflow C APIs.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#if defined(LLVM_HAVE_TF_API)

#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Utils/TFUtils.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"

#include "tensorflow/c/c_api.h"
#include "tensorflow/c/c_api_experimental.h"

#include <cassert>
#include <cstring>
#include <functional>
#include <memory>
#include <numeric>

using namespace llvm;

namespace {

using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
using TFSessionOptionsPtr =
    std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;

struct TFInitializer {
  TFInitializer() {
    assert(!IsInitialized && "TFInitialized should be called only once");
    int Argc = 1;
    const char *Name = "";
    const char **NamePtr = &Name;
    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
    IsInitialized = true;
  }
  bool IsInitialized = false;
};

llvm::ManagedStatic<TFInitializer> TFLibInitializer;

bool ensureInitTF() { return TFLibInitializer->IsInitialized; }

TFGraphPtr createTFGraph() {
  return TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
}

TFStatusPtr createTFStatus() {
  return TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
}

TFSessionOptionsPtr createTFSessionOptions() {
  return TFSessionOptionsPtr(TF_NewSessionOptions(), &TF_DeleteSessionOptions);
}

/// Write the values of one tensor as a list.
template <typename T>
void writeTensorValues(raw_ostream &OutFile, const char *TensorData,
                       size_t ElemCount) {
  OutFile << "[";
  const T *TypedData = reinterpret_cast<const T *>(TensorData);
  for (size_t I = 0; I < ElemCount; ++I) {
    if (I > 0)
      OutFile << ", ";
    OutFile << TypedData[I];
  }
  OutFile << "]";
}

/// Write a list of tensors as a sequence of TensorFlow FeatureList protobufs.
/// The tensors are assumed to be stored contiguously, in row-major format,
/// in the TensorData buffer. Each tensor has the shape given by Spec. The
/// feature name in the output is either the provided LoggingName, if
/// specified, otherwise it's the name of the tensor (as given by Spec).
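///
/// For illustration only (the name and values below are made up), logging two
/// 2-element int64 tensors under the name "x" produces output of this shape:
///
///   feature_list: {
///     key: "x" value: {
///       feature: { int64_list: { value: [1, 2] } }
///       feature: { int64_list: { value: [3, 4] } }
///     }
///   }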
void writeRawTensorsAsFeatureLists(raw_ostream &OutFile,
                                   const LoggedFeatureSpec &LoggedSpec,
                                   const char *TensorData, size_t TensorCount,
                                   bool FinalReward = false) {
  const char *FieldName = "<invalid>";
  std::function<void(const char *)> ValueWriter;
  const auto &Spec = LoggedSpec.Spec;
  // The 'Feature' protobuf only has 3 possible fields: float_list,
  // int64_list, or bytes_list, so we capture int32 values as int64. We don't
  // support any other types.
  if (Spec.isElementType<int64_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int64_t>(OutFile, Data, Spec.getElementCount());
    };
  } else if (Spec.isElementType<int32_t>()) {
    FieldName = "int64_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<int32_t>(OutFile, Data, Spec.getElementCount());
    };

  } else if (Spec.isElementType<float>()) {
    FieldName = "float_list";
    ValueWriter = [&](const char *Data) {
      writeTensorValues<float>(OutFile, Data, Spec.getElementCount());
    };

  } else {
    llvm_unreachable("Unsupported tensor type.");
  }

  OutFile << "  feature_list: {\n";
  OutFile << "    key: "
          << "\""
          << (LoggedSpec.LoggingName ? *LoggedSpec.LoggingName : Spec.name())
          << "\" ";
  OutFile << "value: {\n";
  size_t TensorByteSize = Spec.getElementCount() * Spec.getElementByteSize();

  auto WriteFeatureProto = [&](const char *P) {
    OutFile << "      feature: { " << FieldName << ": { value: ";
    ValueWriter(P);
    OutFile << " } }\n";
  };

  const char *CurrentTensor = TensorData;
  static int64_t Zero = 0;
  // Write all but the last value. If this is the final reward, don't increment
  // the CurrentTensor, and just write 0.
  for (size_t I = 0; I < TensorCount - 1; ++I) {
    if (FinalReward)
      WriteFeatureProto(reinterpret_cast<const char *>(&Zero));
    else {
      WriteFeatureProto(CurrentTensor);
      CurrentTensor += TensorByteSize;
    }
  }

  WriteFeatureProto(CurrentTensor);

  OutFile << "    }\n";
  OutFile << "  }\n";
}
} // namespace

namespace llvm {
class EvaluationResultImpl {
public:
  EvaluationResultImpl(size_t OutputSize)
      : OutputSize(OutputSize), Output(OutputSize){};

  ~EvaluationResultImpl() {
    for (auto *P : Output)
      if (P)
        TF_DeleteTensor(P);
  }

  EvaluationResultImpl(const EvaluationResultImpl &) = delete;
  EvaluationResultImpl(EvaluationResultImpl &&Other) = delete;
  std::vector<TF_Tensor *> &getOutput() { return Output; }

private:
  const size_t OutputSize;
  std::vector<TF_Tensor *> Output;
};

size_t TensorSpec::getElementByteSize() const {
  return TF_DataTypeSize(static_cast<TF_DataType>(TypeIndex));
}

TensorSpec::TensorSpec(const std::string &Name, int Port, int TypeIndex,
                       const std::vector<int64_t> &Shape)
    : Name(Name), Port(Port), TypeIndex(TypeIndex), Shape(Shape),
      ElementCount(std::accumulate(Shape.begin(), Shape.end(), 1,
                                   std::multiplies<int64_t>())) {}

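// getTensorSpecFromJSON parses a JSON dictionary with "name", "port", "type",
// and "shape" keys into a TensorSpec. As an illustration (the field values
// here are made up), a valid input looks like:
//
//   {"name": "some_tensor", "port": 0, "type": "int64_t", "shape": [1, 3]}
//
// where "type" is the stringified C element type matched against
// TFUTILS_SUPPORTED_TYPES below.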
Optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
                                           const json::Value &Value) {
  auto EmitError = [&](const llvm::Twine &Message) -> Optional<TensorSpec> {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << Value;
    Ctx.emitError("Unable to parse JSON Value as spec (" + Message + "): " + S);
    return None;
  };
  // FIXME: accept a Path as a parameter, and use it for error reporting.
  json::Path::Root Root("tensor_spec");
  json::ObjectMapper Mapper(Value, Root);
  if (!Mapper)
    return EmitError("Value is not a dict");

  std::string TensorName;
  int TensorPort = -1;
  std::string TensorType;
  std::vector<int64_t> TensorShape;

  if (!Mapper.map<std::string>("name", TensorName))
    return EmitError("'name' property not present or not a string");
  if (!Mapper.map<std::string>("type", TensorType))
    return EmitError("'type' property not present or not a string");
  if (!Mapper.map<int>("port", TensorPort))
    return EmitError("'port' property not present or not an int");
  if (!Mapper.map<std::vector<int64_t>>("shape", TensorShape))
    return EmitError("'shape' property not present or not an int array");

#define PARSE_TYPE(T, E)                                                       \
  if (TensorType == #T)                                                        \
    return TensorSpec::createSpec<T>(TensorName, TensorShape, TensorPort);
  TFUTILS_SUPPORTED_TYPES(PARSE_TYPE)
#undef PARSE_TYPE
  return None;
}

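// loadOutputSpecs reads the output spec json file located next to the model
// (or at SpecFileOverride, if provided). Schematically, and with made-up
// values, the file contains an array like:
//
//   [
//     {"logging_name": "some_decision",
//      "tensor_spec": {"name": "output_0", "port": 0, "type": "int64_t",
//                      "shape": [1]}},
//     ...
//   ]
//
// The first entry must describe the decision tensor, and its logging_name
// must equal ExpectedDecisionName.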
Optional<std::vector<LoggedFeatureSpec>>
loadOutputSpecs(LLVMContext &Ctx, StringRef ExpectedDecisionName,
                StringRef ModelPath, StringRef SpecFileOverride) {
  SmallVector<char, 128> OutputSpecsPath;
  StringRef FileName = SpecFileOverride;
  if (FileName.empty()) {
    llvm::sys::path::append(OutputSpecsPath, ModelPath, "output_spec.json");
    FileName = {OutputSpecsPath.data(), OutputSpecsPath.size()};
  }

  auto BufferOrError = MemoryBuffer::getFileOrSTDIN(FileName);
  if (!BufferOrError) {
    Ctx.emitError("Error opening output specs file: " + FileName + " : " +
                  BufferOrError.getError().message());
    return None;
  }
  auto ParsedJSONValues = json::parse(BufferOrError.get()->getBuffer());
  if (!ParsedJSONValues) {
    Ctx.emitError("Could not parse specs file: " + FileName);
    return None;
  }
  auto ValuesArray = ParsedJSONValues->getAsArray();
  if (!ValuesArray) {
    Ctx.emitError("Expected an array of {tensor_spec:<TensorSpec>, "
                  "logging_name:<name>} dictionaries");
    return None;
  }
  std::vector<LoggedFeatureSpec> Ret;
  for (const auto &Value : *ValuesArray)
    if (const auto *Obj = Value.getAsObject())
      if (const auto *SpecPart = Obj->get("tensor_spec"))
        if (auto TensorSpec = getTensorSpecFromJSON(Ctx, *SpecPart))
          if (auto LoggingName = Obj->getString("logging_name")) {
            if (!TensorSpec->isElementType<int64_t>() &&
                !TensorSpec->isElementType<int32_t>() &&
                !TensorSpec->isElementType<float>()) {
              Ctx.emitError(
                  "Only int64, int32, and float tensors are supported. "
                  "Found unsupported type for tensor named " +
                  TensorSpec->name());
              return None;
            }
            Ret.push_back({*TensorSpec, LoggingName->str()});
          }

  if (ValuesArray->size() != Ret.size()) {
    Ctx.emitError(
        "Unable to parse output spec. It should be a json file containing an "
        "array of dictionaries. Each dictionary must have a 'tensor_spec' key, "
        "with a json object describing a TensorSpec; and a 'logging_name' key, "
        "which is a string to use as name when logging this tensor in the "
        "training log.");
    return None;
  }
  if (Ret.empty() || *Ret[0].LoggingName != ExpectedDecisionName) {
    Ctx.emitError("The first output spec must describe the decision tensor, "
                  "and must have the logging_name " +
                  StringRef(ExpectedDecisionName));
    return None;
  }
  return Ret;
}

class TFModelEvaluatorImpl {
public:
  TFModelEvaluatorImpl(StringRef SavedModelPath,
                       const std::vector<TensorSpec> &InputSpecs,
                       function_ref<TensorSpec(size_t)> GetOutputSpecs,
                       size_t OutputSpecsSize, const char *Tags);

  bool isValid() const { return IsValid; }
  size_t OutputSize() const { return OutputFeed.size(); }

  void evaluate(TF_Tensor **Output, TF_Status *Status) {
    TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(),
                  Input.size(), OutputFeed.data(), Output, OutputFeed.size(),
                  nullptr, 0, nullptr, Status);
  }

  void initInput(size_t Index, TF_DataType Type,
                 const std::vector<int64_t> &Dimensions);
  const std::vector<TF_Tensor *> &getInput() const { return Input; }

  ~TFModelEvaluatorImpl();

private:
  /// The objects necessary for carrying out an evaluation of the SavedModel.
  /// They are expensive to set up, and we maintain them across all the
  /// evaluations of the model.
  TF_Session *Session = nullptr;
  TFGraphPtr Graph;
  TFSessionOptionsPtr Options;

  /// The specification of the input nodes.
  std::vector<TF_Output> InputFeed;

  /// The input tensors. They must match, by index, the corresponding InputFeed
  /// value. We set up the tensors once and just mutate their scalars before
  /// each evaluation. The input tensors keep their value after an evaluation.
  std::vector<TF_Tensor *> Input;

  /// The specification of the output nodes. When evaluating, the tensors in
  /// the output tensor vector must match by index the corresponding element in
  /// the OutputFeed.
  std::vector<TF_Output> OutputFeed;

  void invalidate() { IsValid = false; }

  bool IsValid = true;

  /// Reusable utility for ensuring we can bind the requested Name to a node in
  /// the SavedModel Graph.
  bool checkReportAndInvalidate(const TF_Output &Output,
                                const TensorSpec &OutputSpec);
};
} // namespace llvm

TFModelEvaluatorImpl::TFModelEvaluatorImpl(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags = "serve")
    : Graph(createTFGraph()), Options(createTFSessionOptions()),
      InputFeed(InputSpecs.size()), Input(InputSpecs.size()),
      OutputFeed(OutputSpecsSize) {
  if (!ensureInitTF()) {
    errs() << "Tensorflow should have been initialized";
    return;
  }
  auto Status = createTFStatus();

  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
                                         SavedModelPath.str().c_str(), &Tags, 1,
                                         Graph.get(), nullptr, Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    invalidate();
  }
  for (size_t I = 0; I < InputSpecs.size(); ++I) {
    auto &InputSpec = InputSpecs[I];
    InputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (InputSpec.name()).c_str()),
        InputSpec.port()};
    if (!checkReportAndInvalidate(InputFeed[I], InputSpec))
      return;
    initInput(I, static_cast<TF_DataType>(InputSpec.typeIndex()),
              InputSpec.shape());
  }
  for (size_t I = 0; I < OutputSpecsSize; ++I) {
    auto OutputSpec = GetOutputSpecs(I);
    OutputFeed[I] = {
        TF_GraphOperationByName(Graph.get(), (OutputSpec.name()).c_str()),
        OutputSpec.port()};
    if (!checkReportAndInvalidate(OutputFeed[I], OutputSpec))
      return;
  }
}

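// A rough usage sketch of the TFModelEvaluator wrapper whose members are
// defined below. The model path, spec vectors, indices, and feature values are
// hypothetical; isValid() comes from the public interface declared in
// llvm/Analysis/Utils/TFUtils.h:
//
//   TFModelEvaluator Evaluator("path/to/saved_model", InputSpecs, OutputSpecs);
//   if (Evaluator.isValid()) {
//     *static_cast<int64_t *>(Evaluator.getUntypedInput(0)) = FeatureValue;
//     if (auto ER = Evaluator.evaluate())
//       int64_t Decision =
//           *static_cast<int64_t *>(ER->getUntypedTensorValue(0));
//   }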
TFModelEvaluator::TFModelEvaluator(
    StringRef SavedModelPath, const std::vector<TensorSpec> &InputSpecs,
    function_ref<TensorSpec(size_t)> GetOutputSpecs, size_t OutputSpecsSize,
    const char *Tags)
    : Impl(new TFModelEvaluatorImpl(SavedModelPath, InputSpecs, GetOutputSpecs,
                                    OutputSpecsSize, Tags)) {
  if (!Impl->isValid())
    Impl.reset();
}

TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
                                   const std::vector<TensorSpec> &InputSpecs,
                                   const std::vector<TensorSpec> &OutputSpecs,
                                   const char *Tags)
    : TFModelEvaluator(
          SavedModelPath, InputSpecs, [&](size_t I) { return OutputSpecs[I]; },
          OutputSpecs.size(), Tags) {}

TFModelEvaluatorImpl::~TFModelEvaluatorImpl() {
  for (auto *T : Input) {
    TF_DeleteTensor(T);
  }
  if (Session == nullptr)
    return;
  auto Status = createTFStatus();
  TF_DeleteSession(Session, Status.get());
  Session = nullptr;
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
    errs() << "Could not delete TF session";
}

bool TFModelEvaluatorImpl::checkReportAndInvalidate(
    const TF_Output &Output, const TensorSpec &OutputSpec) {
  if (Output.oper)
    return true;
  errs() << "Could not find TF_Output named: " + OutputSpec.name();
  IsValid = false;
  return IsValid;
}

Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
  if (!isValid())
    return None;
  std::unique_ptr<EvaluationResultImpl> Ret =
      std::make_unique<EvaluationResultImpl>(Impl->OutputSize());
  auto Status = createTFStatus();
  Impl->evaluate(Ret->getOutput().data(), Status.get());
  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
    errs() << TF_Message(Status.get());
    Impl.reset();
    return None;
  }
  return EvaluationResult(std::move(Ret));
}

void TFModelEvaluatorImpl::initInput(size_t Index, TF_DataType Type,
                                     const std::vector<int64_t> &Dimensions) {
  int64_t TotalSize = TF_DataTypeSize(Type);
  for (auto &D : Dimensions)
    TotalSize *= D;

  Input[Index] =
      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
}

void *TFModelEvaluator::getUntypedInput(size_t Index) {
  return TF_TensorData(Impl->getInput()[Index]);
}

TFModelEvaluator::EvaluationResult::EvaluationResult(
    std::unique_ptr<EvaluationResultImpl> Impl)
    : Impl(std::move(Impl)) {}

TFModelEvaluator::EvaluationResult::EvaluationResult(EvaluationResult &&Other)
    : Impl(std::move(Other.Impl)) {}

TFModelEvaluator::EvaluationResult &
TFModelEvaluator::EvaluationResult::operator=(EvaluationResult &&Other) {
  Impl = std::move(Other.Impl);
  return *this;
}

void *TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) {
  return TF_TensorData(Impl->getOutput()[Index]);
}

const void *
TFModelEvaluator::EvaluationResult::getUntypedTensorValue(size_t Index) const {
  return TF_TensorData(Impl->getOutput()[Index]);
}

#define TFUTILS_GETDATATYPE_IMPL(T, E)                                         \
  template <> int TensorSpec::getDataType<T>() { return E; }

TFUTILS_SUPPORTED_TYPES(TFUTILS_GETDATATYPE_IMPL)

#undef TFUTILS_GETDATATYPE_IMPL

TFModelEvaluator::EvaluationResult::~EvaluationResult() {}
TFModelEvaluator::~TFModelEvaluator() {}

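// Logger::print emits the raw logged tensors as a textual protobuf: one
// feature_list per logged feature plus, if IncludeReward is set, one for the
// reward, all wrapped in a "feature_lists: { ... }" block. The shape resembles
// a tensorflow SequenceExample's feature_lists; whether a consumer parses it
// as such is an assumption, not something this file enforces.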
void Logger::print(raw_ostream &OS) {
  if (RawLogData.empty())
    return;
  if (RawLogData[0].empty())
    return;
  size_t Tensor0Size = FeatureSpecs[0].Spec.getElementCount() *
                       FeatureSpecs[0].Spec.getElementByteSize();
  size_t NumberOfRecords = RawLogData[0].size() / Tensor0Size;
  if (NumberOfRecords == 0)
    return;
  size_t RewardSize =
      RewardSpec.getElementCount() * RewardSpec.getElementByteSize();
  size_t NumberOfRewards = RawLogData.back().size() / RewardSize;

  OS << "feature_lists: {\n";
  for (size_t I = 0; I < FeatureSpecs.size(); ++I)
    writeRawTensorsAsFeatureLists(OS, FeatureSpecs[I], RawLogData[I].data(),
                                  NumberOfRecords);

  if (IncludeReward)
    writeRawTensorsAsFeatureLists(OS, {RewardSpec, None},
                                  RawLogData.back().data(), NumberOfRecords,
                                  NumberOfRewards == 1);

  OS << "}\n";
}
#endif // defined(LLVM_HAVE_TF_API)