//===-- PythonDataObjects.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #include "lldb/Host/Config.h" #if LLDB_ENABLE_PYTHON #include "PythonDataObjects.h" #include "ScriptInterpreterPython.h" #include "lldb/Host/File.h" #include "lldb/Host/FileSystem.h" #include "lldb/Interpreter/ScriptInterpreter.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/Stream.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Errno.h" #include using namespace lldb_private; using namespace lldb; using namespace lldb_private::python; using llvm::cantFail; using llvm::Error; using llvm::Expected; using llvm::Twine; template <> Expected python::As(Expected &&obj) { if (!obj) return obj.takeError(); return obj.get().IsTrue(); } template <> Expected python::As(Expected &&obj) { if (!obj) return obj.takeError(); return obj->AsLongLong(); } template <> Expected python::As(Expected &&obj) { if (!obj) return obj.takeError(); return obj->AsUnsignedLongLong(); } template <> Expected python::As(Expected &&obj) { if (!obj) return obj.takeError(); PyObject *str_obj = PyObject_Str(obj.get().get()); if (!obj) return llvm::make_error(); auto str = Take(str_obj); auto utf8 = str.AsUTF8(); if (!utf8) return utf8.takeError(); return std::string(utf8.get()); } Expected PythonObject::AsLongLong() const { if (!m_py_obj) return nullDeref(); #if PY_MAJOR_VERSION < 3 if (!PyLong_Check(m_py_obj)) { PythonInteger i(PyRefType::Borrowed, m_py_obj); return i.AsLongLong(); } #endif assert(!PyErr_Occurred()); long long r = PyLong_AsLongLong(m_py_obj); if (PyErr_Occurred()) return exception(); return r; } Expected PythonObject::AsUnsignedLongLong() const { if (!m_py_obj) return nullDeref(); #if PY_MAJOR_VERSION < 3 if (!PyLong_Check(m_py_obj)) { PythonInteger i(PyRefType::Borrowed, m_py_obj); return i.AsUnsignedLongLong(); } #endif assert(!PyErr_Occurred()); long long r = PyLong_AsUnsignedLongLong(m_py_obj); if (PyErr_Occurred()) return exception(); return r; } // wraps on overflow, instead of raising an error. Expected PythonObject::AsModuloUnsignedLongLong() const { if (!m_py_obj) return nullDeref(); #if PY_MAJOR_VERSION < 3 if (!PyLong_Check(m_py_obj)) { PythonInteger i(PyRefType::Borrowed, m_py_obj); return i.AsModuloUnsignedLongLong(); } #endif assert(!PyErr_Occurred()); unsigned long long r = PyLong_AsUnsignedLongLongMask(m_py_obj); if (PyErr_Occurred()) return exception(); return r; } void StructuredPythonObject::Serialize(llvm::json::OStream &s) const { s.value(llvm::formatv("Python Obj: {0:X}", GetValue()).str()); } // PythonObject void PythonObject::Dump(Stream &strm) const { if (m_py_obj) { FILE *file = llvm::sys::RetryAfterSignal(nullptr, ::tmpfile); if (file) { ::PyObject_Print(m_py_obj, file, 0); const long length = ftell(file); if (length) { ::rewind(file); std::vector file_contents(length, '\0'); const size_t length_read = ::fread(file_contents.data(), 1, file_contents.size(), file); if (length_read > 0) strm.Write(file_contents.data(), length_read); } ::fclose(file); } } else strm.PutCString("NULL"); } PyObjectType PythonObject::GetObjectType() const { if (!IsAllocated()) return PyObjectType::None; if (PythonModule::Check(m_py_obj)) return PyObjectType::Module; if (PythonList::Check(m_py_obj)) return PyObjectType::List; if (PythonTuple::Check(m_py_obj)) return PyObjectType::Tuple; if (PythonDictionary::Check(m_py_obj)) return PyObjectType::Dictionary; if (PythonString::Check(m_py_obj)) return PyObjectType::String; #if PY_MAJOR_VERSION >= 3 if (PythonBytes::Check(m_py_obj)) return PyObjectType::Bytes; #endif if (PythonByteArray::Check(m_py_obj)) return PyObjectType::ByteArray; if (PythonBoolean::Check(m_py_obj)) return PyObjectType::Boolean; if (PythonInteger::Check(m_py_obj)) return PyObjectType::Integer; if (PythonFile::Check(m_py_obj)) return PyObjectType::File; if (PythonCallable::Check(m_py_obj)) return PyObjectType::Callable; return PyObjectType::Unknown; } PythonString PythonObject::Repr() const { if (!m_py_obj) return PythonString(); PyObject *repr = PyObject_Repr(m_py_obj); if (!repr) return PythonString(); return PythonString(PyRefType::Owned, repr); } PythonString PythonObject::Str() const { if (!m_py_obj) return PythonString(); PyObject *str = PyObject_Str(m_py_obj); if (!str) return PythonString(); return PythonString(PyRefType::Owned, str); } PythonObject PythonObject::ResolveNameWithDictionary(llvm::StringRef name, const PythonDictionary &dict) { size_t dot_pos = name.find('.'); llvm::StringRef piece = name.substr(0, dot_pos); PythonObject result = dict.GetItemForKey(PythonString(piece)); if (dot_pos == llvm::StringRef::npos) { // There was no dot, we're done. return result; } // There was a dot. The remaining portion of the name should be looked up in // the context of the object that was found in the dictionary. return result.ResolveName(name.substr(dot_pos + 1)); } PythonObject PythonObject::ResolveName(llvm::StringRef name) const { // Resolve the name in the context of the specified object. If, for example, // `this` refers to a PyModule, then this will look for `name` in this // module. If `this` refers to a PyType, then it will resolve `name` as an // attribute of that type. If `this` refers to an instance of an object, // then it will resolve `name` as the value of the specified field. // // This function handles dotted names so that, for example, if `m_py_obj` // refers to the `sys` module, and `name` == "path.append", then it will find // the function `sys.path.append`. size_t dot_pos = name.find('.'); if (dot_pos == llvm::StringRef::npos) { // No dots in the name, we should be able to find the value immediately as // an attribute of `m_py_obj`. return GetAttributeValue(name); } // Look up the first piece of the name, and resolve the rest as a child of // that. PythonObject parent = ResolveName(name.substr(0, dot_pos)); if (!parent.IsAllocated()) return PythonObject(); // Tail recursion.. should be optimized by the compiler return parent.ResolveName(name.substr(dot_pos + 1)); } bool PythonObject::HasAttribute(llvm::StringRef attr) const { if (!IsValid()) return false; PythonString py_attr(attr); return !!PyObject_HasAttr(m_py_obj, py_attr.get()); } PythonObject PythonObject::GetAttributeValue(llvm::StringRef attr) const { if (!IsValid()) return PythonObject(); PythonString py_attr(attr); if (!PyObject_HasAttr(m_py_obj, py_attr.get())) return PythonObject(); return PythonObject(PyRefType::Owned, PyObject_GetAttr(m_py_obj, py_attr.get())); } StructuredData::ObjectSP PythonObject::CreateStructuredObject() const { switch (GetObjectType()) { case PyObjectType::Dictionary: return PythonDictionary(PyRefType::Borrowed, m_py_obj) .CreateStructuredDictionary(); case PyObjectType::Boolean: return PythonBoolean(PyRefType::Borrowed, m_py_obj) .CreateStructuredBoolean(); case PyObjectType::Integer: return PythonInteger(PyRefType::Borrowed, m_py_obj) .CreateStructuredInteger(); case PyObjectType::List: return PythonList(PyRefType::Borrowed, m_py_obj).CreateStructuredArray(); case PyObjectType::String: return PythonString(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); case PyObjectType::Bytes: return PythonBytes(PyRefType::Borrowed, m_py_obj).CreateStructuredString(); case PyObjectType::ByteArray: return PythonByteArray(PyRefType::Borrowed, m_py_obj) .CreateStructuredString(); case PyObjectType::None: return StructuredData::ObjectSP(); default: return StructuredData::ObjectSP(new StructuredPythonObject(m_py_obj)); } } // PythonString PythonBytes::PythonBytes(llvm::ArrayRef bytes) { SetBytes(bytes); } PythonBytes::PythonBytes(const uint8_t *bytes, size_t length) { SetBytes(llvm::ArrayRef(bytes, length)); } bool PythonBytes::Check(PyObject *py_obj) { if (!py_obj) return false; return PyBytes_Check(py_obj); } llvm::ArrayRef PythonBytes::GetBytes() const { if (!IsValid()) return llvm::ArrayRef(); Py_ssize_t size; char *c; PyBytes_AsStringAndSize(m_py_obj, &c, &size); return llvm::ArrayRef(reinterpret_cast(c), size); } size_t PythonBytes::GetSize() const { if (!IsValid()) return 0; return PyBytes_Size(m_py_obj); } void PythonBytes::SetBytes(llvm::ArrayRef bytes) { const char *data = reinterpret_cast(bytes.data()); *this = Take(PyBytes_FromStringAndSize(data, bytes.size())); } StructuredData::StringSP PythonBytes::CreateStructuredString() const { StructuredData::StringSP result(new StructuredData::String); Py_ssize_t size; char *c; PyBytes_AsStringAndSize(m_py_obj, &c, &size); result->SetValue(std::string(c, size)); return result; } PythonByteArray::PythonByteArray(llvm::ArrayRef bytes) : PythonByteArray(bytes.data(), bytes.size()) {} PythonByteArray::PythonByteArray(const uint8_t *bytes, size_t length) { const char *str = reinterpret_cast(bytes); *this = Take(PyByteArray_FromStringAndSize(str, length)); } bool PythonByteArray::Check(PyObject *py_obj) { if (!py_obj) return false; return PyByteArray_Check(py_obj); } llvm::ArrayRef PythonByteArray::GetBytes() const { if (!IsValid()) return llvm::ArrayRef(); char *c = PyByteArray_AsString(m_py_obj); size_t size = GetSize(); return llvm::ArrayRef(reinterpret_cast(c), size); } size_t PythonByteArray::GetSize() const { if (!IsValid()) return 0; return PyByteArray_Size(m_py_obj); } StructuredData::StringSP PythonByteArray::CreateStructuredString() const { StructuredData::StringSP result(new StructuredData::String); llvm::ArrayRef bytes = GetBytes(); const char *str = reinterpret_cast(bytes.data()); result->SetValue(std::string(str, bytes.size())); return result; } // PythonString Expected PythonString::FromUTF8(llvm::StringRef string) { #if PY_MAJOR_VERSION >= 3 PyObject *str = PyUnicode_FromStringAndSize(string.data(), string.size()); #else PyObject *str = PyString_FromStringAndSize(string.data(), string.size()); #endif if (!str) return llvm::make_error(); return Take(str); } PythonString::PythonString(llvm::StringRef string) { SetString(string); } bool PythonString::Check(PyObject *py_obj) { if (!py_obj) return false; if (PyUnicode_Check(py_obj)) return true; #if PY_MAJOR_VERSION < 3 if (PyString_Check(py_obj)) return true; #endif return false; } void PythonString::Convert(PyRefType &type, PyObject *&py_obj) { #if PY_MAJOR_VERSION < 3 // In Python 2, Don't store PyUnicode objects directly, because we need // access to their underlying character buffers which Python 2 doesn't // provide. if (PyUnicode_Check(py_obj)) { PyObject *s = PyUnicode_AsUTF8String(py_obj); if (s == nullptr) { PyErr_Clear(); if (type == PyRefType::Owned) Py_DECREF(py_obj); return; } if (type == PyRefType::Owned) Py_DECREF(py_obj); else type = PyRefType::Owned; py_obj = s; } #endif } llvm::StringRef PythonString::GetString() const { auto s = AsUTF8(); if (!s) { llvm::consumeError(s.takeError()); return llvm::StringRef(""); } return s.get(); } Expected PythonString::AsUTF8() const { if (!IsValid()) return nullDeref(); Py_ssize_t size; const char *data; #if PY_MAJOR_VERSION >= 3 data = PyUnicode_AsUTF8AndSize(m_py_obj, &size); #else char *c = NULL; int r = PyString_AsStringAndSize(m_py_obj, &c, &size); if (r < 0) c = NULL; data = c; #endif if (!data) return exception(); return llvm::StringRef(data, size); } size_t PythonString::GetSize() const { if (IsValid()) { #if PY_MAJOR_VERSION >= 3 #if PY_MINOR_VERSION >= 3 return PyUnicode_GetLength(m_py_obj); #else return PyUnicode_GetSize(m_py_obj); #endif #else return PyString_Size(m_py_obj); #endif } return 0; } void PythonString::SetString(llvm::StringRef string) { auto s = FromUTF8(string); if (!s) { llvm::consumeError(s.takeError()); Reset(); } else { *this = std::move(s.get()); } } StructuredData::StringSP PythonString::CreateStructuredString() const { StructuredData::StringSP result(new StructuredData::String); result->SetValue(GetString()); return result; } // PythonInteger PythonInteger::PythonInteger(int64_t value) { SetInteger(value); } bool PythonInteger::Check(PyObject *py_obj) { if (!py_obj) return false; #if PY_MAJOR_VERSION >= 3 // Python 3 does not have PyInt_Check. There is only one type of integral // value, long. return PyLong_Check(py_obj); #else return PyLong_Check(py_obj) || PyInt_Check(py_obj); #endif } void PythonInteger::Convert(PyRefType &type, PyObject *&py_obj) { #if PY_MAJOR_VERSION < 3 // Always store this as a PyLong, which makes interoperability between Python // 2.x and Python 3.x easier. This is only necessary in 2.x, since 3.x // doesn't even have a PyInt. if (PyInt_Check(py_obj)) { // Since we converted the original object to a different type, the new // object is an owned object regardless of the ownership semantics // requested by the user. long long value = PyInt_AsLong(py_obj); PyObject *l = nullptr; if (!PyErr_Occurred()) l = PyLong_FromLongLong(value); if (l == nullptr) { PyErr_Clear(); if (type == PyRefType::Owned) Py_DECREF(py_obj); return; } if (type == PyRefType::Owned) Py_DECREF(py_obj); else type = PyRefType::Owned; py_obj = l; } #endif } void PythonInteger::SetInteger(int64_t value) { *this = Take(PyLong_FromLongLong(value)); } StructuredData::IntegerSP PythonInteger::CreateStructuredInteger() const { StructuredData::IntegerSP result(new StructuredData::Integer); // FIXME this is really not ideal. Errors are silently converted to 0 // and overflows are silently wrapped. But we'd need larger changes // to StructuredData to fix it, so that's how it is for now. llvm::Expected value = AsModuloUnsignedLongLong(); if (!value) { llvm::consumeError(value.takeError()); result->SetValue(0); } else { result->SetValue(value.get()); } return result; } // PythonBoolean PythonBoolean::PythonBoolean(bool value) { SetValue(value); } bool PythonBoolean::Check(PyObject *py_obj) { return py_obj ? PyBool_Check(py_obj) : false; } bool PythonBoolean::GetValue() const { return m_py_obj ? PyObject_IsTrue(m_py_obj) : false; } void PythonBoolean::SetValue(bool value) { *this = Take(PyBool_FromLong(value)); } StructuredData::BooleanSP PythonBoolean::CreateStructuredBoolean() const { StructuredData::BooleanSP result(new StructuredData::Boolean); result->SetValue(GetValue()); return result; } // PythonList PythonList::PythonList(PyInitialValue value) { if (value == PyInitialValue::Empty) *this = Take(PyList_New(0)); } PythonList::PythonList(int list_size) { *this = Take(PyList_New(list_size)); } bool PythonList::Check(PyObject *py_obj) { if (!py_obj) return false; return PyList_Check(py_obj); } uint32_t PythonList::GetSize() const { if (IsValid()) return PyList_GET_SIZE(m_py_obj); return 0; } PythonObject PythonList::GetItemAtIndex(uint32_t index) const { if (IsValid()) return PythonObject(PyRefType::Borrowed, PyList_GetItem(m_py_obj, index)); return PythonObject(); } void PythonList::SetItemAtIndex(uint32_t index, const PythonObject &object) { if (IsAllocated() && object.IsValid()) { // PyList_SetItem is documented to "steal" a reference, so we need to // convert it to an owned reference by incrementing it. Py_INCREF(object.get()); PyList_SetItem(m_py_obj, index, object.get()); } } void PythonList::AppendItem(const PythonObject &object) { if (IsAllocated() && object.IsValid()) { // `PyList_Append` does *not* steal a reference, so do not call `Py_INCREF` // here like we do with `PyList_SetItem`. PyList_Append(m_py_obj, object.get()); } } StructuredData::ArraySP PythonList::CreateStructuredArray() const { StructuredData::ArraySP result(new StructuredData::Array); uint32_t count = GetSize(); for (uint32_t i = 0; i < count; ++i) { PythonObject obj = GetItemAtIndex(i); result->AddItem(obj.CreateStructuredObject()); } return result; } // PythonTuple PythonTuple::PythonTuple(PyInitialValue value) { if (value == PyInitialValue::Empty) *this = Take(PyTuple_New(0)); } PythonTuple::PythonTuple(int tuple_size) { *this = Take(PyTuple_New(tuple_size)); } PythonTuple::PythonTuple(std::initializer_list objects) { m_py_obj = PyTuple_New(objects.size()); uint32_t idx = 0; for (auto object : objects) { if (object.IsValid()) SetItemAtIndex(idx, object); idx++; } } PythonTuple::PythonTuple(std::initializer_list objects) { m_py_obj = PyTuple_New(objects.size()); uint32_t idx = 0; for (auto py_object : objects) { PythonObject object(PyRefType::Borrowed, py_object); if (object.IsValid()) SetItemAtIndex(idx, object); idx++; } } bool PythonTuple::Check(PyObject *py_obj) { if (!py_obj) return false; return PyTuple_Check(py_obj); } uint32_t PythonTuple::GetSize() const { if (IsValid()) return PyTuple_GET_SIZE(m_py_obj); return 0; } PythonObject PythonTuple::GetItemAtIndex(uint32_t index) const { if (IsValid()) return PythonObject(PyRefType::Borrowed, PyTuple_GetItem(m_py_obj, index)); return PythonObject(); } void PythonTuple::SetItemAtIndex(uint32_t index, const PythonObject &object) { if (IsAllocated() && object.IsValid()) { // PyTuple_SetItem is documented to "steal" a reference, so we need to // convert it to an owned reference by incrementing it. Py_INCREF(object.get()); PyTuple_SetItem(m_py_obj, index, object.get()); } } StructuredData::ArraySP PythonTuple::CreateStructuredArray() const { StructuredData::ArraySP result(new StructuredData::Array); uint32_t count = GetSize(); for (uint32_t i = 0; i < count; ++i) { PythonObject obj = GetItemAtIndex(i); result->AddItem(obj.CreateStructuredObject()); } return result; } // PythonDictionary PythonDictionary::PythonDictionary(PyInitialValue value) { if (value == PyInitialValue::Empty) *this = Take(PyDict_New()); } bool PythonDictionary::Check(PyObject *py_obj) { if (!py_obj) return false; return PyDict_Check(py_obj); } uint32_t PythonDictionary::GetSize() const { if (IsValid()) return PyDict_Size(m_py_obj); return 0; } PythonList PythonDictionary::GetKeys() const { if (IsValid()) return PythonList(PyRefType::Owned, PyDict_Keys(m_py_obj)); return PythonList(PyInitialValue::Invalid); } PythonObject PythonDictionary::GetItemForKey(const PythonObject &key) const { auto item = GetItem(key); if (!item) { llvm::consumeError(item.takeError()); return PythonObject(); } return std::move(item.get()); } Expected PythonDictionary::GetItem(const PythonObject &key) const { if (!IsValid()) return nullDeref(); #if PY_MAJOR_VERSION >= 3 PyObject *o = PyDict_GetItemWithError(m_py_obj, key.get()); if (PyErr_Occurred()) return exception(); #else PyObject *o = PyDict_GetItem(m_py_obj, key.get()); #endif if (!o) return keyError(); return Retain(o); } Expected PythonDictionary::GetItem(const Twine &key) const { if (!IsValid()) return nullDeref(); PyObject *o = PyDict_GetItemString(m_py_obj, NullTerminated(key)); if (PyErr_Occurred()) return exception(); if (!o) return keyError(); return Retain(o); } Error PythonDictionary::SetItem(const PythonObject &key, const PythonObject &value) const { if (!IsValid() || !value.IsValid()) return nullDeref(); int r = PyDict_SetItem(m_py_obj, key.get(), value.get()); if (r < 0) return exception(); return Error::success(); } Error PythonDictionary::SetItem(const Twine &key, const PythonObject &value) const { if (!IsValid() || !value.IsValid()) return nullDeref(); int r = PyDict_SetItemString(m_py_obj, NullTerminated(key), value.get()); if (r < 0) return exception(); return Error::success(); } void PythonDictionary::SetItemForKey(const PythonObject &key, const PythonObject &value) { Error error = SetItem(key, value); if (error) llvm::consumeError(std::move(error)); } StructuredData::DictionarySP PythonDictionary::CreateStructuredDictionary() const { StructuredData::DictionarySP result(new StructuredData::Dictionary); PythonList keys(GetKeys()); uint32_t num_keys = keys.GetSize(); for (uint32_t i = 0; i < num_keys; ++i) { PythonObject key = keys.GetItemAtIndex(i); PythonObject value = GetItemForKey(key); StructuredData::ObjectSP structured_value = value.CreateStructuredObject(); result->AddItem(key.Str().GetString(), structured_value); } return result; } PythonModule PythonModule::BuiltinsModule() { #if PY_MAJOR_VERSION >= 3 return AddModule("builtins"); #else return AddModule("__builtin__"); #endif } PythonModule PythonModule::MainModule() { return AddModule("__main__"); } PythonModule PythonModule::AddModule(llvm::StringRef module) { std::string str = module.str(); return PythonModule(PyRefType::Borrowed, PyImport_AddModule(str.c_str())); } Expected PythonModule::Import(const Twine &name) { PyObject *mod = PyImport_ImportModule(NullTerminated(name)); if (!mod) return exception(); return Take(mod); } Expected PythonModule::Get(const Twine &name) { if (!IsValid()) return nullDeref(); PyObject *dict = PyModule_GetDict(m_py_obj); if (!dict) return exception(); PyObject *item = PyDict_GetItemString(dict, NullTerminated(name)); if (!item) return exception(); return Retain(item); } bool PythonModule::Check(PyObject *py_obj) { if (!py_obj) return false; return PyModule_Check(py_obj); } PythonDictionary PythonModule::GetDictionary() const { if (!IsValid()) return PythonDictionary(); return Retain(PyModule_GetDict(m_py_obj)); } bool PythonCallable::Check(PyObject *py_obj) { if (!py_obj) return false; return PyCallable_Check(py_obj); } #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3 static const char get_arg_info_script[] = R"( from inspect import signature, Parameter, ismethod from collections import namedtuple ArgInfo = namedtuple('ArgInfo', ['count', 'has_varargs']) def main(f): count = 0 varargs = False for parameter in signature(f).parameters.values(): kind = parameter.kind if kind in (Parameter.POSITIONAL_ONLY, Parameter.POSITIONAL_OR_KEYWORD): count += 1 elif kind == Parameter.VAR_POSITIONAL: varargs = True elif kind in (Parameter.KEYWORD_ONLY, Parameter.VAR_KEYWORD): pass else: raise Exception(f'unknown parameter kind: {kind}') return ArgInfo(count, varargs) )"; #endif Expected PythonCallable::GetArgInfo() const { ArgInfo result = {}; if (!IsValid()) return nullDeref(); #if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 3 // no need to synchronize access to this global, we already have the GIL static PythonScript get_arg_info(get_arg_info_script); Expected pyarginfo = get_arg_info(*this); if (!pyarginfo) return pyarginfo.takeError(); long long count = cantFail(As(pyarginfo.get().GetAttribute("count"))); bool has_varargs = cantFail(As(pyarginfo.get().GetAttribute("has_varargs"))); result.max_positional_args = has_varargs ? ArgInfo::UNBOUNDED : count; #else PyObject *py_func_obj; bool is_bound_method = false; bool is_class = false; if (PyType_Check(m_py_obj) || PyClass_Check(m_py_obj)) { auto init = GetAttribute("__init__"); if (!init) return init.takeError(); py_func_obj = init.get().get(); is_class = true; } else { py_func_obj = m_py_obj; } if (PyMethod_Check(py_func_obj)) { py_func_obj = PyMethod_GET_FUNCTION(py_func_obj); PythonObject im_self = GetAttributeValue("im_self"); if (im_self.IsValid() && !im_self.IsNone()) is_bound_method = true; } else { // see if this is a callable object with an __call__ method if (!PyFunction_Check(py_func_obj)) { PythonObject __call__ = GetAttributeValue("__call__"); if (__call__.IsValid()) { auto __callable__ = __call__.AsType(); if (__callable__.IsValid()) { py_func_obj = PyMethod_GET_FUNCTION(__callable__.get()); PythonObject im_self = __callable__.GetAttributeValue("im_self"); if (im_self.IsValid() && !im_self.IsNone()) is_bound_method = true; } } } } if (!py_func_obj) return result; PyCodeObject *code = (PyCodeObject *)PyFunction_GET_CODE(py_func_obj); if (!code) return result; auto count = code->co_argcount; bool has_varargs = !!(code->co_flags & CO_VARARGS); result.max_positional_args = has_varargs ? ArgInfo::UNBOUNDED : (count - (int)is_bound_method) - (int)is_class; #endif return result; } constexpr unsigned PythonCallable::ArgInfo::UNBOUNDED; // FIXME delete after c++17 PythonObject PythonCallable::operator()() { return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, nullptr)); } PythonObject PythonCallable:: operator()(std::initializer_list args) { PythonTuple arg_tuple(args); return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, arg_tuple.get())); } PythonObject PythonCallable:: operator()(std::initializer_list args) { PythonTuple arg_tuple(args); return PythonObject(PyRefType::Owned, PyObject_CallObject(m_py_obj, arg_tuple.get())); } bool PythonFile::Check(PyObject *py_obj) { if (!py_obj) return false; #if PY_MAJOR_VERSION < 3 return PyFile_Check(py_obj); #else // In Python 3, there is no `PyFile_Check`, and in fact PyFile is not even a // first-class object type anymore. `PyFile_FromFd` is just a thin wrapper // over `io.open()`, which returns some object derived from `io.IOBase`. As a // result, the only way to detect a file in Python 3 is to check whether it // inherits from `io.IOBase`. auto io_module = PythonModule::Import("io"); if (!io_module) { llvm::consumeError(io_module.takeError()); return false; } auto iobase = io_module.get().Get("IOBase"); if (!iobase) { llvm::consumeError(iobase.takeError()); return false; } int r = PyObject_IsInstance(py_obj, iobase.get().get()); if (r < 0) { llvm::consumeError(exception()); // clear the exception and log it. return false; } return !!r; #endif } namespace { class GIL { public: GIL() { m_state = PyGILState_Ensure(); assert(!PyErr_Occurred()); } ~GIL() { PyGILState_Release(m_state); } protected: PyGILState_STATE m_state; }; } // namespace const char *PythonException::toCString() const { if (!m_repr_bytes) return "unknown exception"; return PyBytes_AS_STRING(m_repr_bytes); } PythonException::PythonException(const char *caller) { assert(PyErr_Occurred()); m_exception_type = m_exception = m_traceback = m_repr_bytes = NULL; PyErr_Fetch(&m_exception_type, &m_exception, &m_traceback); PyErr_NormalizeException(&m_exception_type, &m_exception, &m_traceback); PyErr_Clear(); if (m_exception) { PyObject *repr = PyObject_Repr(m_exception); if (repr) { m_repr_bytes = PyUnicode_AsEncodedString(repr, "utf-8", nullptr); if (!m_repr_bytes) { PyErr_Clear(); } Py_XDECREF(repr); } else { PyErr_Clear(); } } Log *log = GetLogIfAllCategoriesSet(LIBLLDB_LOG_SCRIPT); if (caller) LLDB_LOGF(log, "%s failed with exception: %s", caller, toCString()); else LLDB_LOGF(log, "python exception: %s", toCString()); } void PythonException::Restore() { if (m_exception_type && m_exception) { PyErr_Restore(m_exception_type, m_exception, m_traceback); } else { PyErr_SetString(PyExc_Exception, toCString()); } m_exception_type = m_exception = m_traceback = NULL; } PythonException::~PythonException() { Py_XDECREF(m_exception_type); Py_XDECREF(m_exception); Py_XDECREF(m_traceback); Py_XDECREF(m_repr_bytes); } void PythonException::log(llvm::raw_ostream &OS) const { OS << toCString(); } std::error_code PythonException::convertToErrorCode() const { return llvm::inconvertibleErrorCode(); } bool PythonException::Matches(PyObject *exc) const { return PyErr_GivenExceptionMatches(m_exception_type, exc); } const char read_exception_script[] = R"( import sys from traceback import print_exception if sys.version_info.major < 3: from StringIO import StringIO else: from io import StringIO def main(exc_type, exc_value, tb): f = StringIO() print_exception(exc_type, exc_value, tb, file=f) return f.getvalue() )"; std::string PythonException::ReadBacktrace() const { if (!m_traceback) return toCString(); // no need to synchronize access to this global, we already have the GIL static PythonScript read_exception(read_exception_script); Expected backtrace = As( read_exception(m_exception_type, m_exception, m_traceback)); if (!backtrace) { std::string message = std::string(toCString()) + "\n" + "Traceback unavailable, an error occurred while reading it:\n"; return (message + llvm::toString(backtrace.takeError())); } return std::move(backtrace.get()); } char PythonException::ID = 0; llvm::Expected GetOptionsForPyObject(const PythonObject &obj) { #if PY_MAJOR_VERSION >= 3 auto options = File::OpenOptions(0); auto readable = As(obj.CallMethod("readable")); if (!readable) return readable.takeError(); auto writable = As(obj.CallMethod("writable")); if (!writable) return writable.takeError(); if (readable.get()) options |= File::eOpenOptionRead; if (writable.get()) options |= File::eOpenOptionWrite; return options; #else PythonString py_mode = obj.GetAttributeValue("mode").AsType(); return File::GetOptionsFromMode(py_mode.GetString()); #endif } // Base class template for python files. All it knows how to do // is hold a reference to the python object and close or flush it // when the File is closed. namespace { template class OwnedPythonFile : public Base { public: template OwnedPythonFile(const PythonFile &file, bool borrowed, Args... args) : Base(args...), m_py_obj(file), m_borrowed(borrowed) { assert(m_py_obj); } ~OwnedPythonFile() override { assert(m_py_obj); GIL takeGIL; Close(); // we need to ensure the python object is released while we still // hold the GIL m_py_obj.Reset(); } bool IsPythonSideValid() const { GIL takeGIL; auto closed = As(m_py_obj.GetAttribute("closed")); if (!closed) { llvm::consumeError(closed.takeError()); return false; } return !closed.get(); } bool IsValid() const override { return IsPythonSideValid() && Base::IsValid(); } Status Close() override { assert(m_py_obj); Status py_error, base_error; GIL takeGIL; if (!m_borrowed) { auto r = m_py_obj.CallMethod("close"); if (!r) py_error = Status(r.takeError()); } base_error = Base::Close(); if (py_error.Fail()) return py_error; return base_error; }; PyObject *GetPythonObject() const { assert(m_py_obj.IsValid()); return m_py_obj.get(); } static bool classof(const File *file) = delete; protected: PythonFile m_py_obj; bool m_borrowed; }; } // namespace // A SimplePythonFile is a OwnedPythonFile that just does all I/O as // a NativeFile namespace { class SimplePythonFile : public OwnedPythonFile { public: SimplePythonFile(const PythonFile &file, bool borrowed, int fd, File::OpenOptions options) : OwnedPythonFile(file, borrowed, fd, options, false) {} static char ID; bool isA(const void *classID) const override { return classID == &ID || NativeFile::isA(classID); } static bool classof(const File *file) { return file->isA(&ID); } }; char SimplePythonFile::ID = 0; } // namespace #if PY_MAJOR_VERSION >= 3 namespace { class PythonBuffer { public: PythonBuffer &operator=(const PythonBuffer &) = delete; PythonBuffer(const PythonBuffer &) = delete; static Expected Create(PythonObject &obj, int flags = PyBUF_SIMPLE) { Py_buffer py_buffer = {}; PyObject_GetBuffer(obj.get(), &py_buffer, flags); if (!py_buffer.obj) return llvm::make_error(); return PythonBuffer(py_buffer); } PythonBuffer(PythonBuffer &&other) { m_buffer = other.m_buffer; other.m_buffer.obj = nullptr; } ~PythonBuffer() { if (m_buffer.obj) PyBuffer_Release(&m_buffer); } Py_buffer &get() { return m_buffer; } private: // takes ownership of the buffer. PythonBuffer(const Py_buffer &py_buffer) : m_buffer(py_buffer) {} Py_buffer m_buffer; }; } // namespace // Shared methods between TextPythonFile and BinaryPythonFile namespace { class PythonIOFile : public OwnedPythonFile { public: PythonIOFile(const PythonFile &file, bool borrowed) : OwnedPythonFile(file, borrowed) {} ~PythonIOFile() override { Close(); } bool IsValid() const override { return IsPythonSideValid(); } Status Close() override { assert(m_py_obj); GIL takeGIL; if (m_borrowed) return Flush(); auto r = m_py_obj.CallMethod("close"); if (!r) return Status(r.takeError()); return Status(); } Status Flush() override { GIL takeGIL; auto r = m_py_obj.CallMethod("flush"); if (!r) return Status(r.takeError()); return Status(); } Expected GetOptions() const override { GIL takeGIL; return GetOptionsForPyObject(m_py_obj); } static char ID; bool isA(const void *classID) const override { return classID == &ID || File::isA(classID); } static bool classof(const File *file) { return file->isA(&ID); } }; char PythonIOFile::ID = 0; } // namespace namespace { class BinaryPythonFile : public PythonIOFile { protected: int m_descriptor; public: BinaryPythonFile(int fd, const PythonFile &file, bool borrowed) : PythonIOFile(file, borrowed), m_descriptor(File::DescriptorIsValid(fd) ? fd : File::kInvalidDescriptor) {} int GetDescriptor() const override { return m_descriptor; } Status Write(const void *buf, size_t &num_bytes) override { GIL takeGIL; PyObject *pybuffer_p = PyMemoryView_FromMemory( const_cast((const char *)buf), num_bytes, PyBUF_READ); if (!pybuffer_p) return Status(llvm::make_error()); auto pybuffer = Take(pybuffer_p); num_bytes = 0; auto bytes_written = As(m_py_obj.CallMethod("write", pybuffer)); if (!bytes_written) return Status(bytes_written.takeError()); if (bytes_written.get() < 0) return Status(".write() method returned a negative number!"); static_assert(sizeof(long long) >= sizeof(size_t), "overflow"); num_bytes = bytes_written.get(); return Status(); } Status Read(void *buf, size_t &num_bytes) override { GIL takeGIL; static_assert(sizeof(long long) >= sizeof(size_t), "overflow"); auto pybuffer_obj = m_py_obj.CallMethod("read", (unsigned long long)num_bytes); if (!pybuffer_obj) return Status(pybuffer_obj.takeError()); num_bytes = 0; if (pybuffer_obj.get().IsNone()) { // EOF num_bytes = 0; return Status(); } auto pybuffer = PythonBuffer::Create(pybuffer_obj.get()); if (!pybuffer) return Status(pybuffer.takeError()); memcpy(buf, pybuffer.get().get().buf, pybuffer.get().get().len); num_bytes = pybuffer.get().get().len; return Status(); } }; } // namespace namespace { class TextPythonFile : public PythonIOFile { protected: int m_descriptor; public: TextPythonFile(int fd, const PythonFile &file, bool borrowed) : PythonIOFile(file, borrowed), m_descriptor(File::DescriptorIsValid(fd) ? fd : File::kInvalidDescriptor) {} int GetDescriptor() const override { return m_descriptor; } Status Write(const void *buf, size_t &num_bytes) override { GIL takeGIL; auto pystring = PythonString::FromUTF8(llvm::StringRef((const char *)buf, num_bytes)); if (!pystring) return Status(pystring.takeError()); num_bytes = 0; auto bytes_written = As(m_py_obj.CallMethod("write", pystring.get())); if (!bytes_written) return Status(bytes_written.takeError()); if (bytes_written.get() < 0) return Status(".write() method returned a negative number!"); static_assert(sizeof(long long) >= sizeof(size_t), "overflow"); num_bytes = bytes_written.get(); return Status(); } Status Read(void *buf, size_t &num_bytes) override { GIL takeGIL; size_t num_chars = num_bytes / 6; size_t orig_num_bytes = num_bytes; num_bytes = 0; if (orig_num_bytes < 6) { return Status("can't read less than 6 bytes from a utf8 text stream"); } auto pystring = As( m_py_obj.CallMethod("read", (unsigned long long)num_chars)); if (!pystring) return Status(pystring.takeError()); if (pystring.get().IsNone()) { // EOF return Status(); } auto stringref = pystring.get().AsUTF8(); if (!stringref) return Status(stringref.takeError()); num_bytes = stringref.get().size(); memcpy(buf, stringref.get().begin(), num_bytes); return Status(); } }; } // namespace #endif llvm::Expected PythonFile::ConvertToFile(bool borrowed) { if (!IsValid()) return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid PythonFile"); int fd = PyObject_AsFileDescriptor(m_py_obj); if (fd < 0) { PyErr_Clear(); return ConvertToFileForcingUseOfScriptingIOMethods(borrowed); } auto options = GetOptionsForPyObject(*this); if (!options) return options.takeError(); if (options.get() & File::eOpenOptionWrite) { // LLDB and python will not share I/O buffers. We should probably // flush the python buffers now. auto r = CallMethod("flush"); if (!r) return r.takeError(); } FileSP file_sp; if (borrowed) { // In this case we we don't need to retain the python // object at all. file_sp = std::make_shared(fd, options.get(), false); } else { file_sp = std::static_pointer_cast( std::make_shared(*this, borrowed, fd, options.get())); } if (!file_sp->IsValid()) return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid File"); return file_sp; } llvm::Expected PythonFile::ConvertToFileForcingUseOfScriptingIOMethods(bool borrowed) { assert(!PyErr_Occurred()); if (!IsValid()) return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid PythonFile"); #if PY_MAJOR_VERSION < 3 return llvm::createStringError(llvm::inconvertibleErrorCode(), "not supported on python 2"); #else int fd = PyObject_AsFileDescriptor(m_py_obj); if (fd < 0) { PyErr_Clear(); fd = File::kInvalidDescriptor; } auto io_module = PythonModule::Import("io"); if (!io_module) return io_module.takeError(); auto textIOBase = io_module.get().Get("TextIOBase"); if (!textIOBase) return textIOBase.takeError(); auto rawIOBase = io_module.get().Get("RawIOBase"); if (!rawIOBase) return rawIOBase.takeError(); auto bufferedIOBase = io_module.get().Get("BufferedIOBase"); if (!bufferedIOBase) return bufferedIOBase.takeError(); FileSP file_sp; auto isTextIO = IsInstance(textIOBase.get()); if (!isTextIO) return isTextIO.takeError(); if (isTextIO.get()) file_sp = std::static_pointer_cast( std::make_shared(fd, *this, borrowed)); auto isRawIO = IsInstance(rawIOBase.get()); if (!isRawIO) return isRawIO.takeError(); auto isBufferedIO = IsInstance(bufferedIOBase.get()); if (!isBufferedIO) return isBufferedIO.takeError(); if (isRawIO.get() || isBufferedIO.get()) { file_sp = std::static_pointer_cast( std::make_shared(fd, *this, borrowed)); } if (!file_sp) return llvm::createStringError(llvm::inconvertibleErrorCode(), "python file is neither text nor binary"); if (!file_sp->IsValid()) return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid File"); return file_sp; #endif } Expected PythonFile::FromFile(File &file, const char *mode) { if (!file.IsValid()) return llvm::createStringError(llvm::inconvertibleErrorCode(), "invalid file"); if (auto *simple = llvm::dyn_cast(&file)) return Retain(simple->GetPythonObject()); #if PY_MAJOR_VERSION >= 3 if (auto *pythonio = llvm::dyn_cast(&file)) return Retain(pythonio->GetPythonObject()); #endif if (!mode) { auto m = file.GetOpenMode(); if (!m) return m.takeError(); mode = m.get(); } PyObject *file_obj; #if PY_MAJOR_VERSION >= 3 file_obj = PyFile_FromFd(file.GetDescriptor(), nullptr, mode, -1, nullptr, "ignore", nullptr, /*closefd=*/0); #else // I'd like to pass ::fflush here if the file is writable, so that // when the python side destructs the file object it will be flushed. // However, this would be dangerous. It can cause fflush to be called // after fclose if the python program keeps a reference to the file after // the original lldb_private::File has been destructed. // // It's all well and good to ask a python program not to use a closed file // but asking a python program to make sure objects get released in a // particular order is not safe. // // The tradeoff here is that if a python 2 program wants to make sure this // file gets flushed, they'll have to do it explicitly or wait untill the // original lldb File itself gets flushed. file_obj = PyFile_FromFile(file.GetStream(), py2_const_cast(""), py2_const_cast(mode), [](FILE *) { return 0; }); #endif if (!file_obj) return exception(); return Take(file_obj); } Error PythonScript::Init() { if (function.IsValid()) return Error::success(); PythonDictionary globals(PyInitialValue::Empty); auto builtins = PythonModule::BuiltinsModule(); if (Error error = globals.SetItem("__builtins__", builtins)) return error; PyObject *o = PyRun_String(script, Py_file_input, globals.get(), globals.get()); if (!o) return exception(); Take(o); auto f = As(globals.GetItem("main")); if (!f) return f.takeError(); function = std::move(f.get()); return Error::success(); } llvm::Expected python::runStringOneLine(const llvm::Twine &string, const PythonDictionary &globals, const PythonDictionary &locals) { if (!globals.IsValid() || !locals.IsValid()) return nullDeref(); PyObject *code = Py_CompileString(NullTerminated(string), "", Py_eval_input); if (!code) { PyErr_Clear(); code = Py_CompileString(NullTerminated(string), "", Py_single_input); } if (!code) return exception(); auto code_ref = Take(code); #if PY_MAJOR_VERSION < 3 PyObject *result = PyEval_EvalCode((PyCodeObject *)code, globals.get(), locals.get()); #else PyObject *result = PyEval_EvalCode(code, globals.get(), locals.get()); #endif if (!result) return exception(); return Take(result); } llvm::Expected python::runStringMultiLine(const llvm::Twine &string, const PythonDictionary &globals, const PythonDictionary &locals) { if (!globals.IsValid() || !locals.IsValid()) return nullDeref(); PyObject *result = PyRun_String(NullTerminated(string), Py_file_input, globals.get(), locals.get()); if (!result) return exception(); return Take(result); } #endif