1 /* Format bytes as hexadecimal */
2
3 #include "Python.h"
4 #include "pycore_strhex.h" // _Py_strhex_with_sep()
5 #include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency()
6
/* Hex-encode arglen bytes starting at argbuf into a new Python object.
 *
 *   argbuf               bytes to encode; only argbuf[0..arglen-1] is read.
 *   arglen               number of input bytes; must be >= 0 (asserted).
 *   sep                  NULL for no separator, otherwise a str or bytes
 *                        object of length 1 whose single character is
 *                        inserted between groups of hex digits.
 *   bytes_per_sep_group  input bytes per group.  0 disables the separator.
 *                        A positive value forms groups counting from the
 *                        end of the input (a short group, if any, comes
 *                        first); a negative value forms groups counting
 *                        from the start (a short group, if any, comes
 *                        last).  Ignored when sep is NULL.
 *   return_bytes         non-zero to build a bytes object, zero for a str.
 *
 * Returns the new object, or NULL with an exception set:
 *   ValueError   sep is not of length 1, is a str that is not of 1-byte
 *                kind, or is above 127 while a str result was requested;
 *   TypeError    sep is neither str nor bytes;
 *   MemoryError  the result length fails the size check below;
 *   or whatever PyObject_Length() or the result allocation raised.
 */
static PyObject *_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
                                 PyObject* sep, int bytes_per_sep_group,
                                 const int return_bytes)
{
    assert(arglen >= 0);

    Py_UCS1 sep_char = 0;
    if (sep) {
        Py_ssize_t seplen = PyObject_Length((PyObject*)sep);
        if (seplen < 0) {
            return NULL;
        }
        if (seplen != 1) {
            PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
            return NULL;
        }
        if (PyUnicode_Check(sep)) {
            if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
                PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
                return NULL;
            }
            sep_char = PyUnicode_READ_CHAR(sep, 0);
        }
        else if (PyBytes_Check(sep)) {
            sep_char = PyBytes_AS_STRING(sep)[0];
        }
        else {
            PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
            return NULL;
        }
        /* A str result is created with maxchar 127, so the separator
         * must fit in that range; a bytes result accepts any byte. */
        if (sep_char > 127 && !return_bytes) {
            PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
            return NULL;
        }
    }
    else {
        /* Without a separator the group size is meaningless. */
        bytes_per_sep_group = 0;
    }

    /* Magnitude of the group size.  The negation is done in unsigned
     * arithmetic so that it stays well-defined for INT_MIN, where negating
     * the signed value would overflow. */
    unsigned int abs_bytes_per_sep;
    if (bytes_per_sep_group < 0) {
        abs_bytes_per_sep = 0U - (unsigned int)bytes_per_sep_group;
    }
    else {
        abs_bytes_per_sep = (unsigned int)bytes_per_sep_group;
    }

    Py_ssize_t resultlen = 0;
    if (bytes_per_sep_group && arglen > 0) {
        /* How many sep characters we'll be inserting. */
        resultlen = (arglen - 1) / abs_bytes_per_sep;
    }
    /* Bounds checking for our Py_ssize_t indices. */
    if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
        return PyErr_NoMemory();
    }
    resultlen += arglen * 2;

    /* A group at least as large as the whole input never needs a
     * separator; fall back to the plain path.  This also covers
     * arglen == 0, so the division further down never sees a zero
     * divisor. */
    if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
        bytes_per_sep_group = 0;
        abs_bytes_per_sep = 0;
    }

    PyObject *retval;
    Py_UCS1 *retbuf;
    if (return_bytes) {
        /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
        retval = PyBytes_FromStringAndSize(NULL, resultlen);
        if (!retval) {
            return NULL;
        }
        retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
    }
    else {
        retval = PyUnicode_New(resultlen, 127);
        if (!retval) {
            return NULL;
        }
        retbuf = PyUnicode_1BYTE_DATA(retval);
    }

    /* Hexlify */
    Py_ssize_t i, j;
    unsigned char c;

    if (bytes_per_sep_group == 0) {
        /* No separators: two hex digits per input byte, front to back. */
        for (i = j = 0; i < arglen; ++i) {
            assert((j + 1) < resultlen);
            c = argbuf[i];
            retbuf[j++] = Py_hexdigits[c >> 4];
            retbuf[j++] = Py_hexdigits[c & 0x0f];
        }
        assert(j == resultlen);
    }
    else {
        /* The number of complete chunk+sep periods */
        Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
        Py_ssize_t chunk;
        unsigned int k;

        if (bytes_per_sep_group < 0) {
            /* Groups counted from the start: emit full groups followed by
             * a separator, then whatever input remains. */
            i = j = 0;
            for (chunk = 0; chunk < chunks; chunk++) {
                for (k = 0; k < abs_bytes_per_sep; k++) {
                    c = argbuf[i++];
                    retbuf[j++] = Py_hexdigits[c >> 4];
                    retbuf[j++] = Py_hexdigits[c & 0x0f];
                }
                retbuf[j++] = sep_char;
            }
            while (i < arglen) {
                c = argbuf[i++];
                retbuf[j++] = Py_hexdigits[c >> 4];
                retbuf[j++] = Py_hexdigits[c & 0x0f];
            }
            assert(j == resultlen);
        }
        else {
            /* Groups counted from the end: fill the buffer back to front,
             * so the low nibble of each byte is written first. */
            i = arglen - 1;
            j = resultlen - 1;
            for (chunk = 0; chunk < chunks; chunk++) {
                for (k = 0; k < abs_bytes_per_sep; k++) {
                    c = argbuf[i--];
                    retbuf[j--] = Py_hexdigits[c & 0x0f];
                    retbuf[j--] = Py_hexdigits[c >> 4];
                }
                retbuf[j--] = sep_char;
            }
            while (i >= 0) {
                c = argbuf[i--];
                retbuf[j--] = Py_hexdigits[c & 0x0f];
                retbuf[j--] = Py_hexdigits[c >> 4];
            }
            assert(j == -1);
        }
    }

#ifdef Py_DEBUG
    if (!return_bytes) {
        assert(_PyUnicode_CheckConsistency(retval, 1));
    }
#endif

    return retval;
}
145
_Py_strhex(const char * argbuf,const Py_ssize_t arglen)146 PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
147 {
148 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
149 }
150
151 /* Same as above but returns a bytes() instead of str() to avoid the
152 * need to decode the str() when bytes are needed. */
_Py_strhex_bytes(const char * argbuf,const Py_ssize_t arglen)153 PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
154 {
155 return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
156 }
157
158 /* These variants include support for a separator between every N bytes: */
159
PyObject *
_Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen,
                    PyObject* sep, const int bytes_per_group)
{
    /* Forward the separator and group size unchanged; the final argument
     * asks the implementation for a str result. */
    PyObject *hex_text = _Py_strhex_impl(argbuf, arglen,
                                         sep, bytes_per_group,
                                         /* return_bytes */ 0);
    return hex_text;
}
165
166 /* Same as above but returns a bytes() instead of str() to avoid the
167 * need to decode the str() when bytes are needed. */
PyObject *
_Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen,
                          PyObject* sep, const int bytes_per_group)
{
    /* Forward the separator and group size unchanged; the final argument
     * asks the implementation for a bytes result. */
    PyObject *hex_bytes = _Py_strhex_impl(argbuf, arglen,
                                          sep, bytes_per_group,
                                          /* return_bytes */ 1);
    return hex_bytes;
}
173