• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Format bytes as hexadecimal */
2 
3 #include "Python.h"
4 #include "pycore_strhex.h"        // _Py_strhex_with_sep()
5 #include "pycore_unicodeobject.h" // _PyUnicode_CheckConsistency()
6 
/* Hex-encode the arglen bytes at argbuf, optionally inserting a separator
 * character between groups of bytes.
 *
 *   argbuf, arglen       Input buffer and its length; arglen must be >= 0.
 *   sep                  NULL for no separator.  Otherwise an object of
 *                        length 1 that is a str or a bytes; its single
 *                        character is written between groups.
 *   bytes_per_sep_group  Input bytes per group.  Forced to 0 when sep is
 *                        NULL.  0 means no separators.  A positive value
 *                        counts groups from the end of the input (a short
 *                        group, if any, comes first); a negative value
 *                        counts groups from the start (a short group, if
 *                        any, comes last).
 *   return_bytes         Non-zero to build a bytes object, zero to build
 *                        a str.
 *
 * Returns the result object, or NULL on failure.  This function itself
 * raises ValueError when sep does not have length 1 or is not acceptable
 * as a one-byte / ASCII character, TypeError when sep is neither str nor
 * bytes, and reports out-of-memory when the result length would not fit
 * in a Py_ssize_t.
 */
static PyObject *
_Py_strhex_impl(const char* argbuf, const Py_ssize_t arglen,
                PyObject* sep, int bytes_per_sep_group,
                const int return_bytes)
{
    assert(arglen >= 0);

    Py_UCS1 sep_char = 0;
    if (sep) {
        Py_ssize_t seplen = PyObject_Length(sep);
        if (seplen < 0) {
            return NULL;
        }
        if (seplen != 1) {
            PyErr_SetString(PyExc_ValueError, "sep must be length 1.");
            return NULL;
        }
        if (PyUnicode_Check(sep)) {
            if (PyUnicode_KIND(sep) != PyUnicode_1BYTE_KIND) {
                PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
                return NULL;
            }
            sep_char = PyUnicode_READ_CHAR(sep, 0);
        }
        else if (PyBytes_Check(sep)) {
            sep_char = PyBytes_AS_STRING(sep)[0];
        }
        else {
            PyErr_SetString(PyExc_TypeError, "sep must be str or bytes.");
            return NULL;
        }
        /* A str result is created with max char 127, so the separator
           must be ASCII in that case; a bytes result takes any byte. */
        if (sep_char > 127 && !return_bytes) {
            PyErr_SetString(PyExc_ValueError, "sep must be ASCII.");
            return NULL;
        }
    }
    else {
        bytes_per_sep_group = 0;
    }

    /* Take the magnitude in unsigned arithmetic.  Negating INT_MIN as a
       signed int (which is what Py_ABS() expands to) is undefined
       behavior; 0U - (unsigned)x is well defined for every int x. */
    unsigned int abs_bytes_per_sep =
        (bytes_per_sep_group < 0)
            ? 0U - (unsigned int)bytes_per_sep_group
            : (unsigned int)bytes_per_sep_group;
    Py_ssize_t resultlen = 0;
    if (bytes_per_sep_group && arglen > 0) {
        /* How many sep characters we'll be inserting. */
        resultlen = (arglen - 1) / abs_bytes_per_sep;
    }
    /* Bounds checking for our Py_ssize_t indices. */
    if (arglen >= PY_SSIZE_T_MAX / 2 - resultlen) {
        return PyErr_NoMemory();
    }
    resultlen += arglen * 2;

    /* A group at least as long as the whole input never needs a
       separator, so take the plain (separator-free) path below. */
    if ((size_t)abs_bytes_per_sep >= (size_t)arglen) {
        bytes_per_sep_group = 0;
        abs_bytes_per_sep = 0;
    }

    PyObject *retval;
    Py_UCS1 *retbuf;
    if (return_bytes) {
        /* If _PyBytes_FromSize() were public we could avoid malloc+copy. */
        retval = PyBytes_FromStringAndSize(NULL, resultlen);
        if (!retval) {
            return NULL;
        }
        retbuf = (Py_UCS1 *)PyBytes_AS_STRING(retval);
    }
    else {
        retval = PyUnicode_New(resultlen, 127);
        if (!retval) {
            return NULL;
        }
        retbuf = PyUnicode_1BYTE_DATA(retval);
    }

    /* Hexlify */
    Py_ssize_t i, j;
    unsigned char c;

    if (bytes_per_sep_group == 0) {
        for (i = j = 0; i < arglen; ++i) {
            assert((j + 1) < resultlen);
            c = argbuf[i];
            retbuf[j++] = Py_hexdigits[c >> 4];
            retbuf[j++] = Py_hexdigits[c & 0x0f];
        }
        assert(j == resultlen);
    }
    else {
        /* The number of complete chunk+sep periods */
        Py_ssize_t chunks = (arglen - 1) / abs_bytes_per_sep;
        Py_ssize_t chunk;
        unsigned int k;

        if (bytes_per_sep_group < 0) {
            /* Fill front to back: full groups first, remainder last. */
            i = j = 0;
            for (chunk = 0; chunk < chunks; chunk++) {
                for (k = 0; k < abs_bytes_per_sep; k++) {
                    c = argbuf[i++];
                    retbuf[j++] = Py_hexdigits[c >> 4];
                    retbuf[j++] = Py_hexdigits[c & 0x0f];
                }
                retbuf[j++] = sep_char;
            }
            while (i < arglen) {
                c = argbuf[i++];
                retbuf[j++] = Py_hexdigits[c >> 4];
                retbuf[j++] = Py_hexdigits[c & 0x0f];
            }
            assert(j == resultlen);
        }
        else {
            /* Fill back to front so that the full groups sit at the end
               and the remainder ends up at the start of the output. */
            i = arglen - 1;
            j = resultlen - 1;
            for (chunk = 0; chunk < chunks; chunk++) {
                for (k = 0; k < abs_bytes_per_sep; k++) {
                    c = argbuf[i--];
                    retbuf[j--] = Py_hexdigits[c & 0x0f];
                    retbuf[j--] = Py_hexdigits[c >> 4];
                }
                retbuf[j--] = sep_char;
            }
            while (i >= 0) {
                c = argbuf[i--];
                retbuf[j--] = Py_hexdigits[c & 0x0f];
                retbuf[j--] = Py_hexdigits[c >> 4];
            }
            assert(j == -1);
        }
    }

#ifdef Py_DEBUG
    if (!return_bytes) {
        assert(_PyUnicode_CheckConsistency(retval, 1));
    }
#endif

    return retval;
}
145 
_Py_strhex(const char * argbuf,const Py_ssize_t arglen)146 PyObject * _Py_strhex(const char* argbuf, const Py_ssize_t arglen)
147 {
148     return _Py_strhex_impl(argbuf, arglen, NULL, 0, 0);
149 }
150 
151 /* Same as above but returns a bytes() instead of str() to avoid the
152  * need to decode the str() when bytes are needed. */
_Py_strhex_bytes(const char * argbuf,const Py_ssize_t arglen)153 PyObject* _Py_strhex_bytes(const char* argbuf, const Py_ssize_t arglen)
154 {
155     return _Py_strhex_impl(argbuf, arglen, NULL, 0, 1);
156 }
157 
158 /* These variants include support for a separator between every N bytes: */
159 
/* Hex-encode arglen bytes at argbuf, forwarding sep and bytes_per_group
 * unchanged to the implementation; the final 0 is its return_bytes flag
 * (bytes result not requested). */
PyObject *
_Py_strhex_with_sep(const char* argbuf, const Py_ssize_t arglen,
                    PyObject* sep, const int bytes_per_group)
{
    const int want_bytes = 0;

    return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, want_bytes);
}
165 
166 /* Same as above but returns a bytes() instead of str() to avoid the
167  * need to decode the str() when bytes are needed. */
_Py_strhex_bytes_with_sep(const char * argbuf,const Py_ssize_t arglen,PyObject * sep,const int bytes_per_group)168 PyObject* _Py_strhex_bytes_with_sep(const char* argbuf, const Py_ssize_t arglen,
169                                     PyObject* sep, const int bytes_per_group)
170 {
171     return _Py_strhex_impl(argbuf, arglen, sep, bytes_per_group, 1);
172 }
173