1 /* 2 * libwebsockets - small server side websockets and web server implementation 3 * 4 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25 /* Do not treat - as a terminal character, so "my-token" is one token */ 26 #define LWS_TOKENIZE_F_MINUS_NONTERM (1 << 0) 27 /* Separately report aggregate colon-delimited tokens */ 28 #define LWS_TOKENIZE_F_AGG_COLON (1 << 1) 29 /* Enforce sequencing for a simple token , token , token ... list */ 30 #define LWS_TOKENIZE_F_COMMA_SEP_LIST (1 << 2) 31 /* Allow more characters in the tokens and less delimiters... default is 32 * only alphanumeric + underscore in tokens */ 33 #define LWS_TOKENIZE_F_RFC7230_DELIMS (1 << 3) 34 /* Do not treat . as a terminal character, so "warmcat.com" is one token */ 35 #define LWS_TOKENIZE_F_DOT_NONTERM (1 << 4) 36 /* If something starts looking like a float, like 1.2, force to be string token. 37 * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and 38 * avoids illegal float format detection like 1.myserver.com */ 39 #define LWS_TOKENIZE_F_NO_FLOATS (1 << 5) 40 /* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */ 41 #define LWS_TOKENIZE_F_NO_INTEGERS (1 << 6) 42 /* # makes the rest of the line a comment */ 43 #define LWS_TOKENIZE_F_HASH_COMMENT (1 << 7) 44 /* Do not treat / as a terminal character, so "multipart/related" is one token */ 45 #define LWS_TOKENIZE_F_SLASH_NONTERM (1 << 8) 46 47 typedef enum { 48 49 LWS_TOKZE_ERRS = 5, /* the number of errors defined */ 50 51 LWS_TOKZE_ERR_BROKEN_UTF8 = -5, /* malformed or partial utf8 */ 52 LWS_TOKZE_ERR_UNTERM_STRING = -4, /* ended while we were in "" */ 53 LWS_TOKZE_ERR_MALFORMED_FLOAT = -3, /* like 0..1 or 0.1.1 */ 54 LWS_TOKZE_ERR_NUM_ON_LHS = -2, /* like 123= or 0.1= */ 55 LWS_TOKZE_ERR_COMMA_LIST = -1, /* like ",tok", or, "tok,," */ 56 57 LWS_TOKZE_ENDED = 0, /* no more content */ 58 59 /* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */ 60 61 LWS_TOKZE_DELIMITER, /* a delimiter appeared */ 62 LWS_TOKZE_TOKEN, /* a token appeared */ 63 LWS_TOKZE_INTEGER, /* an integer appeared */ 64 LWS_TOKZE_FLOAT, /* a float appeared */ 65 LWS_TOKZE_TOKEN_NAME_EQUALS, /* token [whitespace] = */ 66 LWS_TOKZE_TOKEN_NAME_COLON, /* token [whitespace] : (only with 67 LWS_TOKENIZE_F_AGG_COLON flag) */ 68 LWS_TOKZE_QUOTED_STRING, /* "*", where * may have any char */ 69 70 } lws_tokenize_elem; 71 72 /* 73 * helper enums to allow caller to enforce legal delimiter sequencing, eg 74 * disallow "token,,token", "token,", and ",token" 75 */ 76 77 enum lws_tokenize_delimiter_tracking { 78 LWSTZ_DT_NEED_FIRST_CONTENT, 79 LWSTZ_DT_NEED_DELIM, 80 LWSTZ_DT_NEED_NEXT_CONTENT, 81 }; 82 83 typedef struct lws_tokenize { 84 const char *start; /**< set to the start of the string to tokenize */ 85 const char *token; /**< the start of an identified token or delimiter */ 86 size_t len; /**< set to the length of the string to tokenize */ 87 size_t token_len; /**< the length of the identied token or delimiter */ 88 89 uint16_t flags; /**< optional LWS_TOKENIZE_F_ flags, or 0 */ 90 uint8_t delim; 91 92 int8_t e; /**< convenient for storing lws_tokenize return */ 93 } lws_tokenize_t; 94 95 /** 96 * lws_tokenize() - breaks down a string into tokens and delimiters in-place 97 * 98 * \param ts: the lws_tokenize struct to init 99 * \param start: the string to tokenize 100 * \param flags: LWS_TOKENIZE_F_ option flags 101 * 102 * This initializes the tokenize struct to point to the given string, and 103 * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can 104 * override this requirement by setting ts.len yourself before using it. 105 * 106 * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT. 107 */ 108 109 LWS_VISIBLE LWS_EXTERN void 110 lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags); 111 112 /** 113 * lws_tokenize() - breaks down a string into tokens and delimiters in-place 114 * 115 * \param ts: the lws_tokenize struct with information and state on what to do 116 * 117 * The \p ts struct should have its start, len and flags members initialized to 118 * reflect the string to be tokenized and any options. 119 * 120 * Then `lws_tokenize()` may be called repeatedly on the struct, returning one 121 * of `lws_tokenize_elem` each time, and with the struct's `token` and 122 * `token_len` members set to describe the content of the delimiter or token 123 * payload each time. 124 * 125 * There are no allocations during the process. 126 * 127 * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached 128 * the end of the string). 129 */ 130 131 LWS_VISIBLE LWS_EXTERN lws_tokenize_elem 132 lws_tokenize(struct lws_tokenize *ts); 133 134 /** 135 * lws_tokenize_cstr() - copy token string to NUL-terminated buffer 136 * 137 * \param ts: pointer to lws_tokenize struct to operate on 138 * \param str: destination buffer 139 * \pparam max: bytes in destination buffer 140 * 141 * returns 0 if OK or nonzero if the string + NUL won't fit. 142 */ 143 144 LWS_VISIBLE LWS_EXTERN int 145 lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max); 146 147 148 /* 149 * lws_strexp: flexible string expansion helper api 150 * 151 * This stateful helper can handle multiple separate input chunks and multiple 152 * output buffer loads with arbitrary boundaries between literals and expanded 153 * symbols. This allows it to handle fragmented input as well as arbitrarily 154 * long symbol expansions that are bigger than the output buffer itself. 155 * 156 * A user callback is used to convert symbol names to the symbol value. 157 * 158 * A single byte buffer for input and another for output can process any 159 * length substitution then. The state object is around 64 bytes on a 64-bit 160 * system and it only uses 8 bytes stack. 161 */ 162 163 164 typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out, 165 size_t *pos, size_t olen, size_t *exp_ofs); 166 167 typedef struct lws_strexp { 168 char name[32]; 169 lws_strexp_expand_cb cb; 170 void *priv; 171 char *out; 172 size_t olen; 173 size_t pos; 174 175 size_t exp_ofs; 176 177 uint8_t name_pos; 178 char state; 179 } lws_strexp_t; 180 181 enum { 182 LSTRX_DONE, /* it completed OK */ 183 LSTRX_FILLED_OUT, /* out buf filled and needs resetting */ 184 LSTRX_FATAL_NAME_TOO_LONG = -1, /* fatal */ 185 LSTRX_FATAL_NAME_UNKNOWN = -2, 186 }; 187 188 189 /** 190 * lws_strexp_init() - initialize an lws_strexp_t for use 191 * 192 * \p exp: the exp object to init 193 * \p priv: the user's object pointer to pass to callback 194 * \p cb: the callback to expand named objects 195 * \p out: the start of the output buffer 196 * \p olen: the length of the output buffer in bytes 197 * 198 * Prepares an lws_strexp_t for use and sets the initial output buffer 199 */ 200 LWS_VISIBLE LWS_EXTERN void 201 lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb, 202 char *out, size_t olen); 203 204 /** 205 * lws_strexp_reset_out() - reset the output buffer on an existing strexp 206 * 207 * \p exp: the exp object to init 208 * \p out: the start of the output buffer 209 * \p olen: the length of the output buffer in bytes 210 * 211 * Provides a new output buffer for lws_strexp_expand() to continue to write 212 * into. It can be the same as the old one if it has been copied out or used. 213 * The position of the next write will be reset to the start of the given buf. 214 */ 215 LWS_VISIBLE LWS_EXTERN void 216 lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen); 217 218 /** 219 * lws_strexp_expand() - copy / expand a string into the output buffer 220 * 221 * \p exp: the exp object for the copy / expansion 222 * \p in: the start of the next input data 223 * \p len: the length of the input data 224 * \p pused_in: pointer to write the amount of input used 225 * \p pused_out: pointer to write the amount of output used 226 * 227 * Copies in to the output buffer set in exp, expanding any ${name} tokens using 228 * the callback. \p *pused_in is set to the number of input chars used and 229 * \p *pused_out the number of output characters used 230 * 231 * May return LSTRX_FILLED_OUT early with *pused < len if the output buffer is 232 * filled. Handle the output buffer and reset it with lws_strexp_reset_out() 233 * before calling again with adjusted in / len to continue. 234 * 235 * In the case of large expansions, the expansion itself may fill the output 236 * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT 237 * and will be called again to continue with its *exp_ofs parameter set 238 * appropriately. 239 */ 240 LWS_VISIBLE LWS_EXTERN int 241 lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len, 242 size_t *pused_in, size_t *pused_out); 243 244