1 /* 2 * nghttp2 - HTTP/2 C Library 3 * 4 * Copyright (c) 2012 Tatsuhiro Tsujikawa 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be 15 * included in all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 20 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 24 */ 25 #ifndef HTML_PARSER_H 26 #define HTML_PARSER_H 27 28 #include "nghttp2_config.h" 29 30 #include <vector> 31 #include <string> 32 33 #ifdef HAVE_LIBXML2 34 35 # include <libxml/HTMLparser.h> 36 37 #endif // HAVE_LIBXML2 38 39 namespace nghttp2 { 40 41 enum ResourceType { 42 REQ_CSS = 1, 43 REQ_JS, 44 REQ_UNBLOCK_JS, 45 REQ_IMG, 46 REQ_OTHERS, 47 }; 48 49 struct ParserData { 50 std::string base_uri; 51 std::vector<std::pair<std::string, ResourceType>> links; 52 // > 0 if we are inside "head" element. 53 int inside_head; 54 ParserData(const std::string &base_uri); 55 }; 56 57 #ifdef HAVE_LIBXML2 58 59 class HtmlParser { 60 public: 61 HtmlParser(const std::string &base_uri); 62 ~HtmlParser(); 63 int parse_chunk(const char *chunk, size_t size, int fin); 64 const std::vector<std::pair<std::string, ResourceType>> &get_links() const; 65 void clear_links(); 66 67 private: 68 int parse_chunk_internal(const char *chunk, size_t size, int fin); 69 70 std::string base_uri_; 71 htmlParserCtxtPtr parser_ctx_; 72 ParserData parser_data_; 73 }; 74 75 #else // !HAVE_LIBXML2 76 77 class HtmlParser { 78 public: HtmlParser(const std::string & base_uri)79 HtmlParser(const std::string &base_uri) {} parse_chunk(const char * chunk,size_t size,int fin)80 int parse_chunk(const char *chunk, size_t size, int fin) { return 0; } get_links()81 const std::vector<std::pair<std::string, ResourceType>> &get_links() const { 82 return links_; 83 } clear_links()84 void clear_links() {} 85 86 private: 87 std::vector<std::pair<std::string, ResourceType>> links_; 88 }; 89 90 #endif // !HAVE_LIBXML2 91 92 } // namespace nghttp2 93 94 #endif // HTML_PARSER_H 95