1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2005-2015, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 10 #ifndef __CSR2022_H 11 #define __CSR2022_H 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_CONVERSION 16 17 #include "csrecog.h" 18 19 U_NAMESPACE_BEGIN 20 21 class CharsetMatch; 22 23 /** 24 * class CharsetRecog_2022 part of the ICU charset detection imlementation. 25 * This is a superclass for the individual detectors for 26 * each of the detectable members of the ISO 2022 family 27 * of encodings. 28 * 29 * The separate classes are nested within this class. 30 * 31 * @internal 32 */ 33 class CharsetRecog_2022 : public CharsetRecognizer 34 { 35 36 public: 37 virtual ~CharsetRecog_2022() = 0; 38 39 protected: 40 41 /** 42 * Matching function shared among the 2022 detectors JP, CN and KR 43 * Counts up the number of legal an unrecognized escape sequences in 44 * the sample of text, and computes a score based on the total number & 45 * the proportion that fit the encoding. 46 * 47 * 48 * @param text the byte buffer containing text to analyse 49 * @param textLen the size of the text in the byte. 50 * @param escapeSequences the byte escape sequences to test for. 51 * @return match quality, in the range of 0-100. 52 */ 53 int32_t match_2022(const uint8_t *text, 54 int32_t textLen, 55 const uint8_t escapeSequences[][5], 56 int32_t escapeSequences_length) const; 57 58 }; 59 60 class CharsetRecog_2022JP :public CharsetRecog_2022 61 { 62 public: 63 virtual ~CharsetRecog_2022JP(); 64 65 const char *getName() const; 66 67 UBool match(InputText *textIn, CharsetMatch *results) const; 68 }; 69 70 #if !UCONFIG_ONLY_HTML_CONVERSION 71 class CharsetRecog_2022KR :public CharsetRecog_2022 { 72 public: 73 virtual ~CharsetRecog_2022KR(); 74 75 const char *getName() const; 76 77 UBool match(InputText *textIn, CharsetMatch *results) const; 78 79 }; 80 81 class CharsetRecog_2022CN :public CharsetRecog_2022 82 { 83 public: 84 virtual ~CharsetRecog_2022CN(); 85 86 const char* getName() const; 87 88 UBool match(InputText *textIn, CharsetMatch *results) const; 89 }; 90 #endif 91 92 U_NAMESPACE_END 93 94 #endif 95 #endif /* __CSR2022_H */ 96