• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  **********************************************************************
5  *   Copyright (C) 2005-2015, International Business Machines
6  *   Corporation and others.  All Rights Reserved.
7  **********************************************************************
8  */
9 
10 #ifndef __CSR2022_H
11 #define __CSR2022_H
12 
13 #include "unicode/utypes.h"
14 
15 #if !UCONFIG_NO_CONVERSION
16 
17 #include "csrecog.h"
18 
19 U_NAMESPACE_BEGIN
20 
21 class CharsetMatch;
22 
23 /**
24  *  class CharsetRecog_2022  part of the ICU charset detection imlementation.
25  *                           This is a superclass for the individual detectors for
26  *                           each of the detectable members of the ISO 2022 family
27  *                           of encodings.
28  *
29  *                           The separate classes are nested within this class.
30  *
31  * @internal
32  */
33 class CharsetRecog_2022 : public CharsetRecognizer
34 {
35 
36 public:
37     virtual ~CharsetRecog_2022() = 0;
38 
39 protected:
40 
41     /**
42      * Matching function shared among the 2022 detectors JP, CN and KR
43      * Counts up the number of legal an unrecognized escape sequences in
44      * the sample of text, and computes a score based on the total number &
45      * the proportion that fit the encoding.
46      *
47      *
48      * @param text the byte buffer containing text to analyse
49      * @param textLen  the size of the text in the byte.
50      * @param escapeSequences the byte escape sequences to test for.
51      * @return match quality, in the range of 0-100.
52      */
53     int32_t match_2022(const uint8_t *text,
54                        int32_t textLen,
55                        const uint8_t escapeSequences[][5],
56                        int32_t escapeSequences_length) const;
57 
58 };
59 
60 class CharsetRecog_2022JP :public CharsetRecog_2022
61 {
62 public:
63     virtual ~CharsetRecog_2022JP();
64 
65     const char *getName() const;
66 
67     UBool match(InputText *textIn, CharsetMatch *results) const;
68 };
69 
70 #if !UCONFIG_ONLY_HTML_CONVERSION
71 class CharsetRecog_2022KR :public CharsetRecog_2022 {
72 public:
73     virtual ~CharsetRecog_2022KR();
74 
75     const char *getName() const;
76 
77     UBool match(InputText *textIn, CharsetMatch *results) const;
78 
79 };
80 
81 class CharsetRecog_2022CN :public CharsetRecog_2022
82 {
83 public:
84     virtual ~CharsetRecog_2022CN();
85 
86     const char* getName() const;
87 
88     UBool match(InputText *textIn, CharsetMatch *results) const;
89 };
90 #endif
91 
92 U_NAMESPACE_END
93 
94 #endif
95 #endif /* __CSR2022_H */
96