• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  **********************************************************************
3  *   Copyright (C) 2005-2006, International Business Machines
4  *   Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  */
7 
8 #ifndef __CSR2022_H
9 #define __CSR2022_H
10 
11 #include "unicode/utypes.h"
12 
13 #if !UCONFIG_NO_CONVERSION
14 
15 #include "csrecog.h"
16 
17 U_NAMESPACE_BEGIN
18 
19 /**
20  *  class CharsetRecog_2022  part of the ICU charset detection imlementation.
21  *                           This is a superclass for the individual detectors for
22  *                           each of the detectable members of the ISO 2022 family
23  *                           of encodings.
24  *
25  *                           The separate classes are nested within this class.
26  *
27  * @internal
28  */
29 class CharsetRecog_2022 : public CharsetRecognizer
30 {
31 
32 public:
33     virtual ~CharsetRecog_2022() = 0;
34 
35 protected:
36 
37     /**
38      * Matching function shared among the 2022 detectors JP, CN and KR
39      * Counts up the number of legal an unrecognized escape sequences in
40      * the sample of text, and computes a score based on the total number &
41      * the proportion that fit the encoding.
42      *
43      *
44      * @param text the byte buffer containing text to analyse
45      * @param textLen  the size of the text in the byte.
46      * @param escapeSequences the byte escape sequences to test for.
47      * @return match quality, in the range of 0-100.
48      */
49     int32_t match_2022(const uint8_t *text, int32_t textLen, const uint8_t escapeSequences[][5], int32_t escapeSequences_length);
50 
51 };
52 
53 class CharsetRecog_2022JP :public CharsetRecog_2022
54 {
55 public:
~CharsetRecog_2022JP()56     virtual ~CharsetRecog_2022JP() {}
57 
58     const char *getName() const;
59 
60     int32_t match(InputText *textIn);
61 };
62 
63 class CharsetRecog_2022KR :public CharsetRecog_2022 {
64 public:
~CharsetRecog_2022KR()65     virtual ~CharsetRecog_2022KR() {}
66 
67     const char *getName() const;
68 
69     int32_t match(InputText *textIn);
70 
71 };
72 
73 class CharsetRecog_2022CN :public CharsetRecog_2022
74 {
75 public:
~CharsetRecog_2022CN()76     virtual ~CharsetRecog_2022CN() {}
77 
78     const char* getName() const;
79 
80     int32_t match(InputText *textIn);
81 };
82 
83 U_NAMESPACE_END
84 
85 #endif
86 #endif /* __CSR2022_H */
87