1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ********************************************************************** 5 * Copyright (C) 2005-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ********************************************************************** 8 */ 9 10 #ifndef __CSDETECT_H 11 #define __CSDETECT_H 12 13 #include "unicode/uobject.h" 14 15 #if !UCONFIG_NO_CONVERSION 16 17 #include "unicode/uenum.h" 18 19 U_NAMESPACE_BEGIN 20 21 class InputText; 22 class CharsetRecognizer; 23 class CharsetMatch; 24 25 class CharsetDetector : public UMemory 26 { 27 private: 28 InputText *textIn; 29 CharsetMatch **resultArray; 30 int32_t resultCount; 31 UBool fStripTags; // If true, setText() will strip tags from input text. 32 UBool fFreshTextSet; 33 static void setRecognizers(UErrorCode &status); 34 35 UBool *fEnabledRecognizers; // If not null, active set of charset recognizers had 36 // been changed from the default. The array index is 37 // corresponding to fCSRecognizers. See setDetectableCharset(). 38 39 public: 40 CharsetDetector(UErrorCode &status); 41 42 ~CharsetDetector(); 43 44 void setText(const char *in, int32_t len); 45 46 const CharsetMatch * const *detectAll(int32_t &maxMatchesFound, UErrorCode &status); 47 48 const CharsetMatch *detect(UErrorCode& status); 49 50 void setDeclaredEncoding(const char *encoding, int32_t len) const; 51 52 UBool setStripTagsFlag(UBool flag); 53 54 UBool getStripTagsFlag() const; 55 56 // const char *getCharsetName(int32_t index, UErrorCode& status) const; 57 58 static int32_t getDetectableCount(); 59 60 61 static UEnumeration * getAllDetectableCharsets(UErrorCode &status); 62 UEnumeration * getDetectableCharsets(UErrorCode &status) const; 63 void setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status); 64 }; 65 66 U_NAMESPACE_END 67 68 #endif 69 #endif /* __CSDETECT_H */ 70