• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
6 #define CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
7 
8 #include <map>
9 #include <stdio.h>
10 #include <string>
11 #include <vector>
12 
13 namespace base {
14 class FilePath;
15 }
16 
17 namespace convert_dict {
18 
19 class AffReader {
20  public:
21   explicit AffReader(const base::FilePath& path);
22   ~AffReader();
23 
24   bool Read();
25 
26   // Returns whether this file uses indexed affixes, or, on false, whether the
27   // rule string will be specified literally in the .dic file. This must be
28   // called after Read().
has_indexed_affixes()29   bool has_indexed_affixes() const { return has_indexed_affixes_; }
30 
31   // Returns a string representing the encoding of the dictionary. This will
32   // default to ISO-8859-1 if the .aff file does not specify it.
encoding()33   const char* encoding() const { return encoding_.c_str(); }
34 
35   // Converts the given string from the file encoding to UTF-8, returning true
36   // on success.
37   bool EncodingToUTF8(const std::string& encoded, std::string* utf8) const;
38 
39   // Adds a new affix string, returning the index. If it already exists, returns
40   // the index of the existing one. This is used to convert .dic files which
41   // list the
42   // You must not call this until after Read();
43   int GetAFIndexForAFString(const std::string& af_string);
44 
45   // Getters for the computed data.
comments()46   const std::string& comments() const { return intro_comment_; }
affix_rules()47   const std::vector<std::string>& affix_rules() const { return affix_rules_; }
48   const std::vector< std::pair<std::string, std::string> >&
replacements()49       replacements() const {
50     return replacements_;
51   }
other_commands()52   const std::vector<std::string>& other_commands() const {
53     return other_commands_;
54   }
55 
56   // Returns the affix groups ("AF" lines) for this file. The indices into this
57   // are 1-based, but we don't use the 0th item, so lookups will have to
58   // subtract one to get the index. This is how hunspell stores this data.
59   std::vector<std::string> GetAffixGroups() const;
60 
61  private:
62   // Command-specific handlers. These are given the string folling the
63   // command. The input rule may be modified arbitrarily by the function.
64   int AddAffixGroup(std::string* rule);  // Returns the new affix group ID.
65   void AddAffix(std::string* rule);  // SFX/PFX
66   void AddReplacement(std::string* rule);
67   // void HandleFlag(std::string* rule);
68 
69   // Used to handle "other" commands. The "raw" just saves the line as-is.
70   // The "encoded" version converts the line to UTF-8 and saves it.
71   void HandleRawCommand(const std::string& line);
72   void HandleEncodedCommand(const std::string& line);
73 
74   FILE* file_;
75 
76   // Comments from the beginning of the file. This is everything before the
77   // first command. We want to store this since it often contains the copyright
78   // information.
79   std::string intro_comment_;
80 
81   // Encoding of the source words.
82   std::string encoding_;
83 
84   // Affix rules. These are populated by "AF" commands. The .dic file can refer
85   // to these by index. They are indexed by their string value (the list of
86   // characters representing rules), and map to the numeric affix IDs.
87   //
88   // These can also be added using GetAFIndexForAFString.
89   std::map<std::string, int> affix_groups_;
90 
91   // True when the affixes were specified in the .aff file using indices. The
92   // dictionary reader uses this to see how it should treat the stuff after the
93   // word on each line.
94   bool has_indexed_affixes_;
95 
96   // SFX and PFX commands. This is a list of each of those lines in the order
97   // they appear in the file. They have been re-encoded.
98   std::vector<std::string> affix_rules_;
99 
100   // Replacement commands. The first string is a possible input, and the second
101   // is the replacment.
102   std::vector< std::pair<std::string, std::string> > replacements_;
103 
104   // All other commands.
105   std::vector<std::string> other_commands_;
106 };
107 
108 }  // namespace convert_dict
109 
110 #endif  // CHROME_TOOLS_CONVERT_DICT_AFF_READER_H__
111