1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. Use of this
2 // source code is governed by a BSD-style license that can be found in the
3 // LICENSE file.
4
5 #include "net/ftp/ftp_directory_listing_buffer.h"
6
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/stl_util-inl.h"
9 #include "base/string_util.h"
10 #include "net/base/net_errors.h"
11 #include "net/ftp/ftp_directory_listing_parser_ls.h"
12 #include "net/ftp/ftp_directory_listing_parser_mlsd.h"
13 #include "net/ftp/ftp_directory_listing_parser_netware.h"
14 #include "net/ftp/ftp_directory_listing_parser_vms.h"
15 #include "net/ftp/ftp_directory_listing_parser_windows.h"
16 #include "unicode/ucsdet.h"
17
18 namespace {
19
20 // A very simple-minded character encoding detection.
21 // TODO(jungshik): We can apply more heuristics here (e.g. using various hints
22 // like TLD, the UI language/default encoding of a client, etc). In that case,
23 // this should be pulled out of here and moved somewhere in base because there
24 // can be other use cases.
DetectEncoding(const std::string & text)25 std::string DetectEncoding(const std::string& text) {
26 if (IsStringASCII(text))
27 return std::string();
28 UErrorCode status = U_ZERO_ERROR;
29 UCharsetDetector* detector = ucsdet_open(&status);
30 ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
31 &status);
32 const UCharsetMatch* match = ucsdet_detect(detector, &status);
33 const char* encoding = ucsdet_getName(match, &status);
34 ucsdet_close(detector);
35 // Should we check the quality of the match? A rather arbitrary number is
36 // assigned by ICU and it's hard to come up with a lower limit.
37 if (U_FAILURE(status))
38 return std::string();
39 return encoding;
40 }
41
42 } // namespace
43
44 namespace net {
45
FtpDirectoryListingBuffer()46 FtpDirectoryListingBuffer::FtpDirectoryListingBuffer()
47 : current_parser_(NULL) {
48 parsers_.insert(new FtpDirectoryListingParserLs());
49 parsers_.insert(new FtpDirectoryListingParserMlsd());
50 parsers_.insert(new FtpDirectoryListingParserNetware());
51 parsers_.insert(new FtpDirectoryListingParserVms());
52 parsers_.insert(new FtpDirectoryListingParserWindows());
53 }
54
~FtpDirectoryListingBuffer()55 FtpDirectoryListingBuffer::~FtpDirectoryListingBuffer() {
56 STLDeleteElements(&parsers_);
57 }
58
ConsumeData(const char * data,int data_length)59 int FtpDirectoryListingBuffer::ConsumeData(const char* data, int data_length) {
60 buffer_.append(data, data_length);
61
62 if (!encoding_.empty() || buffer_.length() > 1024) {
63 int rv = ExtractFullLinesFromBuffer();
64 if (rv != OK)
65 return rv;
66 }
67
68 return ParseLines();
69 }
70
ProcessRemainingData()71 int FtpDirectoryListingBuffer::ProcessRemainingData() {
72 int rv = ExtractFullLinesFromBuffer();
73 if (rv != OK)
74 return rv;
75
76 if (!buffer_.empty())
77 return ERR_INVALID_RESPONSE;
78
79 rv = ParseLines();
80 if (rv != OK)
81 return rv;
82
83 rv = OnEndOfInput();
84 if (rv != OK)
85 return rv;
86
87 return OK;
88 }
89
EntryAvailable() const90 bool FtpDirectoryListingBuffer::EntryAvailable() const {
91 return (current_parser_ ? current_parser_->EntryAvailable() : false);
92 }
93
PopEntry()94 FtpDirectoryListingEntry FtpDirectoryListingBuffer::PopEntry() {
95 DCHECK(EntryAvailable());
96 return current_parser_->PopEntry();
97 }
98
GetServerType() const99 FtpServerType FtpDirectoryListingBuffer::GetServerType() const {
100 return (current_parser_ ? current_parser_->GetServerType() : SERVER_UNKNOWN);
101 }
102
ConvertToDetectedEncoding(const std::string & from,string16 * to)103 bool FtpDirectoryListingBuffer::ConvertToDetectedEncoding(
104 const std::string& from, string16* to) {
105 std::string encoding(encoding_.empty() ? "ascii" : encoding_);
106 return base::CodepageToUTF16(from, encoding.c_str(),
107 base::OnStringConversionError::FAIL, to);
108 }
109
ExtractFullLinesFromBuffer()110 int FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() {
111 if (encoding_.empty())
112 encoding_ = DetectEncoding(buffer_);
113
114 int cut_pos = 0;
115 // TODO(phajdan.jr): This code accepts all endlines matching \r*\n. Should it
116 // be more strict, or enforce consistent line endings?
117 for (size_t i = 0; i < buffer_.length(); ++i) {
118 if (buffer_[i] != '\n')
119 continue;
120 int line_length = i - cut_pos;
121 if (i >= 1 && buffer_[i - 1] == '\r')
122 line_length--;
123 std::string line(buffer_.substr(cut_pos, line_length));
124 cut_pos = i + 1;
125 string16 line_converted;
126 if (!ConvertToDetectedEncoding(line, &line_converted)) {
127 buffer_.erase(0, cut_pos);
128 return ERR_ENCODING_CONVERSION_FAILED;
129 }
130 lines_.push_back(line_converted);
131 }
132 buffer_.erase(0, cut_pos);
133 return OK;
134 }
135
ParseLines()136 int FtpDirectoryListingBuffer::ParseLines() {
137 while (!lines_.empty()) {
138 string16 line = lines_.front();
139 lines_.pop_front();
140 if (current_parser_) {
141 if (!current_parser_->ConsumeLine(line))
142 return ERR_FAILED;
143 } else {
144 ParserSet::iterator i = parsers_.begin();
145 while (i != parsers_.end()) {
146 if ((*i)->ConsumeLine(line)) {
147 i++;
148 } else {
149 delete *i;
150 parsers_.erase(i++);
151 }
152 }
153 if (parsers_.empty())
154 return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
155 if (parsers_.size() == 1)
156 current_parser_ = *parsers_.begin();
157 }
158 }
159
160 return OK;
161 }
162
OnEndOfInput()163 int FtpDirectoryListingBuffer::OnEndOfInput() {
164 ParserSet::iterator i = parsers_.begin();
165 while (i != parsers_.end()) {
166 if ((*i)->OnEndOfInput()) {
167 i++;
168 } else {
169 delete *i;
170 parsers_.erase(i++);
171 }
172 }
173
174 if (parsers_.size() != 1) {
175 DCHECK(!current_parser_);
176
177 // We may hit an ambiguity in case of listings which have no entries. That's
178 // fine, as long as all remaining parsers agree that the listing is empty.
179 bool all_listings_empty = true;
180 for (ParserSet::iterator i = parsers_.begin(); i != parsers_.end(); ++i) {
181 if ((*i)->EntryAvailable())
182 all_listings_empty = false;
183 }
184 if (all_listings_empty)
185 return OK;
186
187 return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
188 }
189
190 current_parser_ = *parsers_.begin();
191 return OK;
192 }
193
194 } // namespace net
195