• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.  Use of this
2 // source code is governed by a BSD-style license that can be found in the
3 // LICENSE file.
4 
5 #include "net/ftp/ftp_directory_listing_buffer.h"
6 
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/stl_util-inl.h"
9 #include "base/string_util.h"
10 #include "net/base/net_errors.h"
11 #include "net/ftp/ftp_directory_listing_parser_ls.h"
12 #include "net/ftp/ftp_directory_listing_parser_mlsd.h"
13 #include "net/ftp/ftp_directory_listing_parser_netware.h"
14 #include "net/ftp/ftp_directory_listing_parser_vms.h"
15 #include "net/ftp/ftp_directory_listing_parser_windows.h"
16 #include "unicode/ucsdet.h"
17 
18 namespace {
19 
20 // A very simple-minded character encoding detection.
21 // TODO(jungshik): We can apply more heuristics here (e.g. using various hints
22 // like TLD, the UI language/default encoding of a client, etc). In that case,
23 // this should be pulled out of here and moved somewhere in base because there
24 // can be other use cases.
DetectEncoding(const std::string & text)25 std::string DetectEncoding(const std::string& text) {
26   if (IsStringASCII(text))
27     return std::string();
28   UErrorCode status = U_ZERO_ERROR;
29   UCharsetDetector* detector = ucsdet_open(&status);
30   ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
31                  &status);
32   const UCharsetMatch* match = ucsdet_detect(detector, &status);
33   const char* encoding = ucsdet_getName(match, &status);
34   ucsdet_close(detector);
35   // Should we check the quality of the match? A rather arbitrary number is
36   // assigned by ICU and it's hard to come up with a lower limit.
37   if (U_FAILURE(status))
38     return std::string();
39   return encoding;
40 }
41 
42 }  // namespace
43 
44 namespace net {
45 
FtpDirectoryListingBuffer()46 FtpDirectoryListingBuffer::FtpDirectoryListingBuffer()
47     : current_parser_(NULL) {
48   parsers_.insert(new FtpDirectoryListingParserLs());
49   parsers_.insert(new FtpDirectoryListingParserMlsd());
50   parsers_.insert(new FtpDirectoryListingParserNetware());
51   parsers_.insert(new FtpDirectoryListingParserVms());
52   parsers_.insert(new FtpDirectoryListingParserWindows());
53 }
54 
~FtpDirectoryListingBuffer()55 FtpDirectoryListingBuffer::~FtpDirectoryListingBuffer() {
56   STLDeleteElements(&parsers_);
57 }
58 
ConsumeData(const char * data,int data_length)59 int FtpDirectoryListingBuffer::ConsumeData(const char* data, int data_length) {
60   buffer_.append(data, data_length);
61 
62   if (!encoding_.empty() || buffer_.length() > 1024) {
63     int rv = ExtractFullLinesFromBuffer();
64     if (rv != OK)
65       return rv;
66   }
67 
68   return ParseLines();
69 }
70 
ProcessRemainingData()71 int FtpDirectoryListingBuffer::ProcessRemainingData() {
72   int rv = ExtractFullLinesFromBuffer();
73   if (rv != OK)
74     return rv;
75 
76   if (!buffer_.empty())
77     return ERR_INVALID_RESPONSE;
78 
79   rv = ParseLines();
80   if (rv != OK)
81     return rv;
82 
83   rv = OnEndOfInput();
84   if (rv != OK)
85     return rv;
86 
87   return OK;
88 }
89 
EntryAvailable() const90 bool FtpDirectoryListingBuffer::EntryAvailable() const {
91   return (current_parser_ ? current_parser_->EntryAvailable() : false);
92 }
93 
PopEntry()94 FtpDirectoryListingEntry FtpDirectoryListingBuffer::PopEntry() {
95   DCHECK(EntryAvailable());
96   return current_parser_->PopEntry();
97 }
98 
GetServerType() const99 FtpServerType FtpDirectoryListingBuffer::GetServerType() const {
100   return (current_parser_ ? current_parser_->GetServerType() : SERVER_UNKNOWN);
101 }
102 
ConvertToDetectedEncoding(const std::string & from,string16 * to)103 bool FtpDirectoryListingBuffer::ConvertToDetectedEncoding(
104     const std::string& from, string16* to) {
105   std::string encoding(encoding_.empty() ? "ascii" : encoding_);
106   return base::CodepageToUTF16(from, encoding.c_str(),
107                                base::OnStringConversionError::FAIL, to);
108 }
109 
ExtractFullLinesFromBuffer()110 int FtpDirectoryListingBuffer::ExtractFullLinesFromBuffer() {
111   if (encoding_.empty())
112     encoding_ = DetectEncoding(buffer_);
113 
114   int cut_pos = 0;
115   // TODO(phajdan.jr): This code accepts all endlines matching \r*\n. Should it
116   // be more strict, or enforce consistent line endings?
117   for (size_t i = 0; i < buffer_.length(); ++i) {
118     if (buffer_[i] != '\n')
119       continue;
120     int line_length = i - cut_pos;
121     if (i >= 1 && buffer_[i - 1] == '\r')
122       line_length--;
123     std::string line(buffer_.substr(cut_pos, line_length));
124     cut_pos = i + 1;
125     string16 line_converted;
126     if (!ConvertToDetectedEncoding(line, &line_converted)) {
127       buffer_.erase(0, cut_pos);
128       return ERR_ENCODING_CONVERSION_FAILED;
129     }
130     lines_.push_back(line_converted);
131   }
132   buffer_.erase(0, cut_pos);
133   return OK;
134 }
135 
ParseLines()136 int FtpDirectoryListingBuffer::ParseLines() {
137   while (!lines_.empty()) {
138     string16 line = lines_.front();
139     lines_.pop_front();
140     if (current_parser_) {
141       if (!current_parser_->ConsumeLine(line))
142         return ERR_FAILED;
143     } else {
144       ParserSet::iterator i = parsers_.begin();
145       while (i != parsers_.end()) {
146         if ((*i)->ConsumeLine(line)) {
147           i++;
148         } else {
149           delete *i;
150           parsers_.erase(i++);
151         }
152       }
153       if (parsers_.empty())
154         return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
155       if (parsers_.size() == 1)
156         current_parser_ = *parsers_.begin();
157     }
158   }
159 
160   return OK;
161 }
162 
OnEndOfInput()163 int FtpDirectoryListingBuffer::OnEndOfInput() {
164   ParserSet::iterator i = parsers_.begin();
165   while (i != parsers_.end()) {
166     if ((*i)->OnEndOfInput()) {
167       i++;
168     } else {
169       delete *i;
170       parsers_.erase(i++);
171     }
172   }
173 
174   if (parsers_.size() != 1) {
175     DCHECK(!current_parser_);
176 
177     // We may hit an ambiguity in case of listings which have no entries. That's
178     // fine, as long as all remaining parsers agree that the listing is empty.
179     bool all_listings_empty = true;
180     for (ParserSet::iterator i = parsers_.begin(); i != parsers_.end(); ++i) {
181       if ((*i)->EntryAvailable())
182         all_listings_empty = false;
183     }
184     if (all_listings_empty)
185       return OK;
186 
187     return ERR_UNRECOGNIZED_FTP_DIRECTORY_LISTING_FORMAT;
188   }
189 
190   current_parser_ = *parsers_.begin();
191   return OK;
192 }
193 
194 }  // namespace net
195