• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "net/ftp/ftp_util.h"
6 
7 #include <map>
8 #include <vector>
9 
10 #include "base/i18n/case_conversion.h"
11 #include "base/i18n/char_iterator.h"
12 #include "base/logging.h"
13 #include "base/memory/singleton.h"
14 #include "base/strings/string_number_conversions.h"
15 #include "base/strings/string_piece.h"
16 #include "base/strings/string_split.h"
17 #include "base/strings/string_tokenizer.h"
18 #include "base/strings/string_util.h"
19 #include "base/strings/utf_string_conversions.h"
20 #include "base/time/time.h"
21 #include "third_party/icu/source/common/unicode/uchar.h"
22 #include "third_party/icu/source/i18n/unicode/datefmt.h"
23 #include "third_party/icu/source/i18n/unicode/dtfmtsym.h"
24 
25 using base::ASCIIToUTF16;
26 using base::StringPiece16;
27 
28 // For examples of Unix<->VMS path conversions, see the unit test file. On VMS
29 // a path looks differently depending on whether it's a file or directory.
30 
31 namespace net {
32 
33 // static
UnixFilePathToVMS(const std::string & unix_path)34 std::string FtpUtil::UnixFilePathToVMS(const std::string& unix_path) {
35   if (unix_path.empty())
36     return std::string();
37 
38   base::StringTokenizer tokenizer(unix_path, "/");
39   std::vector<std::string> tokens;
40   while (tokenizer.GetNext())
41     tokens.push_back(tokenizer.token());
42 
43   if (unix_path[0] == '/') {
44     // It's an absolute path.
45 
46     if (tokens.empty()) {
47       DCHECK_EQ(1U, unix_path.length());
48       return "[]";
49     }
50 
51     if (tokens.size() == 1)
52       return unix_path.substr(1);  // Drop the leading slash.
53 
54     std::string result(tokens[0] + ":[");
55     if (tokens.size() == 2) {
56       // Don't ask why, it just works that way on VMS.
57       result.append("000000");
58     } else {
59       result.append(tokens[1]);
60       for (size_t i = 2; i < tokens.size() - 1; i++)
61         result.append("." + tokens[i]);
62     }
63     result.append("]" + tokens[tokens.size() - 1]);
64     return result;
65   }
66 
67   if (tokens.size() == 1)
68     return unix_path;
69 
70   std::string result("[");
71   for (size_t i = 0; i < tokens.size() - 1; i++)
72     result.append("." + tokens[i]);
73   result.append("]" + tokens[tokens.size() - 1]);
74   return result;
75 }
76 
77 // static
UnixDirectoryPathToVMS(const std::string & unix_path)78 std::string FtpUtil::UnixDirectoryPathToVMS(const std::string& unix_path) {
79   if (unix_path.empty())
80     return std::string();
81 
82   std::string path(unix_path);
83 
84   if (path[path.length() - 1] != '/')
85     path.append("/");
86 
87   // Reuse logic from UnixFilePathToVMS by appending a fake file name to the
88   // real path and removing it after conversion.
89   path.append("x");
90   path = UnixFilePathToVMS(path);
91   return path.substr(0, path.length() - 1);
92 }
93 
94 // static
VMSPathToUnix(const std::string & vms_path)95 std::string FtpUtil::VMSPathToUnix(const std::string& vms_path) {
96   if (vms_path.empty())
97     return ".";
98 
99   if (vms_path[0] == '/') {
100     // This is not really a VMS path. Most likely the server is emulating UNIX.
101     // Return path as-is.
102     return vms_path;
103   }
104 
105   if (vms_path == "[]")
106     return "/";
107 
108   std::string result(vms_path);
109   if (vms_path[0] == '[') {
110     // It's a relative path.
111     ReplaceFirstSubstringAfterOffset(&result, 0, "[.", std::string());
112   } else {
113     // It's an absolute path.
114     result.insert(0, "/");
115     ReplaceSubstringsAfterOffset(&result, 0, ":[000000]", "/");
116     ReplaceSubstringsAfterOffset(&result, 0, ":[", "/");
117   }
118   std::replace(result.begin(), result.end(), '.', '/');
119   std::replace(result.begin(), result.end(), ']', '/');
120 
121   // Make sure the result doesn't end with a slash.
122   if (result.length() && result[result.length() - 1] == '/')
123     result = result.substr(0, result.length() - 1);
124 
125   return result;
126 }
127 
128 namespace {
129 
130 // Lazy-initialized map of abbreviated month names.
131 class AbbreviatedMonthsMap {
132  public:
GetInstance()133   static AbbreviatedMonthsMap* GetInstance() {
134     return Singleton<AbbreviatedMonthsMap>::get();
135   }
136 
137   // Converts abbreviated month name |text| to its number (in range 1-12).
138   // On success returns true and puts the number in |number|.
GetMonthNumber(const base::string16 & text,int * number)139   bool GetMonthNumber(const base::string16& text, int* number) {
140     // Ignore the case of the month names. The simplest way to handle that
141     // is to make everything lowercase.
142     base::string16 text_lower(base::i18n::ToLower(text));
143 
144     if (map_.find(text_lower) == map_.end())
145       return false;
146 
147     *number = map_[text_lower];
148     return true;
149   }
150 
151  private:
152   friend struct DefaultSingletonTraits<AbbreviatedMonthsMap>;
153 
154   // Constructor, initializes the map based on ICU data. It is much faster
155   // to do that just once.
AbbreviatedMonthsMap()156   AbbreviatedMonthsMap() {
157     int32_t locales_count;
158     const icu::Locale* locales =
159         icu::DateFormat::getAvailableLocales(locales_count);
160 
161     for (int32_t locale = 0; locale < locales_count; locale++) {
162       UErrorCode status(U_ZERO_ERROR);
163 
164       icu::DateFormatSymbols format_symbols(locales[locale], status);
165 
166       // If we cannot get format symbols for some locale, it's not a fatal
167       // error. Just try another one.
168       if (U_FAILURE(status))
169         continue;
170 
171       int32_t months_count;
172       const icu::UnicodeString* months =
173           format_symbols.getShortMonths(months_count);
174 
175       for (int32_t month = 0; month < months_count; month++) {
176         base::string16 month_name(months[month].getBuffer(),
177                             static_cast<size_t>(months[month].length()));
178 
179         // Ignore the case of the month names. The simplest way to handle that
180         // is to make everything lowercase.
181         month_name = base::i18n::ToLower(month_name);
182 
183         map_[month_name] = month + 1;
184 
185         // Sometimes ICU returns longer strings, but in FTP listings a shorter
186         // abbreviation is used (for example for the Russian locale). Make sure
187         // we always have a map entry for a three-letter abbreviation.
188         map_[month_name.substr(0, 3)] = month + 1;
189       }
190     }
191 
192     // Fail loudly if the data returned by ICU is obviously incomplete.
193     // This is intended to catch cases like http://crbug.com/177428
194     // much earlier. Note that the issue above turned out to be non-trivial
195     // to reproduce - crash data is much better indicator of a problem
196     // than incomplete bug reports.
197     CHECK_EQ(1, map_[ASCIIToUTF16("jan")]);
198     CHECK_EQ(2, map_[ASCIIToUTF16("feb")]);
199     CHECK_EQ(3, map_[ASCIIToUTF16("mar")]);
200     CHECK_EQ(4, map_[ASCIIToUTF16("apr")]);
201     CHECK_EQ(5, map_[ASCIIToUTF16("may")]);
202     CHECK_EQ(6, map_[ASCIIToUTF16("jun")]);
203     CHECK_EQ(7, map_[ASCIIToUTF16("jul")]);
204     CHECK_EQ(8, map_[ASCIIToUTF16("aug")]);
205     CHECK_EQ(9, map_[ASCIIToUTF16("sep")]);
206     CHECK_EQ(10, map_[ASCIIToUTF16("oct")]);
207     CHECK_EQ(11, map_[ASCIIToUTF16("nov")]);
208     CHECK_EQ(12, map_[ASCIIToUTF16("dec")]);
209   }
210 
211   // Maps lowercase month names to numbers in range 1-12.
212   std::map<base::string16, int> map_;
213 
214   DISALLOW_COPY_AND_ASSIGN(AbbreviatedMonthsMap);
215 };
216 
217 }  // namespace
218 
219 // static
AbbreviatedMonthToNumber(const base::string16 & text,int * number)220 bool FtpUtil::AbbreviatedMonthToNumber(const base::string16& text,
221                                        int* number) {
222   return AbbreviatedMonthsMap::GetInstance()->GetMonthNumber(text, number);
223 }
224 
225 // static
LsDateListingToTime(const base::string16 & month,const base::string16 & day,const base::string16 & rest,const base::Time & current_time,base::Time * result)226 bool FtpUtil::LsDateListingToTime(const base::string16& month,
227                                   const base::string16& day,
228                                   const base::string16& rest,
229                                   const base::Time& current_time,
230                                   base::Time* result) {
231   base::Time::Exploded time_exploded = { 0 };
232 
233   if (!AbbreviatedMonthToNumber(month, &time_exploded.month)) {
234     // Work around garbage sent by some servers in the same column
235     // as the month. Take just last 3 characters of the string.
236     if (month.length() < 3 ||
237         !AbbreviatedMonthToNumber(month.substr(month.length() - 3),
238                                   &time_exploded.month)) {
239       return false;
240     }
241   }
242 
243   if (!base::StringToInt(day, &time_exploded.day_of_month))
244     return false;
245   if (time_exploded.day_of_month > 31)
246     return false;
247 
248   if (!base::StringToInt(rest, &time_exploded.year)) {
249     // Maybe it's time. Does it look like time? Note that it can be any of
250     // "HH:MM", "H:MM", "HH:M" or maybe even "H:M".
251     if (rest.length() > 5)
252       return false;
253 
254     size_t colon_pos = rest.find(':');
255     if (colon_pos == base::string16::npos)
256       return false;
257     if (colon_pos > 2)
258       return false;
259 
260     if (!base::StringToInt(
261             StringPiece16(rest.begin(), rest.begin() + colon_pos),
262             &time_exploded.hour)) {
263       return false;
264     }
265     if (!base::StringToInt(
266             StringPiece16(rest.begin() + colon_pos + 1, rest.end()),
267             &time_exploded.minute)) {
268       return false;
269     }
270 
271     // Guess the year.
272     base::Time::Exploded current_exploded;
273     current_time.LocalExplode(&current_exploded);
274 
275     // If it's not possible for the parsed date to be in the current year,
276     // use the previous year.
277     if (time_exploded.month > current_exploded.month ||
278         (time_exploded.month == current_exploded.month &&
279          time_exploded.day_of_month > current_exploded.day_of_month)) {
280       time_exploded.year = current_exploded.year - 1;
281     } else {
282       time_exploded.year = current_exploded.year;
283     }
284   }
285 
286   // We don't know the time zone of the listing, so just use local time.
287   *result = base::Time::FromLocalExploded(time_exploded);
288   return true;
289 }
290 
291 // static
WindowsDateListingToTime(const base::string16 & date,const base::string16 & time,base::Time * result)292 bool FtpUtil::WindowsDateListingToTime(const base::string16& date,
293                                        const base::string16& time,
294                                        base::Time* result) {
295   base::Time::Exploded time_exploded = { 0 };
296 
297   // Date should be in format MM-DD-YY[YY].
298   std::vector<base::string16> date_parts;
299   base::SplitString(date, '-', &date_parts);
300   if (date_parts.size() != 3)
301     return false;
302   if (!base::StringToInt(date_parts[0], &time_exploded.month))
303     return false;
304   if (!base::StringToInt(date_parts[1], &time_exploded.day_of_month))
305     return false;
306   if (!base::StringToInt(date_parts[2], &time_exploded.year))
307     return false;
308   if (time_exploded.year < 0)
309     return false;
310   // If year has only two digits then assume that 00-79 is 2000-2079,
311   // and 80-99 is 1980-1999.
312   if (time_exploded.year < 80)
313     time_exploded.year += 2000;
314   else if (time_exploded.year < 100)
315     time_exploded.year += 1900;
316 
317   // Time should be in format HH:MM[(AM|PM)]
318   if (time.length() < 5)
319     return false;
320 
321   std::vector<base::string16> time_parts;
322   base::SplitString(time.substr(0, 5), ':', &time_parts);
323   if (time_parts.size() != 2)
324     return false;
325   if (!base::StringToInt(time_parts[0], &time_exploded.hour))
326     return false;
327   if (!base::StringToInt(time_parts[1], &time_exploded.minute))
328     return false;
329   if (!time_exploded.HasValidValues())
330     return false;
331 
332   if (time.length() > 5) {
333     if (time.length() != 7)
334       return false;
335     base::string16 am_or_pm(time.substr(5, 2));
336     if (EqualsASCII(am_or_pm, "PM")) {
337       if (time_exploded.hour < 12)
338         time_exploded.hour += 12;
339     } else if (EqualsASCII(am_or_pm, "AM")) {
340       if (time_exploded.hour == 12)
341         time_exploded.hour = 0;
342     } else {
343       return false;
344     }
345   }
346 
347   // We don't know the time zone of the server, so just use local time.
348   *result = base::Time::FromLocalExploded(time_exploded);
349   return true;
350 }
351 
352 // static
// Returns the part of |text| that follows its first |columns|
// whitespace-separated columns, with whitespace trimmed from both ends.
base::string16 FtpUtil::GetStringPartAfterColumns(const base::string16& text,
                                                  int columns) {
  base::i18n::UTF16CharIterator cursor(&text);

  // TODO(jshin): Is u_isspace the right function to use here?
  int remaining = columns;
  while (remaining > 0) {
    // Step over the whitespace in front of the column...
    while (!cursor.end() && u_isspace(cursor.get()))
      cursor.Advance();

    // ...and then over the column's own text.
    while (!cursor.end() && !u_isspace(cursor.get()))
      cursor.Advance();

    --remaining;
  }

  base::string16 tail(text.substr(cursor.array_pos()));
  base::TrimWhitespace(tail, base::TRIM_ALL, &tail);
  return tail;
}
372 
373 }  // namespace net
374