• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/test/perf/generate_profile.h"
6 
7 #include "base/at_exit.h"
8 #include "base/command_line.h"
9 #include "base/file_util.h"
10 #include "base/files/file_enumerator.h"
11 #include "base/files/file_path.h"
12 #include "base/i18n/icu_util.h"
13 #include "base/logging.h"
14 #include "base/message_loop/message_loop.h"
15 #include "base/path_service.h"
16 #include "base/strings/string_number_conversions.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/time/time.h"
19 #include "chrome/browser/history/history_service.h"
20 #include "chrome/browser/history/history_service_factory.h"
21 #include "chrome/browser/history/top_sites.h"
22 #include "chrome/common/chrome_paths.h"
23 #include "chrome/common/thumbnail_score.h"
24 #include "chrome/test/base/testing_browser_process.h"
25 #include "chrome/test/base/testing_profile.h"
26 #include "chrome/tools/profiles/thumbnail-inl.h"
27 #include "content/public/browser/browser_thread.h"
28 #include "content/public/browser/notification_service.h"
29 #include "content/public/test/test_browser_thread.h"
30 #include "third_party/skia/include/core/SkBitmap.h"
31 #include "ui/base/resource/resource_bundle.h"
32 #include "ui/base/ui_base_paths.h"
33 #include "ui/gfx/codec/jpeg_codec.h"
34 
35 using base::Time;
36 using content::BrowserThread;
37 
38 namespace {
39 
// Empirical distribution of word lengths (measured from Darin's profile).
// Entry [n-1] holds P(word has length n), so the table covers lengths 1
// through 19.
const float kWordLengthProbabilities[] = {
  0.069f, 0.132f, 0.199f, 0.137f, 0.088f,  // lengths 1-5
  0.115f, 0.081f, 0.055f, 0.034f, 0.021f,  // lengths 6-10
  0.019f, 0.018f, 0.007f, 0.007f, 0.005f,  // lengths 11-15
  0.004f, 0.003f, 0.003f, 0.003f           // lengths 16-19
};
45 
// Draws the next value from rand() and scales it by RAND_MAX, giving a float
// in [0,1]. Handy for making probabilistic decisions.
inline float RandomFloat() {
  const float range = static_cast<float>(RAND_MAX);
  const int draw = rand();
  return draw / range;
}
51 
// Return an integer uniformly in [min,max).
// An empty or inverted range (max <= min) contains no value to draw, so |min|
// is returned as-is without consuming a random number; this avoids evaluating
// rand() % 0, which is undefined behavior. For a valid range exactly one
// rand() value is consumed.
inline int RandomInt(int min, int max) {
  if (max <= min)
    return min;
  return min + (rand() % (max - min));
}
56 
57 // Return a string of |count| lowercase random characters.
RandomChars(int count)58 string16 RandomChars(int count) {
59   string16 str;
60   for (int i = 0; i < count; ++i)
61     str += L'a' + rand() % 26;
62   return str;
63 }
64 
RandomWord()65 string16 RandomWord() {
66   // TODO(evanm): should we instead use the markov chain based
67   // version of this that I already wrote?
68 
69   // Sample a word length from kWordLengthProbabilities.
70   float sample = RandomFloat();
71   size_t i;
72   for (i = 0; i < arraysize(kWordLengthProbabilities); ++i) {
73     sample -= kWordLengthProbabilities[i];
74     if (sample < 0) break;
75   }
76   const int word_length = i + 1;
77   return RandomChars(word_length);
78 }
79 
80 // Return a string of |count| random words.
RandomWords(int count)81 string16 RandomWords(int count) {
82   string16 str;
83   for (int i = 0; i < count; ++i) {
84     if (!str.empty())
85       str += L' ';
86     str += RandomWord();
87   }
88   return str;
89 }
90 
91 // Return a random URL-looking string.
ConstructRandomURL()92 GURL ConstructRandomURL() {
93   return GURL(ASCIIToUTF16("http://") + RandomChars(3) + ASCIIToUTF16(".com/") +
94       RandomChars(RandomInt(5, 20)));
95 }
96 
97 // Return a random page title-looking string.
ConstructRandomTitle()98 string16 ConstructRandomTitle() {
99   return RandomWords(RandomInt(3, 15));
100 }
101 
102 // Insert a batch of |batch_size| URLs, starting at pageid |page_id|.
InsertURLBatch(Profile * profile,int page_id,int batch_size,int types)103 void InsertURLBatch(Profile* profile,
104                     int page_id,
105                     int batch_size,
106                     int types) {
107   HistoryService* history_service =
108       HistoryServiceFactory::GetForProfile(profile, Profile::EXPLICIT_ACCESS);
109 
110   // Probability of following a link on the current "page"
111   // (vs randomly jumping to a new page).
112   const float kFollowLinkProbability = 0.85f;
113   // Probability of visiting a page we've visited before.
114   const float kRevisitLinkProbability = 0.1f;
115   // Probability of a URL being "good enough" to revisit.
116   const float kRevisitableURLProbability = 0.05f;
117   // Probability of a URL being the end of a redirect chain.
118   const float kRedirectProbability = 0.05f;
119 
120   // A list of URLs that we sometimes revisit.
121   std::vector<GURL> revisit_urls;
122 
123   // Scoping value for page IDs (required by the history service).
124   void* id_scope = reinterpret_cast<void*>(1);
125 
126   scoped_refptr<base::RefCountedMemory> google_bitmap(
127       new base::RefCountedStaticMemory(kGoogleThumbnail,
128                                        sizeof(kGoogleThumbnail)));
129   scoped_refptr<base::RefCountedMemory> weewar_bitmap(
130       new base::RefCountedStaticMemory(kWeewarThumbnail,
131                                        sizeof(kWeewarThumbnail)));
132 
133   printf("Inserting %d URLs...\n", batch_size);
134   GURL previous_url;
135   content::PageTransition transition = content::PAGE_TRANSITION_TYPED;
136   const int end_page_id = page_id + batch_size;
137   history::TopSites* top_sites = profile->GetTopSites();
138   for (; page_id < end_page_id; ++page_id) {
139     // Randomly decide whether this new URL simulates following a link or
140     // whether it's a jump to a new URL.
141     if (!previous_url.is_empty() && RandomFloat() < kFollowLinkProbability) {
142       transition = content::PAGE_TRANSITION_LINK;
143     } else {
144       previous_url = GURL();
145       transition = content::PAGE_TRANSITION_TYPED;
146     }
147 
148     // Pick a URL, either newly at random or from our list of previously
149     // visited URLs.
150     GURL url;
151     if (!revisit_urls.empty() && RandomFloat() < kRevisitLinkProbability) {
152       // Draw a URL from revisit_urls at random.
153       url = revisit_urls[RandomInt(0, static_cast<int>(revisit_urls.size()))];
154     } else {
155       url = ConstructRandomURL();
156     }
157 
158     // Randomly construct a redirect chain.
159     history::RedirectList redirects;
160     if (RandomFloat() < kRedirectProbability) {
161       const int redir_count = RandomInt(1, 4);
162       for (int i = 0; i < redir_count; ++i)
163         redirects.push_back(ConstructRandomURL());
164       redirects.push_back(url);
165     }
166 
167     // Add all of this information to the history service.
168     history_service->AddPage(url, base::Time::Now(),
169                              id_scope, page_id,
170                              previous_url, redirects,
171                              transition, history::SOURCE_BROWSED, true);
172     ThumbnailScore score(0.75, false, false);
173     history_service->SetPageTitle(url, ConstructRandomTitle());
174     if (types & TOP_SITES && top_sites) {
175       top_sites->SetPageThumbnailToJPEGBytes(
176           url,
177           (RandomInt(0, 2) == 0) ? google_bitmap.get() : weewar_bitmap.get(),
178           score);
179     }
180 
181     previous_url = url;
182 
183     if (revisit_urls.empty() || RandomFloat() < kRevisitableURLProbability)
184       revisit_urls.push_back(url);
185   }
186 }
187 
188 }  // namespace
189 
GenerateProfile(GenerateProfileTypes types,int url_count,const base::FilePath & dst_dir)190 bool GenerateProfile(GenerateProfileTypes types,
191                      int url_count,
192                      const base::FilePath& dst_dir) {
193   if (!base::CreateDirectory(dst_dir)) {
194     PLOG(ERROR) << "Unable to create directory " << dst_dir.value().c_str();
195     return false;
196   }
197 
198   // We want this profile to be as deterministic as possible, so seed the
199   // random number generator with the number of urls we're generating.
200   srand(static_cast<unsigned int>(url_count));
201 
202   printf("Creating profiles for testing...\n");
203 
204   TestingBrowserProcessInitializer initialize_browser_process;
205   base::MessageLoopForUI message_loop;
206   content::TestBrowserThread ui_thread(BrowserThread::UI, &message_loop);
207   content::TestBrowserThread db_thread(BrowserThread::DB, &message_loop);
208   TestingProfile profile;
209   if (!profile.CreateHistoryService(false, false)) {
210       PLOG(ERROR) << "Creating history service failed";
211       return false;
212   }
213   if (types & TOP_SITES) {
214     profile.CreateTopSites();
215     profile.BlockUntilTopSitesLoaded();
216   }
217 
218   // The maximum number of URLs to insert into history in one batch.
219   const int kBatchSize = 2000;
220   int page_id = 0;
221   while (page_id < url_count) {
222     const int batch_size = std::min(kBatchSize, url_count - page_id);
223     InsertURLBatch(&profile, page_id, batch_size, types);
224     // Run all pending messages to give TopSites a chance to catch up.
225     message_loop.RunUntilIdle();
226     page_id += batch_size;
227   }
228 
229   profile.DestroyTopSites();
230   profile.DestroyHistoryService();
231 
232   message_loop.RunUntilIdle();
233 
234   base::FileEnumerator file_iterator(profile.GetPath(), false,
235                                      base::FileEnumerator::FILES);
236   base::FilePath path = file_iterator.Next();
237   while (!path.empty()) {
238     base::FilePath dst_file = dst_dir.Append(path.BaseName());
239     base::DeleteFile(dst_file, false);
240     if (!base::CopyFile(path, dst_file)) {
241       PLOG(ERROR) << "Copying file failed";
242       return false;
243     }
244     path = file_iterator.Next();
245   }
246 
247   printf("Finished creating profiles for testing.\n");
248 
249   // Restore the random seed.
250   srand(static_cast<unsigned int>(Time::Now().ToInternalValue()));
251 
252   return true;
253 }
254