• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
6 #define CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
7 #pragma once
8 
9 #include <deque>
10 #include <map>
11 #include <set>
12 #include <string>
13 #include <vector>
14 
15 #include "base/basictypes.h"
16 #include "base/memory/ref_counted_memory.h"
17 #include "base/stack_container.h"
18 #include "base/string16.h"
19 #include "base/time.h"
20 #include "chrome/browser/history/snippet.h"
21 #include "chrome/browser/search_engines/template_url_id.h"
22 #include "chrome/common/ref_counted_util.h"
23 #include "chrome/common/thumbnail_score.h"
24 #include "content/common/page_transition_types.h"
25 #include "googleurl/src/gurl.h"
26 
27 namespace history {
28 
// Forward declarations for the friend statements in the row classes below.
class HistoryBackend;
class URLDatabase;
32 
33 // Structure to hold redirect lists for URLs.  For a redirect chain
34 // A -> B -> C, and entry in the map would look like "A => {B -> C}".
35 typedef std::map<GURL, scoped_refptr<RefCountedVector<GURL> > > RedirectMap;
36 
37 // Container for a list of URLs.
38 typedef std::vector<GURL> RedirectList;
39 
40 typedef int64 StarID;  // Unique identifier for star entries.
41 typedef int64 UIStarID;  // Identifier for star entries that come from the UI.
42 typedef int64 DownloadID;   // Identifier for a download.
43 typedef int64 FaviconID;  // For favicons.
44 typedef int64 SegmentID;  // URL segments for the most visited view.
45 typedef int64 IconMappingID; // For page url and icon mapping.
46 
47 // URLRow ---------------------------------------------------------------------
48 
49 typedef int64 URLID;
50 
51 // Holds all information globally associated with one URL (one row in the
52 // URL table).
53 //
54 // This keeps track of dirty bits, which are currently unused:
55 //
56 // TODO(brettw) the dirty bits are broken in a number of respects. First, the
57 // database will want to update them on a const object, so they need to be
58 // mutable.
59 //
60 // Second, there is a problem copying. If you make a copy of this structure
61 // (as we allow since we put this into vectors in various places) then the
62 // dirty bits will not be in sync for these copies.
63 class URLRow {
64  public:
65   URLRow();
66 
67   explicit URLRow(const GURL& url);
68 
69   // We need to be able to set the id of a URLRow that's being passed through
70   // an IPC message.  This constructor should probably not be used otherwise.
71   URLRow(const GURL& url, URLID id);
72 
73   virtual ~URLRow();
74   URLRow& operator=(const URLRow& other);
75 
id()76   URLID id() const { return id_; }
url()77   const GURL& url() const { return url_; }
78 
title()79   const string16& title() const {
80     return title_;
81   }
set_title(const string16 & title)82   void set_title(const string16& title) {
83     // The title is frequently set to the same thing, so we don't bother
84     // updating unless the string has changed.
85     if (title != title_) {
86       title_ = title;
87     }
88   }
89 
visit_count()90   int visit_count() const {
91     return visit_count_;
92   }
set_visit_count(int visit_count)93   void set_visit_count(int visit_count) {
94     visit_count_ = visit_count;
95   }
96 
97   // Number of times the URL was typed in the Omnibox.
typed_count()98   int typed_count() const {
99     return typed_count_;
100   }
set_typed_count(int typed_count)101   void set_typed_count(int typed_count) {
102     typed_count_ = typed_count;
103   }
104 
last_visit()105   base::Time last_visit() const {
106     return last_visit_;
107   }
set_last_visit(base::Time last_visit)108   void set_last_visit(base::Time last_visit) {
109     last_visit_ = last_visit;
110   }
111 
112   // If this is set, we won't autocomplete this URL.
hidden()113   bool hidden() const {
114     return hidden_;
115   }
set_hidden(bool hidden)116   void set_hidden(bool hidden) {
117     hidden_ = hidden;
118   }
119 
120  protected:
121   // Swaps the contents of this URLRow with another, which allows it to be
122   // destructively copied without memory allocations.
123   void Swap(URLRow* other);
124 
125  private:
126   // This class writes directly into this structure and clears our dirty bits
127   // when reading out of the DB.
128   friend class URLDatabase;
129   friend class HistoryBackend;
130 
131   // Initializes all values that need initialization to their defaults.
132   // This excludes objects which autoinitialize such as strings.
133   void Initialize();
134 
135   // The row ID of this URL. Immutable except for the database which sets it
136   // when it pulls them out.
137   URLID id_;
138 
139   // The URL of this row. Immutable except for the database which sets it
140   // when it pulls them out. If clients want to change it, they must use
141   // the constructor to make a new one.
142   GURL url_;
143 
144   string16 title_;
145 
146   // Total number of times this URL has been visited.
147   int visit_count_;
148 
149   // Number of times this URL has been manually entered in the URL bar.
150   int typed_count_;
151 
152   // The date of the last visit of this URL, which saves us from having to
153   // loop up in the visit table for things like autocomplete and expiration.
154   base::Time last_visit_;
155 
156   // Indicates this entry should now be shown in typical UI or queries, this
157   // is usually for subframes.
158   bool hidden_;
159 
160   // We support the implicit copy constuctor and operator=.
161 };
162 
// The enumeration of all possible sources of visits is listed below.
// The source will be propagated along with a URL or a visit item
// and eventually be stored in the history database,
// visit_source table specifically.
// Different from page transition types, they describe the origins of visits.
// (Warning): Please don't change any existing values while it is ok to add
// new values when needed.
enum VisitSource {
  SOURCE_SYNCED = 0,         // Synchronized from somewhere else.
  SOURCE_BROWSED = 1,        // User browsed.
  SOURCE_EXTENSION = 2,      // Added by an extension.
  SOURCE_FIREFOX_IMPORTED = 3,
  SOURCE_IE_IMPORTED = 4,
  SOURCE_SAFARI_IMPORTED = 5,
};
178 
179 typedef int64 VisitID;
180 // Structure to hold the mapping between each visit's id and its source.
181 typedef std::map<VisitID, VisitSource> VisitSourceMap;
182 
183 // VisitRow -------------------------------------------------------------------
184 
185 // Holds all information associated with a specific visit. A visit holds time
186 // and referrer information for one time a URL is visited.
187 class VisitRow {
188  public:
189   VisitRow();
190   VisitRow(URLID arg_url_id,
191            base::Time arg_visit_time,
192            VisitID arg_referring_visit,
193            PageTransition::Type arg_transition,
194            SegmentID arg_segment_id);
195   ~VisitRow();
196 
197   // ID of this row (visit ID, used a a referrer for other visits).
198   VisitID visit_id;
199 
200   // Row ID into the URL table of the URL that this page is.
201   URLID url_id;
202 
203   base::Time visit_time;
204 
205   // Indicates another visit that was the referring page for this one.
206   // 0 indicates no referrer.
207   VisitID referring_visit;
208 
209   // A combination of bits from PageTransition.
210   PageTransition::Type transition;
211 
212   // The segment id (see visitsegment_database.*).
213   // If 0, the segment id is null in the table.
214   SegmentID segment_id;
215 
216   // True when this visit has indexed data for it. We try to keep this in sync
217   // with the full text index: when we add or remove things from there, we will
218   // update the visit table as well. However, that file could get deleted, or
219   // out of sync in various ways, so this flag should be false when things
220   // change.
221   bool is_indexed;
222 
223   // Compares two visits based on dates, for sorting.
224   bool operator<(const VisitRow& other) {
225     return visit_time < other.visit_time;
226   }
227 
228   // We allow the implicit copy constuctor and operator=.
229 };
230 
231 // We pass around vectors of visits a lot
232 typedef std::vector<VisitRow> VisitVector;
233 
234 // Favicons -------------------------------------------------------------------
235 
236 // Used by the importer to set favicons for imported bookmarks.
237 struct ImportedFaviconUsage {
238   ImportedFaviconUsage();
239   ~ImportedFaviconUsage();
240 
241   // The URL of the favicon.
242   GURL favicon_url;
243 
244   // The raw png-encoded data.
245   std::vector<unsigned char> png_data;
246 
247   // The list of URLs using this favicon.
248   std::set<GURL> urls;
249 };
250 
251 // PageVisit ------------------------------------------------------------------
252 
253 // Represents a simplified version of a visit for external users. Normally,
254 // views are only interested in the time, and not the other information
255 // associated with a VisitRow.
256 struct PageVisit {
257   URLID page_id;
258   base::Time visit_time;
259 };
260 
261 // StarredEntry ---------------------------------------------------------------
262 
263 // StarredEntry represents either a starred page, or a folder (where a folder
264 // consists of child starred entries). Use the type to determine the type of a
265 // particular entry.
266 //
267 // The database internally uses the id field to uniquely identify a starred
268 // entry. On the other hand, the UI, which is anything routed through
269 // HistoryService and HistoryBackend (including BookmarkBarView), uses the
270 // url field to uniquely identify starred entries of type URL and the folder_id
271 // field to uniquely identify starred entries of type USER_FOLDER. For example,
272 // HistoryService::UpdateStarredEntry identifies the entry by url (if the
273 // type is URL) or folder_id (if the type is not URL).
274 struct StarredEntry {
275   enum Type {
276     // Type represents a starred URL.
277     URL,
278 
279     // The bookmark bar folder.
280     BOOKMARK_BAR,
281 
282     // User created folder.
283     USER_FOLDER,
284 
285     // The "other bookmarks" folder that holds uncategorized bookmarks.
286     OTHER
287   };
288 
289   StarredEntry();
290   ~StarredEntry();
291 
292   void Swap(StarredEntry* other);
293 
294   // Unique identifier of this entry.
295   StarID id;
296 
297   // Title.
298   string16 title;
299 
300   // When this was added.
301   base::Time date_added;
302 
303   // Folder ID of the folder this entry is in. If 0, this entry is not in a
304   // folder.
305   UIStarID parent_folder_id;
306 
307   // Unique identifier for folders. This is assigned by the UI.
308   //
309   // WARNING: this is NOT the same as id, id is assigned by the database,
310   // this is assigned by the UI. See note about StarredEntry for more info.
311   UIStarID folder_id;
312 
313   // Visual order within the parent. Only valid if folder_id is not 0.
314   int visual_order;
315 
316   // Type of this entry (see enum).
317   Type type;
318 
319   // If type == URL, this is the URL of the page that was starred.
320   GURL url;
321 
322   // If type == URL, this is the ID of the URL of the primary page that was
323   // starred.
324   URLID url_id;
325 
326   // Time the entry was last modified. This is only used for folders and
327   // indicates the last time a URL was added as a child to the folder.
328   base::Time date_folder_modified;
329 };
330 
331 // URLResult -------------------------------------------------------------------
332 
333 class URLResult : public URLRow {
334  public:
335   URLResult();
336   URLResult(const GURL& url, base::Time visit_time);
337   // Constructor that create a URLResult from the specified URL and title match
338   // positions from title_matches.
339   URLResult(const GURL& url, const Snippet::MatchPositions& title_matches);
340   ~URLResult();
341 
visit_time()342   base::Time visit_time() const { return visit_time_; }
set_visit_time(base::Time visit_time)343   void set_visit_time(base::Time visit_time) { visit_time_ = visit_time; }
344 
snippet()345   const Snippet& snippet() const { return snippet_; }
346 
347   // If this is a title match, title_match_positions contains an entry for
348   // every word in the title that matched one of the query parameters. Each
349   // entry contains the start and end of the match.
title_match_positions()350   const Snippet::MatchPositions& title_match_positions() const {
351     return title_match_positions_;
352   }
353 
354   void SwapResult(URLResult* other);
355 
356  private:
357   friend class HistoryBackend;
358 
359   // The time that this result corresponds to.
360   base::Time visit_time_;
361 
362   // These values are typically set by HistoryBackend.
363   Snippet snippet_;
364   Snippet::MatchPositions title_match_positions_;
365 
366   // We support the implicit copy constructor and operator=.
367 };
368 
369 // QueryResults ----------------------------------------------------------------
370 
371 // Encapsulates the results of a history query. It supports an ordered list of
372 // URLResult objects, plus an efficient way of looking up the index of each time
373 // a given URL appears in those results.
374 class QueryResults {
375  public:
376   typedef std::vector<URLResult*> URLResultVector;
377 
378   QueryResults();
379   ~QueryResults();
380 
381   // Indicates the first time that the query includes results for (queries are
382   // clipped at the beginning, so it will always include to the end of the time
383   // queried).
384   //
385   // If the number of results was clipped as a result of the max count, this
386   // will be the time of the first query returned. If there were fewer results
387   // than we were allowed to return, this represents the first date considered
388   // in the query (this will be before the first result if there was time
389   // queried with no results).
390   //
391   // TODO(brettw): bug 1203054: This field is not currently set properly! Do
392   // not use until the bug is fixed.
first_time_searched()393   base::Time first_time_searched() const { return first_time_searched_; }
set_first_time_searched(base::Time t)394   void set_first_time_searched(base::Time t) { first_time_searched_ = t; }
395   // Note: If you need end_time_searched, it can be added.
396 
set_reached_beginning(bool reached)397   void set_reached_beginning(bool reached) { reached_beginning_ = reached; }
reached_beginning()398   bool reached_beginning() { return reached_beginning_; }
399 
size()400   size_t size() const { return results_.size(); }
empty()401   bool empty() const { return results_.empty(); }
402 
403   URLResult& operator[](size_t i) { return *results_[i]; }
404   const URLResult& operator[](size_t i) const { return *results_[i]; }
405 
begin()406   URLResultVector::const_iterator begin() const { return results_.begin(); }
end()407   URLResultVector::const_iterator end() const { return results_.end(); }
rbegin()408   URLResultVector::const_reverse_iterator rbegin() const {
409     return results_.rbegin();
410   }
rend()411   URLResultVector::const_reverse_iterator rend() const {
412     return results_.rend();
413   }
414 
415   // Returns a pointer to the beginning of an array of all matching indices
416   // for entries with the given URL. The array will be |*num_matches| long.
417   // |num_matches| can be NULL if the caller is not interested in the number of
418   // results (commonly it will only be interested in the first one and can test
419   // the pointer for NULL).
420   //
421   // When there is no match, it will return NULL and |*num_matches| will be 0.
422   const size_t* MatchesForURL(const GURL& url, size_t* num_matches) const;
423 
424   // Swaps the current result with another. This allows ownership to be
425   // efficiently transferred without copying.
426   void Swap(QueryResults* other);
427 
428   // Adds the given result to the map, using swap() on the members to avoid
429   // copying (there are a lot of strings and vectors). This means the parameter
430   // object will be cleared after this call.
431   void AppendURLBySwapping(URLResult* result);
432 
433   // Appends a new result set to the other. The |other| results will be
434   // destroyed because the pointer ownership will just be transferred. When
435   // |remove_dupes| is set, each URL that appears in this array will be removed
436   // from the |other| array before appending.
437   void AppendResultsBySwapping(QueryResults* other, bool remove_dupes);
438 
439   // Removes all instances of the given URL from the result set.
440   void DeleteURL(const GURL& url);
441 
442   // Deletes the given range of items in the result set.
443   void DeleteRange(size_t begin, size_t end);
444 
445  private:
446   // Maps the given URL to a list of indices into results_ which identify each
447   // time an entry with that URL appears. Normally, each URL will have one or
448   // very few indices after it, so we optimize this to use statically allocated
449   // memory when possible.
450   typedef std::map<GURL, StackVector<size_t, 4> > URLToResultIndices;
451 
452   // Inserts an entry into the |url_to_results_| map saying that the given URL
453   // is at the given index in the results_.
454   void AddURLUsageAtIndex(const GURL& url, size_t index);
455 
456   // Adds |delta| to each index in url_to_results_ in the range [begin,end]
457   // (this is inclusive). This is used when inserting or deleting.
458   void AdjustResultMap(size_t begin, size_t end, ptrdiff_t delta);
459 
460   base::Time first_time_searched_;
461 
462   // Whether the query reaches the beginning of the database.
463   bool reached_beginning_;
464 
465   // The ordered list of results. The pointers inside this are owned by this
466   // QueryResults object.
467   URLResultVector results_;
468 
469   // Maps URLs to entries in results_.
470   URLToResultIndices url_to_results_;
471 
472   DISALLOW_COPY_AND_ASSIGN(QueryResults);
473 };
474 
475 // QueryOptions ----------------------------------------------------------------
476 
477 struct QueryOptions {
478   QueryOptions();
479 
480   // The time range to search for matches in.
481   //
482   // This will match only the one recent visit of a URL.  For text search
483   // queries, if the URL was visited in the given time period, but has also been
484   // visited more recently than that, it will not be returned. When the text
485   // query is empty, this will return the most recent visit within the time
486   // range.
487   //
488   // As a special case, if both times are is_null(), then the entire database
489   // will be searched. However, if you set one, you must set the other.
490   //
491   // The beginning is inclusive and the ending is exclusive.
492   base::Time begin_time;
493   base::Time end_time;
494 
495   // Sets the query time to the last |days_ago| days to the present time.
496   void SetRecentDayRange(int days_ago);
497 
498   // The maximum number of results to return. The results will be sorted with
499   // the most recent first, so older results may not be returned if there is not
500   // enough room. When 0, this will return everything (the default).
501   int max_count;
502 };
503 
504 // KeywordSearchTermVisit -----------------------------------------------------
505 
506 // KeywordSearchTermVisit is returned from GetMostRecentKeywordSearchTerms. It
507 // gives the time and search term of the keyword visit.
508 struct KeywordSearchTermVisit {
509   KeywordSearchTermVisit();
510   ~KeywordSearchTermVisit();
511 
512   // The time of the visit.
513   base::Time time;
514 
515   // The search term that was used.
516   string16 term;
517 };
518 
519 // KeywordSearchTermRow --------------------------------------------------------
520 
521 // Used for URLs that have a search term associated with them.
522 struct KeywordSearchTermRow {
523   KeywordSearchTermRow();
524   ~KeywordSearchTermRow();
525 
526   // ID of the keyword.
527   TemplateURLID keyword_id;
528 
529   // ID of the url.
530   URLID url_id;
531 
532   // The search term that was used.
533   string16 term;
534 };
535 
536 // MostVisitedURL --------------------------------------------------------------
537 
538 // Holds the per-URL information of the most visited query.
539 struct MostVisitedURL {
540   MostVisitedURL();
541   MostVisitedURL(const GURL& in_url,
542                  const GURL& in_favicon_url,
543                  const string16& in_title);
544   ~MostVisitedURL();
545 
546   GURL url;
547   GURL favicon_url;
548   string16 title;
549 
550   RedirectList redirects;
551 
552   bool operator==(const MostVisitedURL& other) {
553     return url == other.url;
554   }
555 };
556 
557 // Navigation -----------------------------------------------------------------
558 
559 // Marshalling structure for AddPage.
560 class HistoryAddPageArgs
561     : public base::RefCountedThreadSafe<HistoryAddPageArgs> {
562  public:
563   HistoryAddPageArgs(const GURL& arg_url,
564                      base::Time arg_time,
565                      const void* arg_id_scope,
566                      int32 arg_page_id,
567                      const GURL& arg_referrer,
568                      const history::RedirectList& arg_redirects,
569                      PageTransition::Type arg_transition,
570                      VisitSource arg_source,
571                      bool arg_did_replace_entry);
572 
573   // Returns a new HistoryAddPageArgs that is a copy of this (ref count is
574   // of course reset). Ownership of returned object passes to caller.
575   HistoryAddPageArgs* Clone() const;
576 
577   GURL url;
578   base::Time time;
579 
580   const void* id_scope;
581   int32 page_id;
582 
583   GURL referrer;
584   history::RedirectList redirects;
585   PageTransition::Type transition;
586   VisitSource visit_source;
587   bool did_replace_entry;
588 
589  private:
590   friend class base::RefCountedThreadSafe<HistoryAddPageArgs>;
591 
592   ~HistoryAddPageArgs();
593 
594   DISALLOW_COPY_AND_ASSIGN(HistoryAddPageArgs);
595 };
596 
597 // TopSites -------------------------------------------------------------------
598 
599 typedef std::vector<MostVisitedURL> MostVisitedURLList;
600 
601 // Used by TopSites to store the thumbnails.
602 struct Images {
603   Images();
604   ~Images();
605 
606   scoped_refptr<RefCountedBytes> thumbnail;
607   ThumbnailScore thumbnail_score;
608 
609   // TODO(brettw): this will eventually store the favicon.
610   // scoped_refptr<RefCountedBytes> favicon;
611 };
612 
613 typedef std::vector<MostVisitedURL> MostVisitedURLList;
614 
615 struct MostVisitedURLWithRank {
616   MostVisitedURL url;
617   int rank;
618 };
619 
620 typedef std::vector<MostVisitedURLWithRank> MostVisitedURLWithRankList;
621 
622 struct TopSitesDelta {
623   TopSitesDelta();
624   ~TopSitesDelta();
625 
626   MostVisitedURLList deleted;
627   MostVisitedURLWithRankList added;
628   MostVisitedURLWithRankList moved;
629 };
630 
631 typedef std::map<GURL, scoped_refptr<RefCountedBytes> > URLToThumbnailMap;
632 
633 // Used when migrating most visited thumbnails out of history and into topsites.
634 struct ThumbnailMigration {
635   ThumbnailMigration();
636   ~ThumbnailMigration();
637 
638   MostVisitedURLList most_visited;
639   URLToThumbnailMap url_to_thumbnail_map;
640 };
641 
642 typedef std::map<GURL, Images> URLToImagesMap;
643 
644 class MostVisitedThumbnails
645     : public base::RefCountedThreadSafe<MostVisitedThumbnails> {
646  public:
647   MostVisitedThumbnails();
648   virtual ~MostVisitedThumbnails();
649 
650   MostVisitedURLList most_visited;
651   URLToImagesMap url_to_images_map;
652 
653  private:
654   friend class base::RefCountedThreadSafe<MostVisitedThumbnails>;
655 
656   DISALLOW_COPY_AND_ASSIGN(MostVisitedThumbnails);
657 };
658 
659 // Autocomplete thresholds -----------------------------------------------------
660 
// Constants which specify, when considered altogether, 'significant'
// history items. These are used to filter out insignificant items
// for consideration as autocomplete candidates.
extern const int kLowQualityMatchTypedLimit;
extern const int kLowQualityMatchVisitLimit;
extern const int kLowQualityMatchAgeLimitInDays;
667 
668 // Returns the date threshold for considering an history item as significant.
669 base::Time AutocompleteAgeThreshold();
670 
671 // Return true if |row| qualifies as an autocomplete candidate. If |time_cache|
672 // is_null() then this function determines a new time threshold each time it is
673 // called. Since getting system time can be costly (such as for cases where
674 // this function will be called in a loop over many history items), you can
675 // provide a non-null |time_cache| by simply initializing |time_cache| with
676 // AutocompleteAgeThreshold() (or any other desired time in the past).
677 bool RowQualifiesAsSignificant(const URLRow& row, const base::Time& threshold);
678 
// Defines the icon types. They are also stored in icon_type field of favicons
// table. Each real type occupies its own bit.
enum IconType {
  INVALID_ICON = 0x0,
  FAVICON = 1 << 0,
  TOUCH_ICON = 1 << 1,
  TOUCH_PRECOMPOSED_ICON = 1 << 2
};
687 
688 // Used for the mapping between the page and icon.
689 struct IconMapping {
690   IconMapping();
691   ~IconMapping();
692 
693   // The unique id of the mapping.
694   IconMappingID mapping_id;
695 
696   // The url of a web page.
697   GURL page_url;
698 
699   // The unique id of the icon.
700   FaviconID icon_id;
701 
702   // The type of icon.
703   IconType icon_type;
704 };
705 
706 // Defines the favicon stored in history backend.
707 struct FaviconData {
708   FaviconData();
709   ~FaviconData();
710 
711   // Returns true if the icon is known and image has data.
712   bool is_valid();
713 
714   // Indicates whether the icon is known by the history backend.
715   bool known_icon;
716 
717   // The bits of image.
718   scoped_refptr<RefCountedMemory> image_data;
719 
720   // Indicates whether image is expired.
721   bool expired;
722 
723   // The icon's URL.
724   GURL icon_url;
725 
726   // The type of favicon.
727   history::IconType icon_type;
728 };
729 
730 }  // namespace history
731 
732 #endif  // CHROME_BROWSER_HISTORY_HISTORY_TYPES_H_
733