• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. See the AUTHORS file for names of contributors.
4 
5 #include "db/db_iter.h"
6 
7 #include "db/filename.h"
8 #include "db/db_impl.h"
9 #include "db/dbformat.h"
10 #include "leveldb/env.h"
11 #include "leveldb/iterator.h"
12 #include "port/port.h"
13 #include "util/logging.h"
14 #include "util/mutexlock.h"
15 #include "util/random.h"
16 
17 namespace leveldb {
18 
19 #if 0
20 static void DumpInternalIter(Iterator* iter) {
21   for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
22     ParsedInternalKey k;
23     if (!ParseInternalKey(iter->key(), &k)) {
24       fprintf(stderr, "Corrupt '%s'\n", EscapeString(iter->key()).c_str());
25     } else {
26       fprintf(stderr, "@ '%s'\n", k.DebugString().c_str());
27     }
28   }
29 }
30 #endif
31 
32 namespace {
33 
34 // Memtables and sstables that make the DB representation contain
35 // (userkey,seq,type) => uservalue entries.  DBIter
36 // combines multiple entries for the same userkey found in the DB
37 // representation into a single entry while accounting for sequence
38 // numbers, deletion markers, overwrites, etc.
39 class DBIter: public Iterator {
40  public:
41   // Which direction is the iterator currently moving?
42   // (1) When moving forward, the internal iterator is positioned at
43   //     the exact entry that yields this->key(), this->value()
44   // (2) When moving backwards, the internal iterator is positioned
45   //     just before all entries whose user key == this->key().
46   enum Direction {
47     kForward,
48     kReverse
49   };
50 
DBIter(DBImpl * db,const Comparator * cmp,Iterator * iter,SequenceNumber s,uint32_t seed)51   DBIter(DBImpl* db, const Comparator* cmp, Iterator* iter, SequenceNumber s,
52          uint32_t seed)
53       : db_(db),
54         user_comparator_(cmp),
55         iter_(iter),
56         sequence_(s),
57         direction_(kForward),
58         valid_(false),
59         rnd_(seed),
60         bytes_counter_(RandomPeriod()) {
61   }
~DBIter()62   virtual ~DBIter() {
63     delete iter_;
64   }
Valid() const65   virtual bool Valid() const { return valid_; }
key() const66   virtual Slice key() const {
67     assert(valid_);
68     return (direction_ == kForward) ? ExtractUserKey(iter_->key()) : saved_key_;
69   }
value() const70   virtual Slice value() const {
71     assert(valid_);
72     return (direction_ == kForward) ? iter_->value() : saved_value_;
73   }
status() const74   virtual Status status() const {
75     if (status_.ok()) {
76       return iter_->status();
77     } else {
78       return status_;
79     }
80   }
81 
82   virtual void Next();
83   virtual void Prev();
84   virtual void Seek(const Slice& target);
85   virtual void SeekToFirst();
86   virtual void SeekToLast();
87 
88  private:
89   void FindNextUserEntry(bool skipping, std::string* skip);
90   void FindPrevUserEntry();
91   bool ParseKey(ParsedInternalKey* key);
92 
SaveKey(const Slice & k,std::string * dst)93   inline void SaveKey(const Slice& k, std::string* dst) {
94     dst->assign(k.data(), k.size());
95   }
96 
ClearSavedValue()97   inline void ClearSavedValue() {
98     if (saved_value_.capacity() > 1048576) {
99       std::string empty;
100       swap(empty, saved_value_);
101     } else {
102       saved_value_.clear();
103     }
104   }
105 
106   // Pick next gap with average value of config::kReadBytesPeriod.
RandomPeriod()107   ssize_t RandomPeriod() {
108     return rnd_.Uniform(2*config::kReadBytesPeriod);
109   }
110 
111   DBImpl* db_;
112   const Comparator* const user_comparator_;
113   Iterator* const iter_;
114   SequenceNumber const sequence_;
115 
116   Status status_;
117   std::string saved_key_;     // == current key when direction_==kReverse
118   std::string saved_value_;   // == current raw value when direction_==kReverse
119   Direction direction_;
120   bool valid_;
121 
122   Random rnd_;
123   ssize_t bytes_counter_;
124 
125   // No copying allowed
126   DBIter(const DBIter&);
127   void operator=(const DBIter&);
128 };
129 
ParseKey(ParsedInternalKey * ikey)130 inline bool DBIter::ParseKey(ParsedInternalKey* ikey) {
131   Slice k = iter_->key();
132   ssize_t n = k.size() + iter_->value().size();
133   bytes_counter_ -= n;
134   while (bytes_counter_ < 0) {
135     bytes_counter_ += RandomPeriod();
136     db_->RecordReadSample(k);
137   }
138   if (!ParseInternalKey(k, ikey)) {
139     status_ = Status::Corruption("corrupted internal key in DBIter");
140     return false;
141   } else {
142     return true;
143   }
144 }
145 
Next()146 void DBIter::Next() {
147   assert(valid_);
148 
149   if (direction_ == kReverse) {  // Switch directions?
150     direction_ = kForward;
151     // iter_ is pointing just before the entries for this->key(),
152     // so advance into the range of entries for this->key() and then
153     // use the normal skipping code below.
154     if (!iter_->Valid()) {
155       iter_->SeekToFirst();
156     } else {
157       iter_->Next();
158     }
159     if (!iter_->Valid()) {
160       valid_ = false;
161       saved_key_.clear();
162       return;
163     }
164     // saved_key_ already contains the key to skip past.
165   } else {
166     // Store in saved_key_ the current key so we skip it below.
167     SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
168   }
169 
170   FindNextUserEntry(true, &saved_key_);
171 }
172 
FindNextUserEntry(bool skipping,std::string * skip)173 void DBIter::FindNextUserEntry(bool skipping, std::string* skip) {
174   // Loop until we hit an acceptable entry to yield
175   assert(iter_->Valid());
176   assert(direction_ == kForward);
177   do {
178     ParsedInternalKey ikey;
179     if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
180       switch (ikey.type) {
181         case kTypeDeletion:
182           // Arrange to skip all upcoming entries for this key since
183           // they are hidden by this deletion.
184           SaveKey(ikey.user_key, skip);
185           skipping = true;
186           break;
187         case kTypeValue:
188           if (skipping &&
189               user_comparator_->Compare(ikey.user_key, *skip) <= 0) {
190             // Entry hidden
191           } else {
192             valid_ = true;
193             saved_key_.clear();
194             return;
195           }
196           break;
197       }
198     }
199     iter_->Next();
200   } while (iter_->Valid());
201   saved_key_.clear();
202   valid_ = false;
203 }
204 
Prev()205 void DBIter::Prev() {
206   assert(valid_);
207 
208   if (direction_ == kForward) {  // Switch directions?
209     // iter_ is pointing at the current entry.  Scan backwards until
210     // the key changes so we can use the normal reverse scanning code.
211     assert(iter_->Valid());  // Otherwise valid_ would have been false
212     SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
213     while (true) {
214       iter_->Prev();
215       if (!iter_->Valid()) {
216         valid_ = false;
217         saved_key_.clear();
218         ClearSavedValue();
219         return;
220       }
221       if (user_comparator_->Compare(ExtractUserKey(iter_->key()),
222                                     saved_key_) < 0) {
223         break;
224       }
225     }
226     direction_ = kReverse;
227   }
228 
229   FindPrevUserEntry();
230 }
231 
FindPrevUserEntry()232 void DBIter::FindPrevUserEntry() {
233   assert(direction_ == kReverse);
234 
235   ValueType value_type = kTypeDeletion;
236   if (iter_->Valid()) {
237     do {
238       ParsedInternalKey ikey;
239       if (ParseKey(&ikey) && ikey.sequence <= sequence_) {
240         if ((value_type != kTypeDeletion) &&
241             user_comparator_->Compare(ikey.user_key, saved_key_) < 0) {
242           // We encountered a non-deleted value in entries for previous keys,
243           break;
244         }
245         value_type = ikey.type;
246         if (value_type == kTypeDeletion) {
247           saved_key_.clear();
248           ClearSavedValue();
249         } else {
250           Slice raw_value = iter_->value();
251           if (saved_value_.capacity() > raw_value.size() + 1048576) {
252             std::string empty;
253             swap(empty, saved_value_);
254           }
255           SaveKey(ExtractUserKey(iter_->key()), &saved_key_);
256           saved_value_.assign(raw_value.data(), raw_value.size());
257         }
258       }
259       iter_->Prev();
260     } while (iter_->Valid());
261   }
262 
263   if (value_type == kTypeDeletion) {
264     // End
265     valid_ = false;
266     saved_key_.clear();
267     ClearSavedValue();
268     direction_ = kForward;
269   } else {
270     valid_ = true;
271   }
272 }
273 
Seek(const Slice & target)274 void DBIter::Seek(const Slice& target) {
275   direction_ = kForward;
276   ClearSavedValue();
277   saved_key_.clear();
278   AppendInternalKey(
279       &saved_key_, ParsedInternalKey(target, sequence_, kValueTypeForSeek));
280   iter_->Seek(saved_key_);
281   if (iter_->Valid()) {
282     FindNextUserEntry(false, &saved_key_ /* temporary storage */);
283   } else {
284     valid_ = false;
285   }
286 }
287 
SeekToFirst()288 void DBIter::SeekToFirst() {
289   direction_ = kForward;
290   ClearSavedValue();
291   iter_->SeekToFirst();
292   if (iter_->Valid()) {
293     FindNextUserEntry(false, &saved_key_ /* temporary storage */);
294   } else {
295     valid_ = false;
296   }
297 }
298 
SeekToLast()299 void DBIter::SeekToLast() {
300   direction_ = kReverse;
301   ClearSavedValue();
302   iter_->SeekToLast();
303   FindPrevUserEntry();
304 }
305 
306 }  // anonymous namespace
307 
NewDBIterator(DBImpl * db,const Comparator * user_key_comparator,Iterator * internal_iter,SequenceNumber sequence,uint32_t seed)308 Iterator* NewDBIterator(
309     DBImpl* db,
310     const Comparator* user_key_comparator,
311     Iterator* internal_iter,
312     SequenceNumber sequence,
313     uint32_t seed) {
314   return new DBIter(db, user_key_comparator, internal_iter, sequence, seed);
315 }
316 
317 }  // namespace leveldb
318