1 // This example shows how to sort structs using complex multiple part keys using
2 // string_sort.
3 //
4 // Copyright Steven Ross 2009-2014.
5 //
6 // Distributed under the Boost Software License, Version 1.0.
7 // (See accompanying file LICENSE_1_0.txt or copy at
8 // http://www.boost.org/LICENSE_1_0.txt)
9
10 // See http://www.boost.org/libs/sort for library home page.
11
12 #include <boost/sort/spreadsort/string_sort.hpp>
13 #include <boost/sort/spreadsort/float_sort.hpp>
14 #include <time.h>
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <algorithm>
18 #include <vector>
19 #include <iostream>
20 #include <fstream>
21 #include <string>
22 using std::string;
23 using namespace boost::sort::spreadsort;
24
25 //[generalized_functors
26 struct DATA_TYPE {
27 time_t birth;
28 float net_worth;
29 string first_name;
30 string last_name;
31 };
32
33 static const int birth_size = sizeof(time_t);
34 static const int first_name_offset = birth_size + sizeof(float);
35 static const boost::uint64_t base_mask = 0xff;
36
37 struct lessthan {
operator ()lessthan38 inline bool operator()(const DATA_TYPE &x, const DATA_TYPE &y) const {
39 if (x.birth != y.birth) {
40 return x.birth < y.birth;
41 }
42 if (x.net_worth != y.net_worth) {
43 return x.net_worth < y.net_worth;
44 }
45 if (x.first_name != y.first_name) {
46 return x.first_name < y.first_name;
47 }
48 return x.last_name < y.last_name;
49 }
50 };
51
52 struct bracket {
operator ()bracket53 inline unsigned char operator()(const DATA_TYPE &x, size_t offset) const {
54 // Sort date as a signed int, returning the appropriate byte.
55 if (offset < birth_size) {
56 const int bit_shift = 8 * (birth_size - offset - 1);
57 unsigned char result = (x.birth & (base_mask << bit_shift)) >> bit_shift;
58 // Handling the sign bit. Unnecessary if the data is always positive.
59 if (offset == 0) {
60 return result ^ 128;
61 }
62
63 return result;
64 }
65
66 // Sort a signed float. This requires reversing the order of negatives
67 // because of the way floats are represented in bits.
68 if (offset < first_name_offset) {
69 const int bit_shift = 8 * (first_name_offset - offset - 1);
70 unsigned key = float_mem_cast<float, unsigned>(x.net_worth);
71 unsigned char result = (key & (base_mask << bit_shift)) >> bit_shift;
72 // Handling the sign.
73 if (x.net_worth < 0) {
74 return 255 - result;
75 }
76 // Increasing positives so they are higher than negatives.
77 if (offset == birth_size) {
78 return 128 + result;
79 }
80
81 return result;
82 }
83
84 // Sort a string that is before the end. This approach supports embedded
85 // nulls. If embedded nulls are not required, then just delete the "* 2"
86 // and the inside of the following if just becomes:
87 // return x.first_name[offset - first_name_offset];
88 const unsigned first_name_end_offset =
89 first_name_offset + x.first_name.size() * 2;
90 if (offset < first_name_end_offset) {
91 int char_offset = offset - first_name_offset;
92 // This signals that the string continues.
93 if (!(char_offset & 1)) {
94 return 1;
95 }
96 return x.first_name[char_offset >> 1];
97 }
98
99 // This signals that the string has ended, so that shorter strings come
100 // before longer ones.
101 if (offset == first_name_end_offset) {
102 return 0;
103 }
104
105 // The final string needs no special consideration.
106 return x.last_name[offset - first_name_end_offset - 1];
107 }
108 };
109
110 struct getsize {
operator ()getsize111 inline size_t operator()(const DATA_TYPE &x) const {
112 return first_name_offset + x.first_name.size() * 2 + 1 +
113 x.last_name.size();
114 }
115 };
116 //] [/generalized_functors]
117
118 //Pass in an argument to test std::sort
main(int argc,const char ** argv)119 int main(int argc, const char ** argv) {
120 std::ifstream indata;
121 std::ofstream outfile;
122 bool stdSort = false;
123 unsigned loopCount = 1;
124 for (int u = 1; u < argc; ++u) {
125 if (std::string(argv[u]) == "-std")
126 stdSort = true;
127 else
128 loopCount = atoi(argv[u]);
129 }
130 double total = 0.0;
131 //Run multiple loops, if requested
132 std::vector<DATA_TYPE> array;
133 for (unsigned u = 0; u < loopCount; ++u) {
134 indata.open("input.txt", std::ios_base::in | std::ios_base::binary);
135 if (indata.bad()) {
136 printf("input.txt could not be opened\n");
137 return 1;
138 }
139
140 // Read in the data.
141 DATA_TYPE inval;
142 while (!indata.eof() ) {
143 indata >> inval.first_name;
144 indata >> inval.last_name;
145 indata.read(reinterpret_cast<char *>(&(inval.birth)), birth_size);
146 indata.read(reinterpret_cast<char *>(&(inval.net_worth)), sizeof(float));
147 // Handling nan.
148 if (inval.net_worth != inval.net_worth) {
149 inval.net_worth = 0;
150 }
151 if (indata.eof())
152 break;
153 array.push_back(inval);
154 }
155 indata.close();
156
157 // Sort the data.
158 clock_t start, end;
159 double elapsed;
160 start = clock();
161 if (stdSort) {
162 std::sort(array.begin(), array.end(), lessthan());
163 } else {
164 //[generalized_functors_call
165 string_sort(array.begin(), array.end(), bracket(), getsize(), lessthan());
166 //] [/generalized_functors_call]
167 }
168 end = clock();
169 elapsed = static_cast<double>(end - start);
170 if (stdSort) {
171 outfile.open("standard_sort_out.txt", std::ios_base::out |
172 std::ios_base::binary | std::ios_base::trunc);
173 } else {
174 outfile.open("boost_sort_out.txt", std::ios_base::out |
175 std::ios_base::binary | std::ios_base::trunc);
176 }
177 if (outfile.good()) {
178 for (unsigned u = 0; u < array.size(); ++u)
179 outfile << array[u].birth << " " << array[u].net_worth << " "
180 << array[u].first_name << " " << array[u].last_name << "\n";
181 outfile.close();
182 }
183 total += elapsed;
184 array.clear();
185 }
186 if (stdSort) {
187 printf("std::sort elapsed time %f\n", total / CLOCKS_PER_SEC);
188 } else {
189 printf("spreadsort elapsed time %f\n", total / CLOCKS_PER_SEC);
190 }
191 return 0;
192 }
193