• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2013 The ChromiumOS Authors
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4"""The hill genetic algorithm.
5
6Part of the Chrome build flags optimization.
7"""
8
9__author__ = "yuhenglong@google.com (Yuheng Long)"
10
11import random
12
13import flags
14from flags import Flag
15from flags import FlagSet
16from generation import Generation
17from task import Task
18
19
20def CrossoverWith(first_flag, second_flag):
21    """Get a crossed over gene.
22
23    At present, this just picks either/or of these values.  However, it could be
24    implemented as an integer maskover effort, if required.
25
26    Args:
27      first_flag: The first gene (Flag) to cross over with.
28      second_flag: The second gene (Flag) to cross over with.
29
30    Returns:
31      A Flag that can be considered appropriately randomly blended between the
32      first and second input flag.
33    """
34
35    return first_flag if random.randint(0, 1) else second_flag
36
37
38def RandomMutate(specs, flag_set, mutation_rate):
39    """Randomly mutate the content of a task.
40
41    Args:
42      specs: A list of spec from which the flag set is created.
43      flag_set: The current flag set being mutated
44      mutation_rate: What fraction of genes to mutate.
45
46    Returns:
47      A Genetic Task constructed by randomly mutating the input flag set.
48    """
49
50    results_flags = []
51
52    for spec in specs:
53        # Randomly choose whether this flag should be mutated.
54        if random.randint(0, int(1 / mutation_rate)):
55            continue
56
57        # If the flag is not already in the flag set, it is added.
58        if spec not in flag_set:
59            results_flags.append(Flag(spec))
60            continue
61
62        # If the flag is already in the flag set, it is mutated.
63        numeric_flag_match = flags.Search(spec)
64
65        # The value of a numeric flag will be changed, and a boolean flag will be
66        # dropped.
67        if not numeric_flag_match:
68            continue
69
70        value = flag_set[spec].GetValue()
71
72        # Randomly select a nearby value of the current value of the flag.
73        rand_arr = [value]
74        if value + 1 < int(numeric_flag_match.group("end")):
75            rand_arr.append(value + 1)
76
77        rand_arr.append(value - 1)
78        value = random.sample(rand_arr, 1)[0]
79
80        # If the value is smaller than the start of the spec, this flag will be
81        # dropped.
82        if value != int(numeric_flag_match.group("start")) - 1:
83            results_flags.append(Flag(spec, value))
84
85    return GATask(FlagSet(results_flags))
86
87
88class GATask(Task):
89    def __init__(self, flag_set):
90        Task.__init__(self, flag_set)
91
92    def ReproduceWith(self, other, specs, mutation_rate):
93        """Reproduce with other FlagSet.
94
95        Args:
96          other: A FlagSet to reproduce with.
97          specs: A list of spec from which the flag set is created.
98          mutation_rate: one in mutation_rate flags will be mutated (replaced by a
99            random version of the same flag, instead of one from either of the
100            parents).  Set to 0 to disable mutation.
101
102        Returns:
103          A GA task made by mixing self with other.
104        """
105
106        # Get the flag dictionary.
107        father_flags = self.GetFlags().GetFlags()
108        mother_flags = other.GetFlags().GetFlags()
109
110        # Flags that are common in both parents and flags that belong to only one
111        # parent.
112        self_flags = []
113        other_flags = []
114        common_flags = []
115
116        # Find out flags that are common to both parent and flags that belong soly
117        # to one parent.
118        for self_flag in father_flags:
119            if self_flag in mother_flags:
120                common_flags.append(self_flag)
121            else:
122                self_flags.append(self_flag)
123
124        for other_flag in mother_flags:
125            if other_flag not in father_flags:
126                other_flags.append(other_flag)
127
128        # Randomly select flags that belong to only one parent.
129        output_flags = [
130            father_flags[f] for f in self_flags if random.randint(0, 1)
131        ]
132        others = [mother_flags[f] for f in other_flags if random.randint(0, 1)]
133        output_flags.extend(others)
134        # Turn on flags that belong to both parent. Randomly choose the value of the
135        # flag from either parent.
136        for flag in common_flags:
137            output_flags.append(
138                CrossoverWith(father_flags[flag], mother_flags[flag])
139            )
140
141        # Mutate flags
142        if mutation_rate:
143            return RandomMutate(specs, FlagSet(output_flags), mutation_rate)
144
145        return GATask(FlagSet(output_flags))
146
147
148class GAGeneration(Generation):
149    """The Genetic Algorithm."""
150
151    # The value checks whether the algorithm has converged and arrives at a fixed
152    # point. If STOP_THRESHOLD of generations have not seen any performance
153    # improvement, the Genetic Algorithm stops.
154    STOP_THRESHOLD = None
155
156    # Number of tasks in each generation.
157    NUM_CHROMOSOMES = None
158
159    # The value checks whether the algorithm has converged and arrives at a fixed
160    # point. If NUM_TRIALS of trials have been attempted to generate a new task
161    # without a success, the Genetic Algorithm stops.
162    NUM_TRIALS = None
163
164    # The flags that can be used to generate new tasks.
165    SPECS = None
166
167    # What fraction of genes to mutate.
168    MUTATION_RATE = 0
169
170    @staticmethod
171    def InitMetaData(
172        stop_threshold, num_chromosomes, num_trials, specs, mutation_rate
173    ):
174        """Set up the meta data for the Genetic Algorithm.
175
176        Args:
177          stop_threshold: The number of generations, upon which no performance has
178            seen, the Genetic Algorithm stops.
179          num_chromosomes: Number of tasks in each generation.
180          num_trials: The number of trials, upon which new task has been tried to
181            generated without success, the Genetic Algorithm stops.
182          specs: The flags that can be used to generate new tasks.
183          mutation_rate: What fraction of genes to mutate.
184        """
185
186        GAGeneration.STOP_THRESHOLD = stop_threshold
187        GAGeneration.NUM_CHROMOSOMES = num_chromosomes
188        GAGeneration.NUM_TRIALS = num_trials
189        GAGeneration.SPECS = specs
190        GAGeneration.MUTATION_RATE = mutation_rate
191
192    def __init__(self, tasks, parents, total_stucks):
193        """Set up the meta data for the Genetic Algorithm.
194
195        Args:
196          tasks: A set of tasks to be run.
197          parents: A set of tasks from which this new generation is produced. This
198            set also contains the best tasks generated so far.
199          total_stucks: The number of generations that have not seen improvement.
200            The Genetic Algorithm will stop once the total_stucks equals to
201            NUM_TRIALS defined in the GAGeneration class.
202        """
203
204        Generation.__init__(self, tasks, parents)
205        self._total_stucks = total_stucks
206
207    def IsImproved(self):
208        """True if this generation has improvement upon its parent generation."""
209
210        tasks = self.Pool()
211        parents = self.CandidatePool()
212
213        # The first generate does not have parents.
214        if not parents:
215            return True
216
217        # Found out whether a task has improvement upon the best task in the
218        # parent generation.
219        best_parent = sorted(parents, key=lambda task: task.GetTestResult())[0]
220        best_current = sorted(tasks, key=lambda task: task.GetTestResult())[0]
221
222        # At least one task has improvement.
223        if best_current.IsImproved(best_parent):
224            self._total_stucks = 0
225            return True
226
227        # If STOP_THRESHOLD of generations have no improvement, the algorithm stops.
228        if self._total_stucks >= GAGeneration.STOP_THRESHOLD:
229            return False
230
231        self._total_stucks += 1
232        return True
233
234    def Next(self, cache):
235        """Calculate the next generation.
236
237        Generate a new generation from the a set of tasks. This set contains the
238          best set seen so far and the tasks executed in the parent generation.
239
240        Args:
241          cache: A set of tasks that have been generated before.
242
243        Returns:
244          A set of new generations.
245        """
246
247        target_len = GAGeneration.NUM_CHROMOSOMES
248        specs = GAGeneration.SPECS
249        mutation_rate = GAGeneration.MUTATION_RATE
250
251        # Collect a set of size target_len of tasks. This set will be used to
252        # produce a new generation of tasks.
253        gen_tasks = [task for task in self.Pool()]
254
255        parents = self.CandidatePool()
256        if parents:
257            gen_tasks.extend(parents)
258
259        # A set of tasks that are the best. This set will be used as the parent
260        # generation to produce the next generation.
261        sort_func = lambda task: task.GetTestResult()
262        retained_tasks = sorted(gen_tasks, key=sort_func)[:target_len]
263
264        child_pool = set()
265        for father in retained_tasks:
266            num_trials = 0
267            # Try num_trials times to produce a new child.
268            while num_trials < GAGeneration.NUM_TRIALS:
269                # Randomly select another parent.
270                mother = random.choice(retained_tasks)
271                # Cross over.
272                child = mother.ReproduceWith(father, specs, mutation_rate)
273                if child not in child_pool and child not in cache:
274                    child_pool.add(child)
275                    break
276                else:
277                    num_trials += 1
278
279        num_trials = 0
280
281        while (
282            len(child_pool) < target_len
283            and num_trials < GAGeneration.NUM_TRIALS
284        ):
285            for keep_task in retained_tasks:
286                # Mutation.
287                child = RandomMutate(specs, keep_task.GetFlags(), mutation_rate)
288                if child not in child_pool and child not in cache:
289                    child_pool.add(child)
290                    if len(child_pool) >= target_len:
291                        break
292                else:
293                    num_trials += 1
294
295        # If NUM_TRIALS of tries have been attempted without generating a set of new
296        # tasks, the algorithm stops.
297        if num_trials >= GAGeneration.NUM_TRIALS:
298            return []
299
300        assert len(child_pool) == target_len
301
302        return [
303            GAGeneration(child_pool, set(retained_tasks), self._total_stucks)
304        ]
305