• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3
2# Copyright 2021 The ChromiumOS Authors
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5"""General-purpose utility functions that don't belong elsewhere."""
6
7
8def levenshtein_distance(aa: str, bb: str) -> int:
9  """Compute the Levenshtein distance between two strings.
10
11    This is the minimum number of single-character edits required to change aa
12    into bb and serves usefully as a distance metric between strings.
13
14    Args:
15      aa: first string
16      bb: second string
17
18    Returns:
19      Edit distance between aa and bb.
20    """
21
22  # pylint: disable=invalid-name
23
24  # We're effectively building a matrix of distances between two prefixes
25  # of the strings:
26  #
27  #     0 1 2 3
28  #    │ │c│a│t│
29  #   ─┼─┼─┼─┼─┼
30  # 0 s│1│1│2│3│
31  #   ─┼─┼─┼─┼─┼
32  # 1 a│2│2│1│2│
33  #   ─┼─┼─┼─┼─┼
34  # 2 d│3│3│2│2│
35  #
36  # But, if we don't want the _actual_ edit sequence, we can just keep the
37  # first two rows.
38
39  # if bb is empty, edit distance is just deleting aa.
40  if not bb:
41    return len(aa)
42
43  # similarly, if aa is empty, edit distance is just inserting bb.
44  if not aa:
45    return len(bb)
46
47  prev_dist = list(range(len(bb) + 1))
48  curr_dist = [0] * (len(bb) + 1)
49
50  for ii, chara in enumerate(aa):
51    # we can always remove ii+1 characters to match the empty string.
52    curr_dist[0] = ii + 1
53
54    # compute cost of each of three operations from previous
55    # distances and choose the cheapest one.
56    for jj, charb in enumerate(bb):
57      cost_delete = prev_dist[jj + 1] + 1
58      cost_insert = curr_dist[jj] + 1
59      cost_change = prev_dist[jj]
60      if chara != charb:
61        cost_change += 1
62
63      curr_dist[jj + 1] = min(cost_delete, cost_insert, cost_change)
64
65    # swap
66    curr_dist, prev_dist = prev_dist, curr_dist
67
68  return prev_dist[-1]
69