• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: utf-8 -*-
2# Copyright 2014 Google Inc. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#     http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15"""Implementation of hash command for calculating hashes of local files."""
16
17from hashlib import md5
18import os
19
20import crcmod
21
22from gslib.command import Command
23from gslib.command_argument import CommandArgument
24from gslib.cs_api_map import ApiSelector
25from gslib.exception import CommandException
26from gslib.hashing_helper import Base64EncodeHash
27from gslib.hashing_helper import CalculateHashesFromContents
28from gslib.hashing_helper import SLOW_CRCMOD_WARNING
29from gslib.progress_callback import ConstructAnnounceText
30from gslib.progress_callback import FileProgressCallbackHandler
31from gslib.progress_callback import ProgressCallbackWithBackoff
32from gslib.storage_url import StorageUrlFromString
33from gslib.util import NO_MAX
34from gslib.util import UsingCrcmodExtension
35
# Usage synopsis shown in the command's help output. The -c/-h/-m flags are
# sub-options of the hash command (the command spec in this file declares them
# via supported_sub_args='chm'), so they must follow the command name; gsutil's
# top-level -h and -m options have different meanings.
_SYNOPSIS = """
  gsutil hash [-c] [-h] [-m] filename...
"""

# Full help text for "gsutil help hash"; embeds the synopsis above.
_DETAILED_HELP_TEXT = ("""
<B>SYNOPSIS</B>
""" + _SYNOPSIS + """


<B>DESCRIPTION</B>
  The hash command calculates hashes on a local file that can be used to compare
  with gsutil ls -L output. If a specific hash option is not provided, this
  command calculates all gsutil-supported hashes for the file.

  Note that gsutil automatically performs hash validation when uploading or
  downloading files, so this command is only needed if you want to write a
  script that separately checks the hash for some reason.

  If you calculate a CRC32c hash for the file without a precompiled crcmod
  installation, hashing will be very slow. See "gsutil help crcmod" for details.

<B>OPTIONS</B>
  -c          Calculate a CRC32c hash for the file.

  -h          Output hashes in hex format. By default, gsutil uses base64.

  -m          Calculate a MD5 hash for the file.
""")
64
65
class HashCommand(Command):
  """Implementation of gsutil hash command.

  Calculates CRC32c and/or MD5 hashes of local files and prints them, either
  base64-encoded (the default) or in hex format.
  """

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'hash',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='chm',
      file_url_ok=True,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='hash',
      help_name_aliases=['checksum'],
      help_type='command_help',
      help_one_line_summary='Calculate file hashes',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  @classmethod
  def _ParseOpts(cls, sub_opts, logger):
    """Returns behavior variables based on input options.

    If neither -c nor -m is given, both hashes are calculated.

    Args:
      sub_opts: getopt sub-arguments for the command.
      logger: logging.Logger for the command.

    Returns:
      Tuple of
      calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum.
      calc_md5: Boolean, if True, command should calculate an MD5 hash.
      format_func: Function used for formatting the hash in the desired format.
      output_format: String describing the hash output format.
    """
    calc_crc32c = False
    calc_md5 = False
    # Default output is base64; -h switches both the formatter and the label.
    format_func = lambda digest: Base64EncodeHash(digest.hexdigest())
    found_hash_option = False
    output_format = 'base64'

    if sub_opts:
      for o, unused_a in sub_opts:
        if o == '-c':
          calc_crc32c = True
          found_hash_option = True
        elif o == '-h':
          output_format = 'hex'
          format_func = lambda digest: digest.hexdigest()
        elif o == '-m':
          calc_md5 = True
          found_hash_option = True

    # No explicit hash selection means "calculate everything".
    if not found_hash_option:
      calc_crc32c = True
      calc_md5 = True

    if calc_crc32c and not UsingCrcmodExtension(crcmod):
      # Logger.warn is a deprecated alias of Logger.warning.
      logger.warning(SLOW_CRCMOD_WARNING)

    return calc_crc32c, calc_md5, format_func, output_format

  def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5):
    """Constructs the dictionary of hashes to compute based on the arguments.

    Args:
      calc_crc32c: If True, CRC32c should be included.
      calc_md5: If True, MD5 should be included.

    Returns:
      Dictionary of {string: hash digester}, where string is the name of the
          digester algorithm. A fresh digester is created on every call.
    """
    hash_dict = {}
    if calc_crc32c:
      hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c')
    if calc_md5:
      hash_dict['md5'] = md5()
    return hash_dict

  def RunCommand(self):
    """Command entry point for the hash command.

    Hashes every local file matched by the command arguments and prints the
    requested hashes for each one.

    Returns:
      0 on success.

    Raises:
      CommandException: if an argument is not a file URL, or if no file
          matched any of the arguments.
    """
    (calc_crc32c, calc_md5, format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      if not StorageUrlFromString(url_str).IsFileUrl():
        raise CommandException('"hash" command requires a file URL')

      for file_ref in self.WildcardIterator(url_str).IterObjects():
        matched_one = True
        file_name = file_ref.storage_url.object_name
        file_size = os.path.getsize(file_name)
        callback_processor = ProgressCallbackWithBackoff(
            file_size, FileProgressCallbackHandler(
                ConstructAnnounceText('Hashing', file_name), self.logger).call)
        # New digesters per file so state never carries over between files.
        hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
        with open(file_name, 'rb') as fp:
          CalculateHashesFromContents(fp, hash_dict,
                                      callback_processor=callback_processor)
        # A parenthesized single-argument print behaves the same as a
        # Python 2 print statement and as a Python 3 function call.
        print('Hashes [%s] for %s:' % (output_format, file_name))
        # Sort by algorithm name so the output order is deterministic.
        for name in sorted(hash_dict):
          print('\tHash (%s):\t\t%s' % (name, format_func(hash_dict[name])))

    if not matched_one:
      raise CommandException('No files matched')

    return 0
185
186