1# -*- coding: utf-8 -*- 2# Copyright 2014 Google Inc. All Rights Reserved. 3# 4# Licensed under the Apache License, Version 2.0 (the "License"); 5# you may not use this file except in compliance with the License. 6# You may obtain a copy of the License at 7# 8# http://www.apache.org/licenses/LICENSE-2.0 9# 10# Unless required by applicable law or agreed to in writing, software 11# distributed under the License is distributed on an "AS IS" BASIS, 12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13# See the License for the specific language governing permissions and 14# limitations under the License. 15"""Implementation of hash command for calculating hashes of local files.""" 16 17from hashlib import md5 18import os 19 20import crcmod 21 22from gslib.command import Command 23from gslib.command_argument import CommandArgument 24from gslib.cs_api_map import ApiSelector 25from gslib.exception import CommandException 26from gslib.hashing_helper import Base64EncodeHash 27from gslib.hashing_helper import CalculateHashesFromContents 28from gslib.hashing_helper import SLOW_CRCMOD_WARNING 29from gslib.progress_callback import ConstructAnnounceText 30from gslib.progress_callback import FileProgressCallbackHandler 31from gslib.progress_callback import ProgressCallbackWithBackoff 32from gslib.storage_url import StorageUrlFromString 33from gslib.util import NO_MAX 34from gslib.util import UsingCrcmodExtension 35 36_SYNOPSIS = """ 37 gsutil [-c] [-h] [-m] hash filename... 38""" 39 40_DETAILED_HELP_TEXT = (""" 41<B>SYNOPSIS</B> 42""" + _SYNOPSIS + """ 43 44 45<B>DESCRIPTION</B> 46 The hash command calculates hashes on a local file that can be used to compare 47 with gsutil ls -L output. If a specific hash option is not provided, this 48 command calculates all gsutil-supported hashes for the file. 49 50 Note that gsutil automatically performs hash validation when uploading or 51 downloading files, so this command is only needed if you want to write a 52 script that separately checks the hash for some reason. 53 54 If you calculate a CRC32c hash for the file without a precompiled crcmod 55 installation, hashing will be very slow. See "gsutil help crcmod" for details. 56 57<B>OPTIONS</B> 58 -c Calculate a CRC32c hash for the file. 59 60 -h Output hashes in hex format. By default, gsutil uses base64. 61 62 -m Calculate a MD5 hash for the file. 63""") 64 65 66class HashCommand(Command): 67 """Implementation of gsutil hash command.""" 68 69 # Command specification. See base class for documentation. 70 command_spec = Command.CreateCommandSpec( 71 'hash', 72 command_name_aliases=[], 73 usage_synopsis=_SYNOPSIS, 74 min_args=1, 75 max_args=NO_MAX, 76 supported_sub_args='chm', 77 file_url_ok=True, 78 provider_url_ok=False, 79 urls_start_arg=0, 80 gs_api_support=[ApiSelector.JSON], 81 gs_default_api=ApiSelector.JSON, 82 argparse_arguments=[ 83 CommandArgument.MakeZeroOrMoreFileURLsArgument() 84 ] 85 ) 86 # Help specification. See help_provider.py for documentation. 87 help_spec = Command.HelpSpec( 88 help_name='hash', 89 help_name_aliases=['checksum'], 90 help_type='command_help', 91 help_one_line_summary='Calculate file hashes', 92 help_text=_DETAILED_HELP_TEXT, 93 subcommand_help_text={}, 94 ) 95 96 @classmethod 97 def _ParseOpts(cls, sub_opts, logger): 98 """Returns behavior variables based on input options. 99 100 Args: 101 sub_opts: getopt sub-arguments for the command. 102 logger: logging.Logger for the command. 103 104 Returns: 105 Tuple of 106 calc_crc32c: Boolean, if True, command should calculate a CRC32c checksum. 107 calc_md5: Boolean, if True, command should calculate an MD5 hash. 108 format_func: Function used for formatting the hash in the desired format. 109 output_format: String describing the hash output format. 110 """ 111 calc_crc32c = False 112 calc_md5 = False 113 format_func = lambda digest: Base64EncodeHash(digest.hexdigest()) 114 found_hash_option = False 115 output_format = 'base64' 116 117 if sub_opts: 118 for o, unused_a in sub_opts: 119 if o == '-c': 120 calc_crc32c = True 121 found_hash_option = True 122 elif o == '-h': 123 output_format = 'hex' 124 format_func = lambda digest: digest.hexdigest() 125 elif o == '-m': 126 calc_md5 = True 127 found_hash_option = True 128 129 if not found_hash_option: 130 calc_crc32c = True 131 calc_md5 = True 132 133 if calc_crc32c and not UsingCrcmodExtension(crcmod): 134 logger.warn(SLOW_CRCMOD_WARNING) 135 136 return calc_crc32c, calc_md5, format_func, output_format 137 138 def _GetHashClassesFromArgs(self, calc_crc32c, calc_md5): 139 """Constructs the dictionary of hashes to compute based on the arguments. 140 141 Args: 142 calc_crc32c: If True, CRC32c should be included. 143 calc_md5: If True, MD5 should be included. 144 145 Returns: 146 Dictionary of {string: hash digester}, where string the name of the 147 digester algorithm. 148 """ 149 hash_dict = {} 150 if calc_crc32c: 151 hash_dict['crc32c'] = crcmod.predefined.Crc('crc-32c') 152 if calc_md5: 153 hash_dict['md5'] = md5() 154 return hash_dict 155 156 def RunCommand(self): 157 """Command entry point for the hash command.""" 158 (calc_crc32c, calc_md5, format_func, output_format) = ( 159 self._ParseOpts(self.sub_opts, self.logger)) 160 161 matched_one = False 162 for url_str in self.args: 163 if not StorageUrlFromString(url_str).IsFileUrl(): 164 raise CommandException('"hash" command requires a file URL') 165 166 for file_ref in self.WildcardIterator(url_str).IterObjects(): 167 matched_one = True 168 file_name = file_ref.storage_url.object_name 169 file_size = os.path.getsize(file_name) 170 callback_processor = ProgressCallbackWithBackoff( 171 file_size, FileProgressCallbackHandler( 172 ConstructAnnounceText('Hashing', file_name), self.logger).call) 173 hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5) 174 with open(file_name, 'rb') as fp: 175 CalculateHashesFromContents(fp, hash_dict, 176 callback_processor=callback_processor) 177 print 'Hashes [%s] for %s:' % (output_format, file_name) 178 for name, digest in hash_dict.iteritems(): 179 print '\tHash (%s):\t\t%s' % (name, format_func(digest)) 180 181 if not matched_one: 182 raise CommandException('No files matched') 183 184 return 0 185 186