# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Validate the input path."""
import os
import re

# Whitelist of characters accepted in a normalized path: ASCII letters,
# digits, and the punctuation '_', '.', '/', '\\', '@', '-'.
# Used with fullmatch(), so no explicit anchors are needed.
_VALID_PATH_PATTERN = re.compile(r"[A-Za-z0-9_./\\@-]+")


def check_valid_character_of_path(file_path):
    """
    Validate the characters of a path.

    The path may only contain ASCII alphabets (a-zA-Z), digits (0-9) or one of
    {'-', '_', '.', '/', '\\', '@'}. An empty string is rejected as well.

    Note:
        Chinese and other non-ASCII paths are not supported at present.

    Args:
        file_path (str): Normalized path to check.

    Raises:
        RuntimeError: If `file_path` is empty or contains a character outside
            the supported set.
    """
    if _VALID_PATH_PATTERN.fullmatch(file_path) is None:
        msg = "The output path of profiler only supports alphabets(a-zA-Z), " \
              "digit(0-9) or {'-', '_', '.', '/', '@'}, but got the absolute path= " + file_path
        raise RuntimeError(msg)


def validate_and_normalize_path(
        path,
        check_absolute_path=False,
        allow_parent_dir=True,
):
    """
    Validate a local path and return its normalized form.

    The path is always handled as a local filesystem path; URL schemes are
    not interpreted by this function.

    Args:
        path (Union[str, bytes, os.PathLike]): Path to be normalized.
        check_absolute_path (bool): Whether to require that the path starts
            with "/". Default: False.
        allow_parent_dir (bool): Whether ".." components are allowed in the
            path. Default: True.

    Returns:
        str, the path returned by `os.path.realpath`.

    Raises:
        RuntimeError: If the path is empty, is not a str/bytes/os.PathLike
            value, contains ".." while `allow_parent_dir` is False, does not
            start with "/" while `check_absolute_path` is True, cannot be
            resolved, or contains unsupported characters after normalization.
    """
    if not path:
        raise RuntimeError("The path is invalid!")

    # Convert once to text so that every check below and the final
    # normalization operate on the very same value (bytes are decoded,
    # os.PathLike objects are unwrapped, anything else is rejected).
    try:
        path_str = os.fsdecode(path)
    except (TypeError, ValueError) as err:
        raise RuntimeError("The path is invalid!") from err

    if not allow_parent_dir:
        # Treat the platform separator like "/" so that ".." hidden behind a
        # backslash is also detected on Windows; this is a no-op on POSIX.
        path_components = path_str.replace(os.sep, "/").split("/")
        if ".." in path_components:
            raise RuntimeError("The parent path is not allowed!")

    if check_absolute_path and not path_str.startswith("/"):
        raise RuntimeError("The path is invalid!")

    try:
        # realpath may raise ValueError for malformed input
        # (for example an embedded NUL byte on some Python versions).
        normalized_path = os.path.realpath(path_str)
    except ValueError as err:
        raise RuntimeError("The path is invalid!") from err

    check_valid_character_of_path(normalized_path)
    return normalized_path