• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 Huawei Technologies Co., Ltd
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ============================================================================
15"""Validate the input path."""
16import os
17import re
18
19
def check_valid_character_of_path(file_path):
    """
    Check that a path is made up of supported characters only.

    The accepted characters are alphabets(a-zA-Z), digits(0-9), the symbols
    '-', '_', '.', '/', '@' and the backslash. The path must contain at least
    one character.

    Note:
        Chinese and other paths are not supported at present.

    Args:
        file_path (str): Path to check.

    Returns:
        None, when every character of the path is supported.

    Raises:
        RuntimeError: If the path is empty or contains an unsupported character.
    """
    allowed_pattern = r'^[/\\_a-zA-Z0-9-_.@]+$'
    # Guard clause: a full match means every character is in the accepted set.
    if re.fullmatch(allowed_pattern, file_path):
        return

    raise RuntimeError(
        "The output path of profiler only supports alphabets(a-zA-Z), "
        "digit(0-9) or {'-', '_', '.', '/', '@'}, but got the absolute path= " + file_path
    )
41
42
def validate_and_normalize_path(
        path,
        check_absolute_path=False,
        allow_parent_dir=True,
):
    """
    Validate a local path and return its normalized form.

    The path is treated as a unix local path. It is checked according to the
    flags below, resolved with ``os.path.realpath`` and finally passed to
    ``check_valid_character_of_path``.

    Args:
        path (str): Path to be normalized.
        check_absolute_path (bool): Whether to require that the path starts
            with "/". Default: False.
        allow_parent_dir (bool): Whether a ".." component is allowed in the
            path. Default: True.

    Returns:
        str, normalized path.

    Raises:
        RuntimeError: If the path is empty, contains a ".." component while
            `allow_parent_dir` is False, does not start with "/" while
            `check_absolute_path` is True, or is rejected by
            ``os.path.realpath`` with a ValueError. The character check may
            also raise RuntimeError.
    """
    if not path:
        raise RuntimeError("The path is invalid!")

    # The flag checks below work on the string form of the path.
    text = str(path)

    # Only "/" is used as the separator when looking for a ".." component.
    if not allow_parent_dir and any(part == ".." for part in text.split("/")):
        raise RuntimeError("The parent path is not allowed!")

    if check_absolute_path and not text.startswith("/"):
        raise RuntimeError("The path is invalid!")

    try:
        # NOTE: the original `path` object, not its string form, is resolved here.
        resolved = os.path.realpath(path)
    except ValueError as err:
        raise RuntimeError("The path is invalid!") from err
    else:
        # The character check runs on the resolved path, not on the raw input.
        check_valid_character_of_path(resolved)
        return resolved
85