1 2 /** 3 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 * SPDX-License-Identifier: Apache-2.0. 5 */ 6 7 package software.amazon.awssdk.crt.utils; 8 9 import java.util.regex.Matcher; 10 import java.util.regex.Pattern; 11 12 /** 13 * Utility Class used for Cleaning Up and Sanity Checking PEM formatted Strings 14 * for Validity. 15 */ 16 public class PemUtils { 17 private static final int ALLOWED_CHARS_PER_LINE = 64; 18 private static final String BASE_64_ENCODING_CHARS = "a-zA-Z0-9=+\\/"; 19 private static final String ALLOWED_WHITESPACE_CHARS = " \\r\\n"; 20 private static final String DELIMITER_CHARS = "-"; 21 private static final String MINUMUM_ALLOWED_PEM_CHARACTER_SET = BASE_64_ENCODING_CHARS + ALLOWED_WHITESPACE_CHARS 22 + DELIMITER_CHARS; 23 private static final String NON_BASE64_CHARACTER_SET = "[^" + BASE_64_ENCODING_CHARS + "]+"; 24 private static final String UNNECESSARY_PEM_CHARACTER_SET = "[^" + MINUMUM_ALLOWED_PEM_CHARACTER_SET + "]+"; 25 26 // These Regex's roughly follows the formal RFC Grammar Definition here: 27 // https://tools.ietf.org/html/rfc7468#section-3 28 private static final String PEM_OBJECT_TYPE = "([A-Z0-9 ]+)"; 29 private static final String PEM_DASHES = "[-]+"; 30 private static final String PEM_NON_DASHES = "([^-]+)"; 31 private static final String PEM_BEGIN_LINE_REGEX = PEM_DASHES + "BEGIN " + PEM_OBJECT_TYPE + PEM_DASHES; 32 33 // Be fairly lenient on PEM Contents regex since formatAccordingToRFC() will 34 // filter out non-Base64 Characters 35 private static final String PEM_CONTENTS_REGEX = PEM_NON_DASHES; 36 37 private static final String PEM_END_LINE_REGEX = PEM_DASHES + "END " + PEM_OBJECT_TYPE + PEM_DASHES; 38 39 private static final Pattern INVALID_PEM_CHARACTER_PATTERN = Pattern.compile(UNNECESSARY_PEM_CHARACTER_SET); 40 private static final Pattern PEM_BEGIN_PATTERN = Pattern.compile(PEM_BEGIN_LINE_REGEX); 41 private static final Pattern PEM_END_PATTERN = Pattern.compile(PEM_END_LINE_REGEX); 42 43 private static final Pattern PEM_OBJECT_PATTERN = Pattern 44 .compile(PEM_BEGIN_LINE_REGEX + PEM_CONTENTS_REGEX + PEM_END_LINE_REGEX); 45 46 // Pattern for catching when END and BEGIN being on the same line and they need 47 // to be separated. 48 private static final Pattern COMBINED_PEM_HEADERS = Pattern.compile(PEM_END_LINE_REGEX + "BEGIN"); 49 PemUtils()50 private PemUtils() { 51 } 52 53 /** 54 * Removes characters that are not valid in PEM format (non-base64 chars). No other cleanup is 55 * done. 56 * 57 * @param pem The input "dirty" PEM 58 * @return The output "clean" PEM 59 */ removeInvalidPemChars(String pem)60 public static String removeInvalidPemChars(String pem) { 61 if (pem == null || pem.length() == 0) { 62 return pem; 63 } 64 return pem.replaceAll(UNNECESSARY_PEM_CHARACTER_SET, ""); 65 } 66 67 /** 68 * Removes characters that are not valid in base64. No other cleanup is 69 * done. 70 * 71 * @param base64Contents The input "dirty" PEM 72 * @return The output "clean" PEM 73 */ removeNonBase64Chars(String base64Contents)74 private static String removeNonBase64Chars(String base64Contents) { 75 if (base64Contents == null || base64Contents.length() == 0) { 76 return base64Contents; 77 } 78 return base64Contents.replaceAll(NON_BASE64_CHARACTER_SET, ""); 79 } 80 81 /** 82 * Parses the full PEM Chain one object at a time and rewrites it following the 83 * RFC formatting rules. 84 * 85 * Performs the following operations and fixes: - Base 64 Encoded PEM Content 86 * Formatting: - All lines exactly 64 Characters long except for the last line. 87 * - Only whitespace is a single newline every 64 chars - The number of dashes 88 * "-" on the BEGIN and END lines are exactly 5 dashes - Garbage characters 89 * in-between PEM objects (characters after an END and before the next BEGIN) 90 * are removed 91 * 92 * For more info, see: https://tools.ietf.org/html/rfc1421#section-4.3.2.4 93 * 94 * @param pem The input "dirty" PEM 95 * @return The output "clean" PEM 96 */ formatAccordingToRFC(String pem)97 private static String formatAccordingToRFC(String pem) { 98 if (pem == null || pem.length() == 0) { 99 return pem; 100 } 101 Matcher matcher = PEM_OBJECT_PATTERN.matcher(pem); 102 StringBuffer outBuffer = new StringBuffer(); 103 104 int count = 0; 105 while (matcher.find()) { 106 if (count > 0) { 107 outBuffer.append('\n'); 108 } 109 String beginType = matcher.group(1); 110 String base64Contents = removeNonBase64Chars(matcher.group(2)); 111 String endType = matcher.group(3); 112 113 outBuffer.append("-----BEGIN " + beginType + "-----\n"); 114 115 int index = 0; 116 for (char c : base64Contents.toCharArray()) { 117 118 outBuffer.append(c); 119 index++; 120 if (index % ALLOWED_CHARS_PER_LINE == 0) { 121 outBuffer.append('\n'); 122 } 123 } 124 if (outBuffer.charAt(outBuffer.length() - 1) != '\n') { 125 outBuffer.append('\n'); 126 } 127 128 outBuffer.append("-----END " + endType + "-----"); 129 count++; 130 } 131 132 return outBuffer.toString(); 133 } 134 135 /** 136 * Inserts newlines in combined PEM Headers (Eg "-----END 137 * CERTIFICATE----------BEGIN CERTIFICATE-----") 138 * 139 * @param pem The input "dirty" PEM 140 * @return The output "clean" PEM 141 */ splitCombinedPemHeaders(String pem)142 private static String splitCombinedPemHeaders(String pem) { 143 if (pem == null || pem.length() == 0) { 144 return pem; 145 } 146 Matcher m = COMBINED_PEM_HEADERS.matcher(pem); 147 if (m.find()) { 148 // The Parenthesis in "([A-Z0-9 ]+)" in PEM_OBJECT_TYPE is a capturing group, it 149 // allows us to reference 150 // Group 1 with "$1" in the replaceAll() call here. 151 // For more info see: https://stackoverflow.com/a/27328750/7565918 152 return m.replaceAll("-----END $1-----\n-----BEGIN"); 153 } 154 return pem; 155 } 156 157 /** 158 * Merge consecutive spaces into a single space (Eg "BEGIN CERTIFICATE", will 159 * become "BEGIN CERTIFICATE") 160 * 161 * @param pem The input "dirty" PEM 162 * @return The output "clean" PEM 163 */ mergeSpaces(String pem)164 private static String mergeSpaces(String pem) { 165 if (pem == null || pem.length() == 0) { 166 return pem; 167 } 168 return pem.replaceAll("[ ]+", " "); 169 } 170 171 /** 172 * Remove any spaces next to dashes (Eg "----- BEGIN" will become "-----BEGIN") 173 * 174 * @param pem The input "dirty" PEM 175 * @return The output "clean" PEM 176 */ removeSpacesNextToDashes(String pem)177 private static String removeSpacesNextToDashes(String pem) { 178 if (pem == null || pem.length() == 0) { 179 return pem; 180 } 181 return pem.replaceAll("( -)|(- )", "-"); 182 } 183 184 /** 185 * Cleanup Function that removes most formatting and copy/paste mistakes from 186 * PEM formatted Strings. 187 * 188 * @param pem The input "dirty" PEM 189 * @return The output "clean" PEM 190 */ cleanUpPem(String pem)191 public static String cleanUpPem(String pem) { 192 if (pem == null || pem.length() == 0) { 193 return pem; 194 } 195 196 String cleanPem = removeInvalidPemChars(pem); 197 198 cleanPem = mergeSpaces(cleanPem); 199 200 cleanPem = removeSpacesNextToDashes(cleanPem); 201 202 cleanPem = splitCombinedPemHeaders(cleanPem); 203 204 cleanPem = formatAccordingToRFC(cleanPem); 205 206 return cleanPem; 207 } 208 209 /** 210 * Checks for invalid characters in the PEM (Eg non-ASCII Chars). 211 * 212 * @param pem The input PEM formatted String 213 * @throws IllegalArgumentException If any character in the PEM is outside the 214 * valid Character Set. 215 */ validateCharacterSet(String pem)216 private static void validateCharacterSet(String pem) { 217 // If there are any invalid characters, throw an exception with detailed info 218 // about their indexes 219 if (INVALID_PEM_CHARACTER_PATTERN.matcher(pem).matches()) { 220 StringBuilder debugStr = new StringBuilder(); 221 int index = 0; 222 for (char c : pem.toCharArray()) { 223 if (INVALID_PEM_CHARACTER_PATTERN.matcher(String.valueOf(c)).matches()) { 224 if (debugStr.length() > 0) { 225 debugStr.append(", "); 226 } 227 debugStr.append("\\u" + (int) c + " at index " + index); 228 } 229 index++; 230 } 231 throw new IllegalArgumentException("Illegal Characters found in PEM file. Chars: " + debugStr); 232 } 233 } 234 235 /** 236 * Checks that the number of "BEGIN" statements matches the number of "END" 237 * statements, and that END's come after BEGIN's. 238 * 239 * @param pem The input PEM formatted String 240 * @param maxChainDepth The max number of PEM Formatted Objects in the String. 241 * @return The number of PEM encoded objects found by Regex 242 */ validatePemByRegexParser(String pem, String expectedPemTypeSubString, int maxChainDepth)243 private static int validatePemByRegexParser(String pem, String expectedPemTypeSubString, int maxChainDepth) { 244 int beginCount = 0; 245 int endCount = 0; 246 int objCount = 0; 247 Matcher beginMatcher = PEM_BEGIN_PATTERN.matcher(pem); 248 Matcher endMatcher = PEM_END_PATTERN.matcher(pem); 249 Matcher objMatcher = PEM_OBJECT_PATTERN.matcher(pem); 250 251 while (beginMatcher.find()) { 252 beginCount++; 253 } 254 while (endMatcher.find()) { 255 endCount++; 256 } 257 while (objMatcher.find()) { 258 String beginType = objMatcher.group(1); 259 String base64Contents = objMatcher.group(2); 260 String endType = objMatcher.group(3); 261 262 if (!beginType.contains(expectedPemTypeSubString) || !endType.contains(expectedPemTypeSubString)) { 263 throw new IllegalArgumentException( 264 "PEM Object does not have expected type. " + "Expected Type: " + expectedPemTypeSubString 265 + ", Actual BEGIN Type: " + beginType + ", Actual END Type: " + endType); 266 } 267 if (base64Contents.length() == 0) { 268 throw new IllegalArgumentException("PEM Objet does not have any contents"); 269 } 270 objCount++; 271 } 272 273 if (objCount == 0) { 274 throw new IllegalArgumentException("PEM contains no objects, or is not a PEM"); 275 } 276 277 if (beginCount != endCount || beginCount != objCount) { 278 throw new IllegalArgumentException("PEM has mismatching BEGIN and END Delimiters. BeginCount: " + beginCount 279 + ", EndCount: " + endCount + ", ObjCount: " + objCount); 280 } 281 282 if (beginCount > maxChainDepth) { 283 throw new IllegalArgumentException( 284 "PEM has greater than expected depth, ExpectedMax: " + maxChainDepth + ", Actual: " + beginCount); 285 } 286 287 return beginCount; 288 } 289 290 /** 291 * Performs various sanity checks on a PEM Formatted String, and should be 292 * tolerant of common minor mistakes in formatting. 293 * 294 * @param pem The PEM or PEM Chain to validate. 295 * @param maxChainLength The max number of PEM encoded objects in the 296 * String. 297 * @param expectedPemTypeSubString A Substring that is expected to be present in 298 * the PEM Type. 299 * @throws IllegalArgumentException if there is a problem with the PEM formatted 300 * String. 301 */ sanityCheck(String pem, int maxChainLength, String expectedPemTypeSubString)302 public static void sanityCheck(String pem, int maxChainLength, String expectedPemTypeSubString) { 303 if (pem == null || pem.length() == 0) { 304 return; 305 } 306 validateCharacterSet(pem); 307 validatePemByRegexParser(pem, expectedPemTypeSubString, maxChainLength); 308 } 309 310 /** 311 * Returns false if there is a problem with a PEM instead of throwing an 312 * Exception. 313 * 314 * @param pem The PEM to sanity check. 315 * @param maxChainLength The Max number of PEM Objects in the PEM 316 * String 317 * @param expectedPemTypeSubString A Substring that is expected to be present in 318 * the PEM Type. 319 * @return True if the PEM passes all sanity Checks, false otherwise. 320 */ safeSanityCheck(String pem, int maxChainLength, String expectedPemTypeSubString)321 public static boolean safeSanityCheck(String pem, int maxChainLength, String expectedPemTypeSubString) { 322 try { 323 sanityCheck(pem, maxChainLength, expectedPemTypeSubString); 324 } catch (IllegalArgumentException e) { 325 return false; 326 } 327 return true; 328 } 329 } 330