• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /**
3  * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
4  * SPDX-License-Identifier: Apache-2.0.
5  */
6 
7 package software.amazon.awssdk.crt.utils;
8 
9 import java.util.regex.Matcher;
10 import java.util.regex.Pattern;
11 
/**
 * Utility class used for cleaning up and sanity checking PEM formatted Strings
 * for validity.
 *
 * All methods are static and stateless; the class cannot be instantiated.
 */
public final class PemUtils {
    private static final int ALLOWED_CHARS_PER_LINE = 64;
    private static final String BASE_64_ENCODING_CHARS = "a-zA-Z0-9=+\\/";
    private static final String ALLOWED_WHITESPACE_CHARS = " \\r\\n";
    private static final String DELIMITER_CHARS = "-";
    private static final String MINIMUM_ALLOWED_PEM_CHARACTER_SET = BASE_64_ENCODING_CHARS + ALLOWED_WHITESPACE_CHARS
            + DELIMITER_CHARS;
    private static final String NON_BASE64_CHARACTER_SET = "[^" + BASE_64_ENCODING_CHARS + "]+";
    private static final String UNNECESSARY_PEM_CHARACTER_SET = "[^" + MINIMUM_ALLOWED_PEM_CHARACTER_SET + "]+";

    // These regexes roughly follow the formal RFC grammar definition here:
    // https://tools.ietf.org/html/rfc7468#section-3
    private static final String PEM_OBJECT_TYPE = "([A-Z0-9 ]+)";
    private static final String PEM_DASHES = "[-]+";
    private static final String PEM_NON_DASHES = "([^-]+)";
    private static final String PEM_BEGIN_LINE_REGEX = PEM_DASHES + "BEGIN " + PEM_OBJECT_TYPE + PEM_DASHES;

    // Be fairly lenient on the PEM contents regex since formatAccordingToRFC() will
    // filter out non-Base64 characters
    private static final String PEM_CONTENTS_REGEX = PEM_NON_DASHES;

    private static final String PEM_END_LINE_REGEX = PEM_DASHES + "END " + PEM_OBJECT_TYPE + PEM_DASHES;

    // All patterns are compiled once; Pattern instances are immutable and safe to share.
    private static final Pattern INVALID_PEM_CHARACTER_PATTERN = Pattern.compile(UNNECESSARY_PEM_CHARACTER_SET);
    private static final Pattern NON_BASE64_CHARACTER_PATTERN = Pattern.compile(NON_BASE64_CHARACTER_SET);
    private static final Pattern CONSECUTIVE_SPACES_PATTERN = Pattern.compile("[ ]+");
    private static final Pattern SPACE_NEXT_TO_DASH_PATTERN = Pattern.compile("( -)|(- )");
    private static final Pattern PEM_BEGIN_PATTERN = Pattern.compile(PEM_BEGIN_LINE_REGEX);
    private static final Pattern PEM_END_PATTERN = Pattern.compile(PEM_END_LINE_REGEX);

    // Group 1 = BEGIN type, group 2 = raw contents, group 3 = END type.
    private static final Pattern PEM_OBJECT_PATTERN = Pattern
            .compile(PEM_BEGIN_LINE_REGEX + PEM_CONTENTS_REGEX + PEM_END_LINE_REGEX);

    // Pattern for catching END and BEGIN being on the same line, in which case they
    // need to be separated.
    private static final Pattern COMBINED_PEM_HEADERS = Pattern.compile(PEM_END_LINE_REGEX + "BEGIN");

    private PemUtils() {
    }

    /**
     * Removes characters that are not valid in PEM format (anything other than
     * Base64 characters, space, CR, LF and dash). No other cleanup is done.
     *
     * @param pem The input "dirty" PEM
     * @return The output "clean" PEM; null or empty input is returned unchanged
     */
    public static String removeInvalidPemChars(String pem) {
        if (pem == null || pem.length() == 0) {
            return pem;
        }
        return INVALID_PEM_CHARACTER_PATTERN.matcher(pem).replaceAll("");
    }

    /**
     * Removes characters that are not valid in Base64 (including whitespace and
     * dashes). No other cleanup is done.
     *
     * @param base64Contents The input "dirty" Base64 contents of a PEM object
     * @return The contents with only Base64 characters left; null or empty input
     *         is returned unchanged
     */
    private static String removeNonBase64Chars(String base64Contents) {
        if (base64Contents == null || base64Contents.length() == 0) {
            return base64Contents;
        }
        return NON_BASE64_CHARACTER_PATTERN.matcher(base64Contents).replaceAll("");
    }

    /**
     * Parses the full PEM chain one object at a time and rewrites it following the
     * RFC formatting rules.
     *
     * Performs the following operations and fixes:
     * - Base64 encoded PEM content formatting: all lines are exactly 64 characters
     *   long except for the last line, and the only whitespace is a single newline
     *   after every line.
     * - The number of dashes "-" on the BEGIN and END lines is exactly 5.
     * - Garbage characters in-between PEM objects (characters after an END and
     *   before the next BEGIN) are removed.
     *
     * If the input contains no complete PEM object, the result is an empty String.
     *
     * For more info, see: https://tools.ietf.org/html/rfc1421#section-4.3.2.4
     *
     * @param pem The input "dirty" PEM
     * @return The output "clean" PEM; null or empty input is returned unchanged
     */
    private static String formatAccordingToRFC(String pem) {
        if (pem == null || pem.length() == 0) {
            return pem;
        }
        Matcher objects = PEM_OBJECT_PATTERN.matcher(pem);
        StringBuilder out = new StringBuilder(pem.length());

        boolean first = true;
        while (objects.find()) {
            // Objects in a chain are separated by exactly one newline.
            if (!first) {
                out.append('\n');
            }
            first = false;

            String beginType = objects.group(1);
            String base64Contents = removeNonBase64Chars(objects.group(2));
            String endType = objects.group(3);

            out.append("-----BEGIN ").append(beginType).append("-----\n");

            // Emit the contents in lines of at most 64 characters, each terminated by a
            // newline, so the END line always starts on its own line.
            int length = base64Contents.length();
            for (int start = 0; start < length; start += ALLOWED_CHARS_PER_LINE) {
                int end = Math.min(start + ALLOWED_CHARS_PER_LINE, length);
                out.append(base64Contents, start, end).append('\n');
            }

            out.append("-----END ").append(endType).append("-----");
        }

        return out.toString();
    }

    /**
     * Inserts newlines in combined PEM headers (Eg "-----END
     * CERTIFICATE----------BEGIN CERTIFICATE-----")
     *
     * @param pem The input "dirty" PEM
     * @return The output "clean" PEM; null or empty input is returned unchanged
     */
    private static String splitCombinedPemHeaders(String pem) {
        if (pem == null || pem.length() == 0) {
            return pem;
        }
        Matcher m = COMBINED_PEM_HEADERS.matcher(pem);
        if (m.find()) {
            // The parenthesis in "([A-Z0-9 ]+)" in PEM_OBJECT_TYPE is a capturing group, it
            // allows us to reference group 1 with "$1" in the replaceAll() call here.
            // For more info see: https://stackoverflow.com/a/27328750/7565918
            return m.replaceAll("-----END $1-----\n-----BEGIN");
        }
        return pem;
    }

    /**
     * Merges consecutive spaces into a single space (Eg "BEGIN     CERTIFICATE" will
     * become "BEGIN CERTIFICATE")
     *
     * @param pem The input "dirty" PEM
     * @return The output "clean" PEM; null or empty input is returned unchanged
     */
    private static String mergeSpaces(String pem) {
        if (pem == null || pem.length() == 0) {
            return pem;
        }
        return CONSECUTIVE_SPACES_PATTERN.matcher(pem).replaceAll(" ");
    }

    /**
     * Removes any spaces next to dashes (Eg "----- BEGIN" will become "-----BEGIN")
     *
     * @param pem The input "dirty" PEM
     * @return The output "clean" PEM; null or empty input is returned unchanged
     */
    private static String removeSpacesNextToDashes(String pem) {
        if (pem == null || pem.length() == 0) {
            return pem;
        }
        return SPACE_NEXT_TO_DASH_PATTERN.matcher(pem).replaceAll("-");
    }

    /**
     * Cleanup function that removes most formatting and copy/paste mistakes from
     * PEM formatted Strings.
     *
     * The steps run in a fixed order: strip invalid characters, merge runs of
     * spaces, drop spaces adjacent to dashes, split END/BEGIN delimiters that share
     * a line, and finally re-emit every PEM object in RFC layout.
     *
     * @param pem The input "dirty" PEM
     * @return The output "clean" PEM; null or empty input is returned unchanged
     */
    public static String cleanUpPem(String pem) {
        if (pem == null || pem.length() == 0) {
            return pem;
        }

        String cleanPem = removeInvalidPemChars(pem);
        cleanPem = mergeSpaces(cleanPem);
        cleanPem = removeSpacesNextToDashes(cleanPem);
        cleanPem = splitCombinedPemHeaders(cleanPem);
        return formatAccordingToRFC(cleanPem);
    }

    /**
     * Checks for invalid characters in the PEM (Eg non-ASCII chars or tabs).
     *
     * @param pem The input PEM formatted String
     * @throws IllegalArgumentException If any character in the PEM is outside the
     *                                  valid character set. The message lists every
     *                                  offending character as a four-digit hex code
     *                                  unit together with its index.
     */
    private static void validateCharacterSet(String pem) {
        Matcher invalidRun = INVALID_PEM_CHARACTER_PATTERN.matcher(pem);
        // find() (not matches()) is required here: matches() only succeeds when the
        // entire input is one run of invalid characters.
        if (!invalidRun.find()) {
            return;
        }

        StringBuilder debugStr = new StringBuilder();
        do {
            // The pattern matches runs of invalid characters, so every index inside the
            // matched region is an offending character.
            for (int index = invalidRun.start(); index < invalidRun.end(); index++) {
                if (debugStr.length() > 0) {
                    debugStr.append(", ");
                }
                String hex = Integer.toHexString(pem.charAt(index));
                debugStr.append("\\u");
                for (int pad = hex.length(); pad < 4; pad++) {
                    debugStr.append('0');
                }
                debugStr.append(hex).append(" at index ").append(index);
            }
        } while (invalidRun.find());

        throw new IllegalArgumentException("Illegal Characters found in PEM file. Chars: " + debugStr);
    }

    /**
     * Counts the non-overlapping matches of a pattern in the given input.
     */
    private static int countMatches(Pattern pattern, String input) {
        Matcher matcher = pattern.matcher(input);
        int count = 0;
        while (matcher.find()) {
            count++;
        }
        return count;
    }

    /**
     * Checks that the number of "BEGIN" statements matches the number of "END"
     * statements, that every BEGIN/END pair forms a complete object of the expected
     * type with non-empty Base64 contents, and that the chain is not too long.
     *
     * @param pem                      The input PEM formatted String
     * @param expectedPemTypeSubString A substring that must be present in both the
     *                                 BEGIN and END type of every object.
     * @param maxChainDepth            The max number of PEM formatted objects in the
     *                                 String.
     * @return The number of PEM encoded objects found by regex
     * @throws IllegalArgumentException if an object has an unexpected type or no
     *                                  contents, if no object is found, if the
     *                                  BEGIN/END/object counts disagree, or if there
     *                                  are more objects than maxChainDepth.
     */
    private static int validatePemByRegexParser(String pem, String expectedPemTypeSubString, int maxChainDepth) {
        int beginCount = countMatches(PEM_BEGIN_PATTERN, pem);
        int endCount = countMatches(PEM_END_PATTERN, pem);
        int objCount = 0;

        Matcher objMatcher = PEM_OBJECT_PATTERN.matcher(pem);
        while (objMatcher.find()) {
            String beginType = objMatcher.group(1);
            String endType = objMatcher.group(3);

            if (!beginType.contains(expectedPemTypeSubString) || !endType.contains(expectedPemTypeSubString)) {
                throw new IllegalArgumentException(
                        "PEM Object does not have expected type. " + "Expected Type: " + expectedPemTypeSubString
                                + ", Actual BEGIN Type: " + beginType + ", Actual END Type: " + endType);
            }
            // The raw group always holds at least one character (typically the newline
            // after the BEGIN line), so emptiness must be judged on the Base64 payload.
            if (removeNonBase64Chars(objMatcher.group(2)).length() == 0) {
                throw new IllegalArgumentException("PEM Object does not have any contents");
            }
            objCount++;
        }

        if (objCount == 0) {
            throw new IllegalArgumentException("PEM contains no objects, or is not a PEM");
        }

        if (beginCount != endCount || beginCount != objCount) {
            throw new IllegalArgumentException("PEM has mismatching BEGIN and END Delimiters. BeginCount: " + beginCount
                    + ", EndCount: " + endCount + ", ObjCount: " + objCount);
        }

        if (beginCount > maxChainDepth) {
            throw new IllegalArgumentException(
                    "PEM has greater than expected depth, ExpectedMax: " + maxChainDepth + ", Actual: " + beginCount);
        }

        return beginCount;
    }

    /**
     * Performs various sanity checks on a PEM formatted String, and should be
     * tolerant of common minor mistakes in formatting. A null or empty String is
     * accepted without any checks.
     *
     * @param pem                      The PEM or PEM chain to validate.
     * @param maxChainLength           The max number of PEM encoded objects in the
     *                                 String.
     * @param expectedPemTypeSubString A substring that is expected to be present in
     *                                 the PEM type.
     * @throws IllegalArgumentException if there is a problem with the PEM formatted
     *                                  String.
     */
    public static void sanityCheck(String pem, int maxChainLength, String expectedPemTypeSubString) {
        if (pem == null || pem.length() == 0) {
            return;
        }
        validateCharacterSet(pem);
        validatePemByRegexParser(pem, expectedPemTypeSubString, maxChainLength);
    }

    /**
     * Returns false if there is a problem with a PEM instead of throwing an
     * exception.
     *
     * @param pem                      The PEM to sanity check.
     * @param maxChainLength           The max number of PEM objects in the PEM
     *                                 String
     * @param expectedPemTypeSubString A substring that is expected to be present in
     *                                 the PEM type.
     * @return True if the PEM passes all sanity checks, false otherwise.
     */
    public static boolean safeSanityCheck(String pem, int maxChainLength, String expectedPemTypeSubString) {
        try {
            sanityCheck(pem, maxChainLength, expectedPemTypeSubString);
            return true;
        } catch (IllegalArgumentException e) {
            // Any validation failure is reported as "not sane" rather than propagated.
            return false;
        }
    }
}
330