#
# Copyright (C) 2019 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""A helper script for validateRefactor.sh. Should generally not be used directly.

Can be used directly if validateRefactor.sh has already created the out-old & out-new dirs.
In such a case, it can be run to compare those directories without regenerating them.
This is generally only useful when updating baselines or iterating on this script itself.
Takes baseline names as CLI arguments, which may be passed through from validateRefactor.sh.

Typical usage example:

  python validateRefactorHelper.py agpKmp
"""
import itertools
import logging
import queue
import re
import shutil
import subprocess
import sys
import threading
from typing import Dict

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# noto-emoji-compat `bundleinside`s an externally-built with-timestamps jar.
# classes.jar is compared using `diffuse` instead of unzipping and diffing class files.
bannedJars = ["-x", "noto-emoji-compat-java.jar", "-x", "classes.jar"]
# java and json aren't for unzipping, but the poor exclude-everything-but-jars regex doesn't
# exclude them. Same for exclude-non-klib and .kt/.knm
areNotZips = ["-x", r"**\.java", "-x", r"**\.json", "-x", r"**\.kt", "-x", r"**\.knm", "-x", r"**\.xml",
              "-x", r"**\.sha1", "-x", r"**\.sha256", "-x", r"**\.sha512", "-x", r"**\.md5",
              "-x", r"**\.module", "-x", r"**\.pom", "-x", r"**\.html"]
# keeps making my regexes fall over :(
hasNoExtension = ["-x", "manifest", "-x", "module"]
doNotUnzip = bannedJars + areNotZips + hasNoExtension


def interleaveFlag(flag, values):
    """Return [flag, v0, flag, v1, ...] for every value in `values`.

    Used to build repeated `-x <pattern>` / `-I <regex>` argument lists for `diff`
    without capping how many values can be passed.
    """
    return list(itertools.chain.from_iterable((flag, value) for value in values))


def diff(excludes):
    """Run a recursive `diff` of out-old/dist against out-new/dist.

    Args:
        excludes: extra command-line arguments appended to the `diff` invocation.

    Returns:
        The lines of diff's stdout, as a list of strings.
    """
    return popenAndReturn(["diff", "-r", "../../out-old/dist/", "../../out-new/dist/"] + excludes)


def popenAndReturn(args):
    """Run `args` as a subprocess and return its stdout split into lines.

    The exit status is deliberately not checked: `diff` exits non-zero whenever
    it finds differences. Raises FileNotFoundError if the executable is missing.
    """
    logger.debug(" ".join(args))
    # subprocess.run waits for the child and closes the pipe once it has been read.
    completed = subprocess.run(args, stdout=subprocess.PIPE)
    return completed.stdout.decode("utf-8").split("\n")


def findFilesNotMatchingWithDiffAndUnzip(*regexesToExclude):
    """Find and unzip all files with an old/new diff that _do not_ match the argument regexes.

    The patterns are exclusions because the `diff` command doesn't have an --include,
    only --exclude. Both the old and the new copy of each differing file are unzipped
    into a sibling "<path>.unzipped/" directory.
    """
    excludeArgs = interleaveFlag("-x", regexesToExclude)
    # Exclude all things that are *not* the desired zip type
    briefDiffLines = diff(["-q"] + excludeArgs + doNotUnzip)
    workQueueOfZips = queue.LifoQueue()
    for line in briefDiffLines:
        # Take only changed files, not new/deleted ones (the diff there is obvious)
        if not line.startswith("Files"):
            continue
        # "Files <old> and <new> differ" -> words 1 and 3 are the two paths
        for path in line.split()[1:4:2]:
            workQueueOfZips.put(path)
    # And unzip them
    # If we spam unzip commands without a break, the unzips start failing.
    # If we wait after every Popen, the script runs very slowly.
    # So create a pool of (at most) 10 unzip workers to consume from the queue.
    numWorkers = 10
    workers = [
        threading.Thread(target=unzipWorker, args=(workQueueOfZips,))
        for _ in range(min(numWorkers, workQueueOfZips.qsize()))
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()


def unzipWorker(workQueueOfZips):
    """Drain `workQueueOfZips`, unzipping each path into "<path>.unzipped/".

    Any existing output directory is removed first. Returns once the queue is empty.
    """
    while True:
        # A separate empty() check followed by get() races with the other workers,
        # so take the item and the emptiness signal in one step.
        try:
            zipFilePath = workQueueOfZips.get_nowait()
        except queue.Empty:
            return
        unzippedDir = zipFilePath + ".unzipped/"
        try:
            shutil.rmtree(unzippedDir)
        except FileNotFoundError:
            pass
        logger.debug("unzipping " + zipFilePath)
        subprocess.run(["unzip", "-qq", "-o", zipFilePath, "-d", unzippedDir])


diffusePath = "../../prebuilts/build-tools/diffuse/diffuse-0.3.0/bin/diffuser"

diffuseIsPresent = True


def compareWithDiffuse(listOfJars):
    """Log the `diffuse` comparison of each old jar against its out-new counterpart.

    Empty entries in `listOfJars` are skipped. If the diffuse binary is missing, a
    warning is logged once and this and all later calls become no-ops.
    """
    global diffuseIsPresent
    if not diffuseIsPresent:
        return
    for jarPath in filter(None, listOfJars):
        logger.info("jarpath: " + jarPath)
        newJarPath = jarPath.replace("out-old", "out-new")
        try:
            logger.info("\n".join(popenAndReturn([diffusePath, "diff", "--jar", jarPath, newJarPath])))
        except FileNotFoundError:
            logger.warning(f"https://github.com/JakeWharton/diffuse is not present on disk in expected location"
                           f" {diffusePath}. You can install it.")
            diffuseIsPresent = False
            return


# We might care to know whether .sha1 or .md5 files have changed, but changes in those files will
# always be accompanied by more meaningful changes in other files, so we don't need to show changes
# in .sha1 or .md5 files, or in .module files showing the hashes of other files, or config names.
excludedHashes = ["-x", "*.md5*", "-x", "*.sha**", "-I", " \"md5\".*",
                  "-I", " \"sha.*", "-I", " \"size\".*", "-I", " \"name\".*"]
# Don't care about maven-metadata files because they have timestamps in them.
# temporarily ignore knm files
# If changes to the dackka args json are meaningful, they will affect the generated docs and show diff there
excludedFiles = ["-x", "*maven-metadata.xml**", "-x", r"**\.knm", "-x", "dackkaArgs-docs-tip-of-tree.json"]
# Also, ignore files that we already unzipped
excludedZips = ["-x", "*.zip", "-x", "*.jar", "-x", "*.aar", "-x", "*.apk", "-x", "*.klib"]

# These are baselined changes that we understand and know are no-ops in refactors
# "Unskippable" changes are multi-line and can't be skipped in `diff`, so post-process
# NOTE(review): these literals must match diff output exactly, including the indentation that
# follows the "<" / ">" marker -- confirm the spacing against real `diff` output.
baselinedChangesForAgpKmp = [
    # these are new attributes being added
    """> "org.gradle.libraryelements": "aar",""",
    """> "org.gradle.jvm.environment": "android",""",
    """> "org.gradle.jvm.environment": "non-jvm",""",
    """> "org.gradle.jvm.environment": "standard-jvm",""",
    """> <type>aar</type>""",
    # this attribute swap occurs alongside the above new attributes added.
    # https://chat.google.com/room/AAAAW8qmCIs/4phaNn_gsrc
    """< "org.jetbrains.kotlin.platform.type": "androidJvm\"""",
    """> "org.jetbrains.kotlin.platform.type": "jvm\"""",
    # name-only change; nothing resolves based on names
    """< "name": "releaseApiElements-published",""",
    """> "name": "androidApiElements-published",""",
    """ <pre>actual typealias""",  # open bug in dackka b/339221337
    # we are switching from our KMP sourcejars solution to the upstream one
    """< "org.gradle.docstype": "fake-sources",""",
    """> "org.gradle.docstype": "sources",""",
]
unskippableBaselinedChangesForAgpKmp = [
# This was an AGP workaround for a dependency resolution issue for kotlin stdlib
# https://chat.google.com/room/AAAAW8qmCIs/4phaNn_gsrc
re.compile(r"""
[0-9]+,[0-9]+c[0-9]+
< \},
< "excludes": \[
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-stdlib-common"
< \},
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-test-common"
< \},
< \{
< "group": "org.jetbrains.kotlin",
< "module": "kotlin-test-annotations-common"
< \}
< \]
---
> \}"""),
re.compile(r"""
< <exclusions>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-stdlib-common</artifactId>
< </exclusion>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-test-common</artifactId>
< </exclusion>
< <exclusion>
< <groupId>org.jetbrains.kotlin</groupId>
< <artifactId>kotlin-test-annotations-common</artifactId>
< </exclusion>
< </exclusions>"""),
# .module files[] blocks aren't ordered; baseline reordering of samples-sources b/374956513
re.compile(r"""
[0-9]+,[0-9]+d[0-9]+
< "name": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
< "url": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
< "size": [0-9]+,
< "sha512": "[0-9a-z]+",
< "sha256": "[0-9a-z]+",
< "sha1": "[0-9a-z]+",
< "md5": "[0-9a-z]+"
< \},
< \{
[0-9]+a[0-9]+,[0-9]+
> \},
> \{
> "name": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
> "url": "[a-z3\-]+-[0-9].[0-9].[0-9](-[a-z0-9]+)?-samples-sources.jar",
> "size": [0-9]+,
> "sha512": "[0-9a-z]+",
> "sha256": "[0-9a-z]+",
> "sha1": "[0-9a-z]+",
> "md5": "[0-9a-z]+"
"""),
# This one is okay because the common pom expresses a dependency on the jvm pom
# https://repo1.maven.org/maven2/org/jetbrains/kotlinx/kotlinx-coroutines-core/1.7.3/kotlinx-coroutines-core-1.7.3.pom
re.compile(r"""[0-9]+c[0-9]+
< <artifactId>kotlinx-coroutines-core-jvm</artifactId>
---
> <artifactId>kotlinx-coroutines-core</artifactId>"""),
# AGP-KMP adds a new default sourceSet, which in itself doesn't do anything
re.compile(r"""(11,17d10|12,18d11)
< "name": "androidRelease",
< "dependencies": \[
< "commonMain"
< \],
< "analysisPlatform": "jvm"
< \},
< \{
"""),
]

# Active baselines. These start empty and are filled in place by applyBaselineArguments(),
# so the diff-processing functions below see whatever was selected on the command line.
baselines = []
baselinedChanges = []
unskippableBaselinedChanges = []


def applyBaselineArguments(arguments):
    """Activate the baselines named in `arguments`.

    Extends the module-level baseline lists (and `excludedFiles`) in place.

    Returns:
        The arguments that were not recognized, in their original order.
    """
    unrecognized = list(arguments)
    if "agpKmp" in unrecognized:
        unrecognized.remove("agpKmp")
        baselines.append("agpKmp")
        logger.info("IGNORING DIFF FOR agpKmp")
        baselinedChanges.extend(baselinedChangesForAgpKmp)
        unskippableBaselinedChanges.extend(unskippableBaselinedChangesForAgpKmp)
        excludedFiles.extend(["-x", r"**\.aar.unzipped/res"])  # agp-kmp may add this empty
    return unrecognized


def removeLinesStartingWith(listOfStrings, listOfStringsToMatchAgainst):
    """Return the entries of `listOfStrings` that start with none of the given prefixes."""
    prefixes = tuple(listOfStringsToMatchAgainst)
    return [line for line in listOfStrings if not line.startswith(prefixes)]


def removeLinesWithChangedSuffixes(newStrings, oldStrings, newSuffix, oldSuffix=""):
    """Drop new/old line pairs that differ only by a changed suffix.

    A new string ending in `newSuffix` is dropped when swapping that trailing suffix
    for `oldSuffix` yields a string present in `oldStrings`; the matching old string
    is dropped too. Only the trailing occurrence of `newSuffix` is swapped.

    removeLinesWithChangedSuffixes(["foo-bar"], ["foo"], "-bar") returns [], []

    Returns:
        (remainingNewStrings, remainingOldStrings), each in original order.
    """
    droppedNew = set()
    droppedOld = set()
    for newIndex, string in enumerate(newStrings):
        if not string.endswith(newSuffix):
            continue
        converted = string[:len(string) - len(newSuffix)] + oldSuffix
        candidates = [oldIndex for oldIndex, old in enumerate(oldStrings) if old == converted]
        if not candidates:
            continue
        droppedNew.add(newIndex)
        # Pair with an old line that hasn't been claimed yet, so duplicated lines are
        # all matched up; fall back to the first match if every copy is already taken.
        droppedOld.add(next((c for c in candidates if c not in droppedOld), candidates[0]))
    resultNew = [string for i, string in enumerate(newStrings) if i not in droppedNew]
    resultOld = [string for i, string in enumerate(oldStrings) if i not in droppedOld]
    return resultNew, resultOld


def processDiffSegment(segment, fileExtension):
    """Remove baselined elements from a single diff segment.

    Args:
        segment: one diff hunk, starting with a location-in-file element like 223c220.
        fileExtension: extension of the diffed file; "pom" enables the artifactId
            suffix baseline when the agpKmp baseline is active.

    Returns:
        The segment with baselined lines removed, or "" if no content lines remain.
    """
    if segment == "":
        return ""
    lines = removeLinesStartingWith(segment.split("\n"), baselinedChanges)
    removed = [line[1:] for line in lines if line.startswith("<")]
    added = [line[1:] for line in lines if line.startswith(">")]
    if fileExtension == "pom" and "agpKmp" in baselines:
        # Ignore artifactIds' new -jvm and -android suffixes in poms b/356612738
        added, removed = removeLinesWithChangedSuffixes(added, removed, "-jvm</artifactId>", "</artifactId>")
        added, removed = removeLinesWithChangedSuffixes(added, removed, "-android</artifactId>", "</artifactId>")
    keptContentLines = {">" + it for it in added} | {"<" + it for it in removed}
    # Do not keep any formatting lines or the header if there is no content
    if not keptContentLines:
        return ""
    # The result is built from `lines` to retain the ordering lost during processing.
    # Keep keptContentLines, plus formatting lines like "---" and the header
    # (which don't start with < or >).
    return "\n".join(
        line for line in lines
        if line and (line[0] not in "<>" or line in keptContentLines)
    )


# Matches a location-in-file segment header line such as "99,112d87".
diffSegmentHeader = re.compile(r'(^[0-9]+[0-9acd,]*\n)', flags=re.MULTILINE)


def processMegaDiff(inputString):
    """Remove baselined changes from an entire `diff -r` output.

    The output of diff entails multiple files, and multiple segments per file.
    Files with no remaining segments are omitted. Each kept file is rendered as its
    new-side path followed by its remaining segments; entries are joined with a
    newline plus "diff ", so the first entry carries no "diff " prefix.
    """
    processedPerFileDiffs = []
    # Element 0 of the split is whatever preceded the first "diff -r" header.
    for perFileDiff in inputString.split("diff -r")[1:]:
        diffStatement, _, diffContent = perFileDiff.partition("\n")
        newFilePath = diffStatement.rpartition(" ")[2]
        fileExtension = newFilePath.rpartition(".")[2]
        for multilineBaselinedElement in unskippableBaselinedChanges:
            diffContent = multilineBaselinedElement.sub("", diffContent)
        # Splitting on a capturing group yields ["", header, body, header, body, ...]
        diffSegments = diffSegmentHeader.split(diffContent)
        result = []
        for header, body in zip(diffSegments[1::2], diffSegments[2::2]):
            # a complete segment is a location-in-file header and everything until the next header. E.g.
            # 83c70
            # <       <artifactId>kotlinx-coroutines-core-jvm</artifactId>
            # ---
            # >       <artifactId>kotlinx-coroutines-core</artifactId>
            processedSegment = processDiffSegment(header + body, fileExtension)
            if processedSegment != "":
                result.append(processedSegment)
        if result:
            processedPerFileDiffs.append(newFilePath + "\n" + "\n".join(result))
    return "\ndiff ".join(processedPerFileDiffs)


def main(arguments):
    """Unzip archives that differ between out-old and out-new, then print the filtered diff.

    Args:
        arguments: baseline names from the command line (currently only "agpKmp").
    """
    unrecognized = applyBaselineArguments(arguments)
    if unrecognized:
        logger.error("invalid argument(s) for validateRefactorHelper: " + ", ".join(unrecognized))
        logger.error("currently recognized arguments: agpKmp")
        # Exit non-zero so a mistyped baseline is not mistaken for a clean comparison.
        sys.exit(1)

    # interleave "-I" to tell diffutils to 'I'gnore the baselined lines
    baselinedChangesArgs = interleaveFlag(
        "-I", [it.removeprefix(">").removeprefix("<") for it in baselinedChanges])

    # We unzip multiple times in this order because e.g. zips can contain apks.
    # Find all zip files with a diff, e.g. the tip-of-tree-repository file, and maybe the docs zip
    logger.info("UNZIPPING ZIP FILES")
    findFilesNotMatchingWithDiffAndUnzip(r"**\.[^z][a-z]*")
    # Find all aar and apk files with a diff. The proper regex would be `.*\..*[^akpr]+.*`, but it
    # doesn't work in difftools exclude's very limited regex syntax.
    logger.info("UNZIPPING AAR/APK FILES")
    findFilesNotMatchingWithDiffAndUnzip(r"**\.zip", r"**\.jar", r"**\.klib")
    # Find all jars and klibs and unzip them (comes after because they could be inside aars/apks).
    logger.info("UNZIPPING JAR/KLIB FILES")
    findFilesNotMatchingWithDiffAndUnzip(r"**\.zip", r"**\.aar", r"**\.apk")

    # now find all diffs in classes.jars
    # TODO(375636734) Disabled because this tracks internal methods' diffs
    # classesJarsWithDiffs = popenAndReturn(["find", "../../out-old/dist/", "-name", "classes.jar"])
    # logger.info("classes.jar s: " + str(classesJarsWithDiffs))
    # compareWithDiffuse(classesJarsWithDiffs)

    # Now find all diffs in non-zipped files
    finalExcludes = excludedHashes + excludedFiles + excludedZips + baselinedChangesArgs
    finalDiff = "\n".join(diff(finalExcludes))
    finalDiff = processMegaDiff(finalDiff)
    print(finalDiff)


if __name__ == "__main__":
    main(sys.argv[1:])