1 //===- split-file.cpp - Input splitting utility ---------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Split input into multiple parts separated by regex '^(.|//)--- ' and extract
10 // the specified part.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "llvm/ADT/DenseMap.h"
15 #include "llvm/ADT/StringExtras.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/FileOutputBuffer.h"
19 #include "llvm/Support/LineIterator.h"
20 #include "llvm/Support/MemoryBuffer.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/ToolOutputFile.h"
23 #include "llvm/Support/WithColor.h"
24 #include <string>
25 #include <system_error>
26
27 using namespace llvm;
28
// Category that groups this tool's options; main() passes it to
// cl::HideUnrelatedOptions so that only these options are shown.
static cl::OptionCategory cat("split-file Options");

// Positional argument: name of the file to split. main() reads it with
// MemoryBuffer::getFileOrSTDIN.
// NOTE(review): positional options are presumably bound in declaration order,
// so `input` must stay declared before `output` -- confirm against the
// CommandLine library documentation before reordering.
static cl::opt<std::string> input(cl::Positional, cl::desc("filename"),
                                  cl::cat(cat));

// Positional argument: directory under which handle() creates one file per
// part.
static cl::opt<std::string> output(cl::Positional, cl::desc("directory"),
                                   cl::value_desc("directory"), cl::cat(cat));

// When set, handle() writes no padding newlines in front of a part, so the
// extracted file does not keep the line numbers of the input.
static cl::opt<bool> noLeadingLines("no-leading-lines",
                                    cl::desc("Don't preserve line numbers"),
                                    cl::cat(cat));

// Prefix for diagnostics printed by fatal(); assigned in main() from argv[0].
static StringRef toolName;
// Number of diagnostics reported through error(); handle() returns 1 when it
// is non-zero.
static int errorCount;
43
fatal(StringRef filename,const Twine & message)44 LLVM_ATTRIBUTE_NORETURN static void fatal(StringRef filename,
45 const Twine &message) {
46 if (filename.empty())
47 WithColor::error(errs(), toolName) << message << '\n';
48 else
49 WithColor::error(errs(), toolName) << filename << ": " << message << '\n';
50 exit(1);
51 }
52
error(StringRef filename,int64_t line,const Twine & message)53 static void error(StringRef filename, int64_t line, const Twine &message) {
54 ++errorCount;
55 errs() << filename << ':' << line << ": ";
56 WithColor::error(errs()) << message << '\n';
57 }
58
namespace {
/// Byte range of one part inside the input buffer, plus the number of newline
/// characters to write in front of it.
struct Part {
  /// Start of the part's content. handle() leaves this null when the
  /// separator is the last line of the input.
  const char *begin{nullptr};
  /// One past the last byte of the part's content.
  const char *end{nullptr};
  /// Number of '\n' characters handle() writes before the content.
  int64_t leadingLines{0};
};
} // end anonymous namespace
66
handle(MemoryBuffer & inputBuf,StringRef input)67 static int handle(MemoryBuffer &inputBuf, StringRef input) {
68 DenseMap<StringRef, Part> partToBegin;
69 StringRef lastPart, separator;
70 for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
71 const int64_t lineNo = i.line_number();
72 const StringRef line = *i++;
73 const size_t markerLen = line.startswith("//") ? 6 : 5;
74 if (!(line.size() >= markerLen &&
75 line.substr(markerLen - 4).startswith("--- ")))
76 continue;
77 separator = line.substr(0, markerLen);
78 const StringRef partName = line.substr(markerLen);
79 if (partName.empty()) {
80 error(input, lineNo, "empty part name");
81 continue;
82 }
83 if (isSpace(partName.front()) || isSpace(partName.back())) {
84 error(input, lineNo, "part name cannot have leading or trailing space");
85 continue;
86 }
87
88 auto res = partToBegin.try_emplace(partName);
89 if (!res.second) {
90 error(input, lineNo,
91 "'" + separator + partName + "' occurs more than once");
92 continue;
93 }
94 if (!lastPart.empty())
95 partToBegin[lastPart].end = line.data();
96 Part &cur = res.first->second;
97 if (!i.is_at_eof())
98 cur.begin = i->data();
99 // If --no-leading-lines is not specified, numEmptyLines is 0. Append
100 // newlines so that the extracted part preserves line numbers.
101 cur.leadingLines = noLeadingLines ? 0 : i.line_number() - 1;
102
103 lastPart = partName;
104 }
105 if (lastPart.empty())
106 fatal(input, "no part separator was found");
107 if (errorCount)
108 return 1;
109 partToBegin[lastPart].end = inputBuf.getBufferEnd();
110
111 std::vector<std::unique_ptr<ToolOutputFile>> outputFiles;
112 SmallString<256> partPath;
113 for (auto &keyValue : partToBegin) {
114 partPath.clear();
115 sys::path::append(partPath, output, keyValue.first);
116 std::error_code ec =
117 sys::fs::create_directories(sys::path::parent_path(partPath));
118 if (ec)
119 fatal(input, ec.message());
120 auto f = std::make_unique<ToolOutputFile>(partPath.str(), ec,
121 llvm::sys::fs::OF_None);
122 if (!f)
123 fatal(input, ec.message());
124
125 Part &part = keyValue.second;
126 for (int64_t i = 0; i != part.leadingLines; ++i)
127 (*f).os().write('\n');
128 if (part.begin)
129 (*f).os().write(part.begin, part.end - part.begin);
130 outputFiles.push_back(std::move(f));
131 }
132
133 for (std::unique_ptr<ToolOutputFile> &outputFile : outputFiles)
134 outputFile->keep();
135 return 0;
136 }
137
main(int argc,const char ** argv)138 int main(int argc, const char **argv) {
139 toolName = sys::path::stem(argv[0]);
140 cl::HideUnrelatedOptions({&cat});
141 cl::ParseCommandLineOptions(
142 argc, argv,
143 "Split input into multiple parts separated by regex '^(.|//)--- ' and "
144 "extract the part specified by '^(.|//)--- <part>'\n",
145 nullptr,
146 /*EnvVar=*/nullptr,
147 /*LongOptionsUseDoubleDash=*/true);
148
149 if (input.empty())
150 fatal("", "input filename is not specified");
151 if (output.empty())
152 fatal("", "output directory is not specified");
153 ErrorOr<std::unique_ptr<MemoryBuffer>> bufferOrErr =
154 MemoryBuffer::getFileOrSTDIN(input);
155 if (std::error_code ec = bufferOrErr.getError())
156 fatal(input, ec.message());
157
158 // Delete output if it is a file or an empty directory, so that we can create
159 // a directory.
160 sys::fs::file_status status;
161 if (std::error_code ec = sys::fs::status(output, status))
162 if (ec.value() != static_cast<int>(std::errc::no_such_file_or_directory))
163 fatal(output, ec.message());
164 if (status.type() != sys::fs::file_type::file_not_found &&
165 status.type() != sys::fs::file_type::directory_file &&
166 status.type() != sys::fs::file_type::regular_file)
167 fatal(output, "output cannot be a special file");
168 if (std::error_code ec = sys::fs::remove(output, /*IgnoreNonExisting=*/true))
169 if (ec.value() != static_cast<int>(std::errc::directory_not_empty) &&
170 ec.value() != static_cast<int>(std::errc::file_exists))
171 fatal(output, ec.message());
172 return handle(**bufferOrErr, input);
173 }
174