1 // Copyright 2017 PDFium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
7 #include "core/fxcrt/cfx_seekablestreamproxy.h"
8
9 #if _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
10 #include <io.h>
11 #endif // _FX_PLATFORM_ == _FX_PLATFORM_WINDOWS_
12
13 #include <algorithm>
14 #include <limits>
15 #include <memory>
16 #include <utility>
17 #include <vector>
18
19 #include "core/fxcrt/cfx_memorystream.h"
20 #include "core/fxcrt/fx_codepage.h"
21 #include "core/fxcrt/fx_extension.h"
22 #include "third_party/base/ptr_util.h"
23 #include "third_party/base/stl_util.h"
24
25 namespace {
26
// Decodes UTF-8 bytes from |pSrc| (at most |srcLen| bytes) into |pDst| (at
// most |dstLen| wide characters).
//
// Returns {source bytes consumed, wide characters written}. Only complete
// sequences are counted as consumed: the bytes of a multi-byte sequence that
// is cut off by the end of the input are not included in the first value.
//
// Decoding stops early when:
//   - |dstLen| characters have been written,
//   - a continuation byte (0x80..0xBF) shows up with no sequence in progress,
//   - a 0xFE or 0xFF byte is encountered.
//
// Lead bytes for the legacy 5- and 6-byte forms are accepted, and no check
// for overlong encodings or surrogate values is performed.
std::pair<size_t, size_t> UTF8Decode(const char* pSrc,
                                     size_t srcLen,
                                     wchar_t* pDst,
                                     size_t dstLen) {
  ASSERT(pDst && dstLen > 0);

  if (srcLen < 1)
    return {0, 0};

  uint32_t code_point = 0;     // Value accumulated for the current sequence.
  int32_t trailing_left = 0;   // Continuation bytes still expected.
  int32_t sequence_len = 1;    // Total byte length of the current sequence.
  size_t consumed = 0;
  size_t produced = 0;

  for (size_t pos = 0; pos < srcLen; ++pos) {
    const uint8_t ch = static_cast<uint8_t>(pSrc[pos]);

    // 0xFE and 0xFF never start or continue a sequence.
    if (ch >= 0xfe)
      break;

    if (ch >= 0xc0) {
      // Lead byte: the number of leading 1 bits selects the sequence length.
      if (ch < 0xe0)
        trailing_left = 1;
      else if (ch < 0xf0)
        trailing_left = 2;
      else if (ch < 0xf8)
        trailing_left = 3;
      else if (ch < 0xfc)
        trailing_left = 4;
      else
        trailing_left = 5;

      sequence_len = trailing_left + 1;
      // The payload mask shrinks by one bit per extra trailing byte
      // (0x1f, 0x0f, 0x07, 0x03, 0x01); each trailing byte adds 6 bits below.
      code_point = (ch & (0x3f >> trailing_left)) << (trailing_left * 6);
      continue;
    }

    wchar_t decoded;
    if (ch < 0x80) {
      // Plain ASCII; this also abandons any sequence that was in progress.
      trailing_left = 0;
      sequence_len = 1;
      decoded = static_cast<wchar_t>(ch);
    } else {
      // Continuation byte.
      if (trailing_left < 1)
        break;

      --trailing_left;
      code_point |= (ch & 0x3f) << (trailing_left * 6);
      if (trailing_left != 0)
        continue;

      decoded = static_cast<wchar_t>(code_point);
    }

    ++produced;
    consumed += sequence_len;
    *pDst++ = decoded;
    if (produced >= dstLen)
      break;
  }
  return {consumed, produced};
}
93
// Widens, in place, the |iLength| 16-bit code units stored at the start of
// |pBuffer| into |iLength| wchar_t values occupying the same buffer.
//
// Only meaningful when wchar_t is wider than 16 bits; |pBuffer| must have
// room for |iLength| wchar_t elements.
void UTF16ToWChar(void* pBuffer, size_t iLength) {
  ASSERT(pBuffer);
  ASSERT(iLength > 0);
  ASSERT(sizeof(wchar_t) > 2);

  const uint16_t* pSrc = static_cast<const uint16_t*>(pBuffer);
  wchar_t* pDst = static_cast<wchar_t*>(pBuffer);

  // Source and destination overlap, and each destination element is wider
  // than a source element. Walk from the last element to the first so that
  // writing pDst[i] can only touch source units at index >= i, all of which
  // have already been read. A forward walk would overwrite pSrc[i + 1] while
  // storing pDst[i].
  for (size_t i = iLength; i-- > 0;)
    pDst[i] = static_cast<wchar_t>(pSrc[i]);
}
104
// Swaps the two bytes of the low 16 bits of each of the |iLength| characters
// in |pStr|, in place. Any bits above the low 16 are discarded, so each
// result is the byte-swapped 16-bit unit zero-extended to wchar_t.
void SwapByteOrder(wchar_t* pStr, size_t iLength) {
  ASSERT(pStr);

  // The same loop is correct for 16-bit and wider wchar_t. Both bytes of the
  // swapped unit must be kept: masking the result down to 8 bits would
  // truncate every character above U+00FF.
  for (size_t i = 0; i < iLength; ++i) {
    const uint16_t unit = static_cast<uint16_t>(pStr[i]);
    const uint16_t swapped = static_cast<uint16_t>((unit >> 8) | (unit << 8));
    pStr[i] = static_cast<wchar_t>(swapped);
  }
}
127
128 } // namespace
129
130 #define BOM_MASK 0x00FFFFFF
131 #define BOM_UTF8 0x00BFBBEF
132 #define BOM_UTF16_MASK 0x0000FFFF
133 #define BOM_UTF16_BE 0x0000FFFE
134 #define BOM_UTF16_LE 0x0000FEFF
135
CFX_SeekableStreamProxy(const RetainPtr<IFX_SeekableStream> & stream,bool isWriteStream)136 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(
137 const RetainPtr<IFX_SeekableStream>& stream,
138 bool isWriteStream)
139 : m_IsWriteStream(isWriteStream),
140 m_wCodePage(FX_CODEPAGE_DefANSI),
141 m_wBOMLength(0),
142 m_iPosition(0),
143 m_pStream(stream) {
144 ASSERT(m_pStream);
145
146 if (isWriteStream) {
147 m_iPosition = m_pStream->GetSize();
148 return;
149 }
150
151 Seek(From::Begin, 0);
152
153 uint32_t bom = 0;
154 ReadData(reinterpret_cast<uint8_t*>(&bom), 3);
155
156 bom &= BOM_MASK;
157 if (bom == BOM_UTF8) {
158 m_wBOMLength = 3;
159 m_wCodePage = FX_CODEPAGE_UTF8;
160 } else {
161 bom &= BOM_UTF16_MASK;
162 if (bom == BOM_UTF16_BE) {
163 m_wBOMLength = 2;
164 m_wCodePage = FX_CODEPAGE_UTF16BE;
165 } else if (bom == BOM_UTF16_LE) {
166 m_wBOMLength = 2;
167 m_wCodePage = FX_CODEPAGE_UTF16LE;
168 } else {
169 m_wBOMLength = 0;
170 m_wCodePage = FXSYS_GetACP();
171 }
172 }
173
174 Seek(From::Begin, static_cast<FX_FILESIZE>(m_wBOMLength));
175 }
176
CFX_SeekableStreamProxy(uint8_t * data,size_t size)177 CFX_SeekableStreamProxy::CFX_SeekableStreamProxy(uint8_t* data, size_t size)
178 : CFX_SeekableStreamProxy(
179 pdfium::MakeRetain<CFX_MemoryStream>(data, size, false),
180 false) {}
181
~CFX_SeekableStreamProxy()182 CFX_SeekableStreamProxy::~CFX_SeekableStreamProxy() {}
183
Seek(From eSeek,FX_FILESIZE iOffset)184 void CFX_SeekableStreamProxy::Seek(From eSeek, FX_FILESIZE iOffset) {
185 switch (eSeek) {
186 case From::Begin:
187 m_iPosition = iOffset;
188 break;
189 case From::Current: {
190 pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
191 new_pos += iOffset;
192 m_iPosition =
193 new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
194 } break;
195 }
196 m_iPosition =
197 pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
198 }
199
SetCodePage(uint16_t wCodePage)200 void CFX_SeekableStreamProxy::SetCodePage(uint16_t wCodePage) {
201 if (m_wBOMLength > 0)
202 return;
203 m_wCodePage = wCodePage;
204 }
205
ReadData(uint8_t * pBuffer,size_t iBufferSize)206 size_t CFX_SeekableStreamProxy::ReadData(uint8_t* pBuffer, size_t iBufferSize) {
207 ASSERT(pBuffer && iBufferSize > 0);
208
209 if (m_IsWriteStream)
210 return 0;
211
212 iBufferSize =
213 std::min(iBufferSize, static_cast<size_t>(GetLength() - m_iPosition));
214 if (iBufferSize <= 0)
215 return 0;
216
217 if (!m_pStream->ReadBlock(pBuffer, m_iPosition, iBufferSize))
218 return 0;
219
220 pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
221 new_pos += iBufferSize;
222 m_iPosition = new_pos.ValueOrDefault(m_iPosition);
223 return new_pos.IsValid() ? iBufferSize : 0;
224 }
225
ReadString(wchar_t * pStr,size_t iMaxLength,bool * bEOS)226 size_t CFX_SeekableStreamProxy::ReadString(wchar_t* pStr,
227 size_t iMaxLength,
228 bool* bEOS) {
229 if (!pStr || iMaxLength == 0)
230 return 0;
231
232 if (m_IsWriteStream)
233 return 0;
234
235 if (m_wCodePage == FX_CODEPAGE_UTF16LE ||
236 m_wCodePage == FX_CODEPAGE_UTF16BE) {
237 size_t iBytes = iMaxLength * 2;
238 size_t iLen = ReadData(reinterpret_cast<uint8_t*>(pStr), iBytes);
239 iMaxLength = iLen / 2;
240 if (sizeof(wchar_t) > 2 && iMaxLength > 0)
241 UTF16ToWChar(pStr, iMaxLength);
242
243 if (m_wCodePage == FX_CODEPAGE_UTF16BE)
244 SwapByteOrder(pStr, iMaxLength);
245
246 } else {
247 FX_FILESIZE pos = GetPosition();
248 size_t iBytes =
249 std::min(iMaxLength, static_cast<size_t>(GetLength() - pos));
250
251 if (iBytes > 0) {
252 std::vector<uint8_t> buf(iBytes);
253
254 size_t iLen = ReadData(buf.data(), iBytes);
255 if (m_wCodePage != FX_CODEPAGE_UTF8)
256 return 0;
257
258 size_t iSrc = 0;
259 std::tie(iSrc, iMaxLength) = UTF8Decode(
260 reinterpret_cast<const char*>(buf.data()), iLen, pStr, iMaxLength);
261 Seek(From::Current, iSrc - iLen);
262 } else {
263 iMaxLength = 0;
264 }
265 }
266
267 *bEOS = IsEOF();
268 return iMaxLength;
269 }
270
WriteString(const WideStringView & str)271 void CFX_SeekableStreamProxy::WriteString(const WideStringView& str) {
272 if (!m_IsWriteStream || str.GetLength() == 0 ||
273 m_wCodePage != FX_CODEPAGE_UTF8) {
274 return;
275 }
276 if (!m_pStream->WriteBlock(str.unterminated_c_str(), m_iPosition,
277 str.GetLength() * sizeof(wchar_t))) {
278 return;
279 }
280
281 pdfium::base::CheckedNumeric<FX_FILESIZE> new_pos = m_iPosition;
282 new_pos += str.GetLength() * sizeof(wchar_t);
283 m_iPosition = new_pos.ValueOrDefault(std::numeric_limits<FX_FILESIZE>::max());
284 m_iPosition =
285 pdfium::clamp(m_iPosition, static_cast<FX_FILESIZE>(0), GetLength());
286 }
287