1 /*
2 * Copyright (c) 2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #define MLOG_TAG "DuplicatePhotoOperation"
17
18 #include "duplicate_photo_operation.h"
19
20 #include "media_file_utils.h"
21 #include "media_log.h"
22 #include "medialibrary_rdbstore.h"
23 #include "medialibrary_tracer.h"
24 #include "medialibrary_unistore_manager.h"
25 #include "photo_query_filter.h"
26
27 namespace OHOS {
28 namespace Media {
// Guards the one-time execution of IDX_DUPLICATE_ASSETS in GetAllDuplicateAssets().
std::once_flag DuplicatePhotoOperation::onceFlag_;

// Projection returned by GetSelectColumns() when no column is requested.
const std::string ASTERISK = "*";

// Placeholder token embedded in the "to delete" SQL templates. GetDuplicateAssetsToDelete()
// swaps it for the real column list with MediaFileUtils::ReplaceAll before running the query.
const std::string SELECT_COLUMNS = "SELECT_COLUMNS";
// Name of the common table expression produced from GetNormalizedPhotosSubquery().
const std::string NORMALIZED_PHOTOS = "NormalizedPhotos";
// Alias of the computed column holding the title with its trailing numeric suffix removed.
const std::string NORMALIZED_TITLE = "normalized_title";

// Paging fragments; their '?' parameters are bound in AppendLimitOffsetClause().
const std::string LIMIT_CLAUSE = "LIMIT ?";
const std::string OFFSET_CLAUSE = "OFFSET ?";

// DDL that creates (if missing) an index on Photos(title, size, orientation).
const std::string IDX_DUPLICATE_ASSETS = "\
    CREATE INDEX \
    IF \
      NOT EXISTS idx_duplicate_assets ON Photos (title, size, orientation)";
44
/**
 * Returns the SELECT statement used as the body of the NormalizedPhotos common table expression.
 *
 * The statement selects every column of Photos plus a computed column (NORMALIZED_TITLE) equal to
 * the title with a trailing suffix removed. The removed suffix is anchored at the end of the title
 * and consists of an optional "_" followed by exactly two digits, then any number of repetitions of
 * either "_" plus one digit or a pair of parentheses enclosing zero or more digits
 * (e.g. "IMG_0001(1)" is expected to normalize to "IMG_0001").
 * Rows are restricted by the WHERE clause that PhotoQueryFilter::GetSqlWhereClause() produces for
 * Option::FILTER_VISIBLE.
 *
 * NOTE(review): REGEXP_REPLACE is not defined in this file; presumably it is an SQL function
 * provided by the underlying store — confirm.
 *
 * @return A copy of the SQL text, which is built once on first call and cached.
 */
static std::string GetNormalizedPhotosSubquery()
{
    static const std::string SQL_NORMALIZED_PHOTO_SUBQUERY = " \
        SELECT \
          *, \
          REGEXP_REPLACE(title, '(_[0-9]{2})?((_[0-9])|(\\([0-9]*\\)))*$', '') AS " + NORMALIZED_TITLE + " \
        FROM \
          Photos \
        WHERE " +
        // The visibility predicate is generated elsewhere and appended verbatim.
        PhotoQueryFilter::GetSqlWhereClause(PhotoQueryFilter::Option::FILTER_VISIBLE) + " ";

    return SQL_NORMALIZED_PHOTO_SUBQUERY;
}
58
/**
 * Returns the FROM-clause fragment that selects all duplicated assets with media_type = 1.
 *
 * Despite the "CTE" suffix, the text is not a WITH clause: it is "NormalizedPhotos INNER JOIN (...)"
 * and must follow a "SELECT ... FROM" in a statement that already defines the NormalizedPhotos CTE.
 * The joined sub-select (aliased DupImg) keeps only the (normalized_title, size, orientation)
 * groups containing more than one row, so the join yields every member of each such group.
 *
 * NOTE(review): media_type = 1 presumably denotes images (per the function name) — confirm.
 *
 * @return A copy of the SQL fragment, which is built once on first call and cached.
 */
static std::string GetAllDuplicateImageAssetsCTE()
{
    static const std::string SQL_ALL_DUPLICATE_IMG_ASSETS_CTE =
        NORMALIZED_PHOTOS + " \
        INNER JOIN ( \
          SELECT " +
            NORMALIZED_TITLE + ", \
            size, \
            orientation \
          FROM " +
            NORMALIZED_PHOTOS + " \
          WHERE \
            media_type = 1 \
          GROUP BY " +
            NORMALIZED_TITLE + ", \
            size, \
            orientation \
          HAVING \
            count(*) > 1 \
        ) AS DupImg ON " + NORMALIZED_PHOTOS + "." + NORMALIZED_TITLE + " = DupImg." + NORMALIZED_TITLE + " \
        AND " + NORMALIZED_PHOTOS + ".size = DupImg.size \
        AND " + NORMALIZED_PHOTOS + ".orientation = DupImg.orientation ";

    return SQL_ALL_DUPLICATE_IMG_ASSETS_CTE;
}
84
/**
 * Returns the FROM-clause fragment that selects all duplicated assets with media_type = 2.
 *
 * Same shape as GetAllDuplicateImageAssetsCTE(), except that groups are formed on
 * (normalized_title, size) only and the join condition does not compare orientation.
 * The sub-select (aliased DupVid) still lists orientation in its projection although it is not part
 * of the GROUP BY.
 *
 * NOTE(review): media_type = 2 presumably denotes videos (per the function name) — confirm.
 * NOTE(review): the un-grouped orientation column is presumably kept so that DupVid exposes the same
 * number of columns as DupImg for the UNION of "SELECT *" queries — confirm.
 *
 * @return A copy of the SQL fragment, which is built once on first call and cached.
 */
static std::string GetAllDuplicateVideoAssetsCTE()
{
    static const std::string SQL_ALL_DUPLICATE_VID_ASSETS_CTE =
        NORMALIZED_PHOTOS + " \
        INNER JOIN (\
          SELECT " +
            NORMALIZED_TITLE + ", \
            size, \
            orientation \
          FROM " +
            NORMALIZED_PHOTOS + " \
          WHERE \
            media_type = 2 \
          GROUP BY " +
            NORMALIZED_TITLE + ", \
            size \
          HAVING \
            count(*) > 1\
        ) AS DupVid ON " + NORMALIZED_PHOTOS + "." + NORMALIZED_TITLE + " = DupVid." + NORMALIZED_TITLE + " \
        AND " + NORMALIZED_PHOTOS + ".size = DupVid.size ";

    return SQL_ALL_DUPLICATE_VID_ASSETS_CTE;
}
108
/**
 * Returns the SQL statement that counts all duplicated assets.
 *
 * The statement defines the NormalizedPhotos CTE, collects the file_id of every duplicated
 * media_type = 1 and media_type = 2 asset, merges both lists with UNION and returns count(*) of the
 * merged list as a single row with a single column.
 *
 * @return A copy of the SQL text, which is built once on first call and cached.
 */
static std::string GetQueryAllDuplicateAssetsCountSql()
{
    static const std::string SQL_QUERY_ALL_DUPLICATE_ASSETS_COUNT = "\
        WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
        SELECT \
          count(*) \
        FROM (SELECT file_id FROM " + GetAllDuplicateImageAssetsCTE() +
        " UNION " +
        "SELECT file_id FROM " + GetAllDuplicateVideoAssetsCTE() + ") ";

    return SQL_QUERY_ALL_DUPLICATE_ASSETS_COUNT;
}
121
/**
 * Returns the SQL statement that lists all duplicated assets.
 *
 * The statement defines the NormalizedPhotos CTE and returns the UNION of "SELECT *" over the
 * duplicated media_type = 1 and media_type = 2 join fragments, ordered by normalized title, size and
 * orientation. The text ends right after the ORDER BY clause and contains no LIMIT/OFFSET;
 * GetAllDuplicateAssets() appends paging with AppendLimitOffsetClause().
 *
 * @return A copy of the SQL text, which is built once on first call and cached.
 */
static std::string GetQueryAllDuplicateAssetsSql()
{
    static const std::string SQL_QUERY_ALL_DUPLICATE_ASSETS = "\
        WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
        SELECT \
          * \
        FROM " + GetAllDuplicateImageAssetsCTE() + " \
        UNION \
        SELECT \
          * \
        FROM " + GetAllDuplicateVideoAssetsCTE() + " \
        ORDER BY \
          " + NORMALIZED_PHOTOS + "." + NORMALIZED_TITLE + ", \
          " + NORMALIZED_PHOTOS + ".size, \
          " + NORMALIZED_PHOTOS + ".orientation \
        ";

    return SQL_QUERY_ALL_DUPLICATE_ASSETS;
}
141
// SQL CASE expression that maps the lpath column to a rank from 0 to 5. It is used as an ascending
// ORDER BY key inside the ROW_NUMBER() windows of the "to delete" queries, so a smaller rank places
// the row earlier within its duplicate group.
// NOTE(review): lpath is presumably a column of PhotoAlbum (the table joined by those queries) —
// confirm.
const std::string ALBUM_PRIORITY_EXPRESSION = "\
    CASE \
      WHEN lpath = '/DCIM/Camera' THEN \
      0 \
      WHEN lpath = '/Pictures/Screenshots' THEN \
      1 \
      WHEN lpath = '/Pictures/Screenrecords' THEN \
      2 \
      WHEN lpath = '/Pictures/WeiXin' THEN \
      3 \
      WHEN lpath IN ( '/Pictures/WeChat', '/tencent/MicroMsg/WeChat', '/Tencent/MicroMsg/WeiXin' ) THEN \
      4 \
      ELSE 5 \
    END ";

// SQL CASE expression that yields 0 when the title is identical to its normalized form (no suffix
// was stripped) and 1 otherwise. Used as the last ascending ORDER BY key of the same windows.
const std::string TITLE_PRIORITY_EXPRESSION = "\
    CASE \
      WHEN title = " + NORMALIZED_TITLE + " THEN \
      0 \
      ELSE 1 \
    END ";
163
GetDuplicateImageToDeleteCTE()164 static std::string GetDuplicateImageToDeleteCTE()
165 {
166 static const std::string SQL_DUPLICATE_IMG_TO_DELETE_CTE = "\
167 SELECT\
168 " + SELECT_COLUMNS + ", \
169 " + NORMALIZED_TITLE + ", \
170 ROW_NUMBER( ) OVER (\
171 PARTITION BY " + NORMALIZED_TITLE + ", \
172 size, \
173 orientation \
174 ORDER BY \
175 CASE \
176 WHEN album_id != NULL THEN \
177 0 ELSE 1 \
178 END ASC, \
179 " + ALBUM_PRIORITY_EXPRESSION + " ASC, \
180 " + TITLE_PRIORITY_EXPRESSION + " ASC \
181 ) AS row_num \
182 FROM \
183 " + NORMALIZED_PHOTOS + " \
184 LEFT JOIN PhotoAlbum ON " + NORMALIZED_PHOTOS + ".owner_album_id = PhotoAlbum.album_id \
185 WHERE \
186 media_type = 1 ";
187
188 return SQL_DUPLICATE_IMG_TO_DELETE_CTE;
189 }
190
GetDuplicateVideoToDeleteCTE()191 static std::string GetDuplicateVideoToDeleteCTE()
192 {
193 static const std::string SQL_DUPLICATE_VID_TO_DELETE_CTE = "\
194 SELECT\
195 " + SELECT_COLUMNS + ", \
196 " + NORMALIZED_TITLE + ", \
197 ROW_NUMBER( ) OVER (\
198 PARTITION BY " + NORMALIZED_TITLE + ", \
199 size \
200 ORDER BY \
201 CASE \
202 WHEN album_id != NULL THEN \
203 0 ELSE 1 \
204 END ASC, \
205 " + ALBUM_PRIORITY_EXPRESSION + " ASC, \
206 " + TITLE_PRIORITY_EXPRESSION + " ASC \
207 ) AS row_num \
208 FROM \
209 " + NORMALIZED_PHOTOS + " \
210 LEFT JOIN PhotoAlbum ON " + NORMALIZED_PHOTOS + ".owner_album_id = PhotoAlbum.album_id \
211 WHERE \
212 media_type = 2 ";
213
214 return SQL_DUPLICATE_VID_TO_DELETE_CTE;
215 }
216
/**
 * Returns the SQL template that lists the duplicated assets which can be deleted.
 *
 * The statement defines the NormalizedPhotos CTE, takes the rows with row_num > 1 from the ranking
 * sub-selects of GetDuplicateImageToDeleteCTE() and GetDuplicateVideoToDeleteCTE(), merges them with
 * UNION and orders the result by normalized title, size and orientation.
 *
 * The text is a template: every occurrence of the SELECT_COLUMNS placeholder (in this statement and
 * in the embedded sub-selects) has to be replaced by a concrete column list before execution, as
 * GetDuplicateAssetsToDelete() does. No LIMIT/OFFSET is included.
 *
 * NOTE(review): the ORDER BY names size and orientation without qualification, so the substituted
 * column list presumably has to contain both — confirm.
 *
 * @return A copy of the SQL template, which is built once on first call and cached.
 */
static std::string GetDuplicateAssetsToDeleteSql()
{
    static const std::string SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE = "\
        WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
        SELECT \
          " + SELECT_COLUMNS + ", \
          " + NORMALIZED_TITLE + " \
        FROM ( " + GetDuplicateImageToDeleteCTE() + " ) \
        WHERE \
          row_num > 1 \
        UNION \
        SELECT \
          " + SELECT_COLUMNS + ", \
          " + NORMALIZED_TITLE + " \
        FROM ( " + GetDuplicateVideoToDeleteCTE() + " ) \
        WHERE \
          row_num > 1 \
        ORDER BY \
          " + NORMALIZED_TITLE + ", \
          size, \
          orientation \
        ";

    return SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE;
}
242
GetDuplicateAssetsToDeleteCountSql()243 static std::string GetDuplicateAssetsToDeleteCountSql()
244 {
245 static const std::string SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE = "\
246 WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
247 SELECT \
248 count(*) \
249 FROM (SELECT file_id FROM (" + GetDuplicateImageToDeleteCTE() + ") WHERE row_num > 1 \
250 UNION \
251 SELECT file_id FROM (" + GetDuplicateVideoToDeleteCTE() + ") WHERE row_num > 1) ";
252
253 return SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE;
254 }
255
GetSelectColumns(const std::unordered_set<std::string> & columns)256 std::string DuplicatePhotoOperation::GetSelectColumns(const std::unordered_set<std::string> &columns)
257 {
258 CHECK_AND_RETURN_RET(!columns.empty(), ASTERISK);
259
260 std::string selectColumns;
261 bool first = true;
262 for (const std::string &column : columns) {
263 if (!first) {
264 selectColumns += ", ";
265 } else {
266 first = false;
267 }
268 selectColumns += column;
269 }
270
271 return selectColumns;
272 }
273
/**
 * Appends parameterized paging clauses to a SQL statement.
 *
 * @param sql      Statement to extend in place.
 * @param bindArgs Receives the values for the appended '?' parameters, in clause order.
 * @param limit    Row limit. A negative value disables paging entirely, so nothing is appended.
 * @param offset   Row offset. Only honored together with a non-negative limit; a negative value
 *                 omits the OFFSET clause.
 */
static void AppendLimitOffsetClause(std::string &sql, std::vector<NativeRdb::ValueObject>& bindArgs,
    int limit, int offset)
{
    if (limit < 0) {
        return;
    }
    sql.append(" ").append(LIMIT_CLAUSE);
    bindArgs.push_back(limit);

    if (offset < 0) {
        return;
    }
    sql.append(" ").append(OFFSET_CLAUSE);
    bindArgs.push_back(offset);
}
286
GetAllDuplicateAssets(const NativeRdb::RdbPredicates & predicates,const std::vector<std::string> & columns)287 std::shared_ptr<NativeRdb::ResultSet> DuplicatePhotoOperation::GetAllDuplicateAssets(
288 const NativeRdb::RdbPredicates& predicates, const std::vector<std::string>& columns)
289 {
290 int limit = predicates.GetLimit();
291 int offset = predicates.GetOffset();
292 bool isQueryCount = find(columns.begin(), columns.end(), MEDIA_COLUMN_COUNT) != columns.end();
293 MEDIA_INFO_LOG("Limit: %{public}d, Offset: %{public}d, isQueryCount: %{public}d", limit, offset, isQueryCount);
294 auto rdbStore = MediaLibraryUnistoreManager::GetInstance().GetRdbStore();
295 CHECK_AND_RETURN_RET_LOG(rdbStore != nullptr, nullptr, "GetAllDuplicateAssets failed, rdbStore is nullptr");
296
297 MediaLibraryTracer tracer;
298 if (isQueryCount) {
299 tracer.Start("QueryAllDuplicateAssets_count");
300 std::call_once(onceFlag_, [&]() { rdbStore->ExecuteSql(IDX_DUPLICATE_ASSETS); });
301 return rdbStore->QueryByStep(GetQueryAllDuplicateAssetsCountSql());
302 }
303
304 tracer.Start("QueryAllDuplicateAssets_records");
305 std::string sql = GetQueryAllDuplicateAssetsSql();
306 std::vector<NativeRdb::ValueObject> bindArgs {};
307 AppendLimitOffsetClause(sql, bindArgs, limit, offset);
308 return rdbStore->QueryByStep(sql, bindArgs);
309 }
310
GetDuplicateAssetsToDelete(const NativeRdb::RdbPredicates & predicates,const std::vector<std::string> & columns)311 std::shared_ptr<NativeRdb::ResultSet> DuplicatePhotoOperation::GetDuplicateAssetsToDelete(
312 const NativeRdb::RdbPredicates& predicates, const std::vector<std::string>& columns)
313 {
314 int limit = predicates.GetLimit();
315 int offset = predicates.GetOffset();
316 bool isQueryCount = find(columns.begin(), columns.end(), MEDIA_COLUMN_COUNT) != columns.end();
317 MEDIA_INFO_LOG("Limit: %{public}d, Offset: %{public}d, isQueryCount: %{public}d", limit, offset, isQueryCount);
318 auto rdbStore = MediaLibraryUnistoreManager::GetInstance().GetRdbStore();
319 CHECK_AND_RETURN_RET_LOG(rdbStore != nullptr, nullptr, "GetAllDuplicateAssets failed, rdbStore is nullptr");
320
321 MediaLibraryTracer tracer;
322 if (isQueryCount) {
323 tracer.Start("QueryCanDelDuplicateAssets_count");
324 return rdbStore->QueryByStep(GetDuplicateAssetsToDeleteCountSql());
325 }
326
327 tracer.Start("QueryCanDelDuplicateAssets_records");
328 std::unordered_set<std::string> columnSet{ "file_id", "title", "size", "orientation" };
329 columnSet.insert(columns.begin(), columns.end());
330 std::string sql = GetDuplicateAssetsToDeleteSql();
331 std::string selectColumns = GetSelectColumns(columnSet);
332 MediaFileUtils::ReplaceAll(sql, SELECT_COLUMNS, selectColumns);
333 std::vector<NativeRdb::ValueObject> bindArgs {};
334 AppendLimitOffsetClause(sql, bindArgs, limit, offset);
335 return rdbStore->QueryByStep(sql, bindArgs);
336 }
337 } // namespace Media
338 } // namespace OHOS
339