• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #define MLOG_TAG "DuplicatePhotoOperation"
17 
18 #include "duplicate_photo_operation.h"
19 
20 #include "media_file_utils.h"
21 #include "media_log.h"
22 #include "medialibrary_rdbstore.h"
23 #include "medialibrary_tracer.h"
24 #include "medialibrary_unistore_manager.h"
25 #include "photo_query_filter.h"
26 
27 namespace OHOS {
28 namespace Media {
29 std::once_flag DuplicatePhotoOperation::onceFlag_;
30 
// Projection returned by GetSelectColumns when no explicit column list is supplied.
const std::string ASTERISK { "*" };

// Placeholder token embedded in the "to delete" SQL templates; it is substituted with a
// concrete column list before the statement is executed.
const std::string SELECT_COLUMNS { "SELECT_COLUMNS" };
// Name of the WITH-clause table that exposes Photos rows plus their normalized title.
const std::string NORMALIZED_PHOTOS { "NormalizedPhotos" };
// Alias of the computed column that holds the title with its trailing suffix stripped.
const std::string NORMALIZED_TITLE { "normalized_title" };

// Paging fragments; the values are supplied as bind arguments.
const std::string LIMIT_CLAUSE { "LIMIT ?" };
const std::string OFFSET_CLAUSE { "OFFSET ?" };

// DDL that creates the (title, size, orientation) index on Photos if it does not exist yet.
const std::string IDX_DUPLICATE_ASSETS { "\
    CREATE INDEX \
    IF \
      NOT EXISTS idx_duplicate_assets ON Photos (title, size, orientation)" };
44 
GetNormalizedPhotosSubquery()45 static std::string GetNormalizedPhotosSubquery()
46 {
47     static const std::string SQL_NORMALIZED_PHOTO_SUBQUERY = " \
48         SELECT \
49           *, \
50           REGEXP_REPLACE(title, '(_[0-9]{2})?((_[0-9])|(\\([0-9]*\\)))*$', '') AS " + NORMALIZED_TITLE + " \
51         FROM \
52           Photos \
53         WHERE " +
54           PhotoQueryFilter::GetSqlWhereClause(PhotoQueryFilter::Option::FILTER_VISIBLE) + " ";
55 
56     return SQL_NORMALIZED_PHOTO_SUBQUERY;
57 }
58 
GetAllDuplicateImageAssetsCTE()59 static std::string GetAllDuplicateImageAssetsCTE()
60 {
61     static const std::string SQL_ALL_DUPLICATE_IMG_ASSETS_CTE =
62         NORMALIZED_PHOTOS + " \
63         INNER JOIN ( \
64             SELECT " +
65                 NORMALIZED_TITLE + ", \
66                 size, \
67                 orientation \
68             FROM " +
69                 NORMALIZED_PHOTOS + " \
70             WHERE \
71                 media_type = 1 \
72             GROUP BY " +
73                 NORMALIZED_TITLE + ", \
74                 size, \
75                 orientation \
76             HAVING \
77                 count(*) > 1 \
78         ) AS DupImg ON " + NORMALIZED_PHOTOS + "." + NORMALIZED_TITLE + " = DupImg." + NORMALIZED_TITLE + " \
79             AND " + NORMALIZED_PHOTOS + ".size = DupImg.size \
80             AND " + NORMALIZED_PHOTOS + ".orientation = DupImg.orientation ";
81 
82     return SQL_ALL_DUPLICATE_IMG_ASSETS_CTE;
83 }
84 
GetAllDuplicateVideoAssetsCTE()85 static std::string GetAllDuplicateVideoAssetsCTE()
86 {
87     static const std::string SQL_ALL_DUPLICATE_VID_ASSETS_CTE =
88         NORMALIZED_PHOTOS + " \
89         INNER JOIN (\
90             SELECT " +
91                 NORMALIZED_TITLE + ", \
92                 size, \
93                 orientation \
94             FROM " +
95                 NORMALIZED_PHOTOS + " \
96             WHERE \
97                 media_type = 2 \
98             GROUP BY " +
99                 NORMALIZED_TITLE + ", \
100                 size \
101             HAVING \
102                 count(*) > 1\
103         ) AS DupVid ON " + NORMALIZED_PHOTOS + "." + NORMALIZED_TITLE + " = DupVid." + NORMALIZED_TITLE + " \
104             AND " + NORMALIZED_PHOTOS + ".size = DupVid.size ";
105 
106     return SQL_ALL_DUPLICATE_VID_ASSETS_CTE;
107 }
108 
GetQueryAllDuplicateAssetsCountSql()109 static std::string GetQueryAllDuplicateAssetsCountSql()
110 {
111     static const std::string SQL_QUERY_ALL_DUPLICATE_ASSETS_COUNT = "\
112         WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
113         SELECT \
114             count(*) \
115         FROM (SELECT file_id FROM " + GetAllDuplicateImageAssetsCTE() +
116             " UNION " +
117             "SELECT file_id FROM " + GetAllDuplicateVideoAssetsCTE() + ") ";
118 
119     return SQL_QUERY_ALL_DUPLICATE_ASSETS_COUNT;
120 }
121 
GetQueryAllDuplicateAssetsSql()122 static std::string GetQueryAllDuplicateAssetsSql()
123 {
124     static const std::string SQL_QUERY_ALL_DUPLICATE_ASSETS = "\
125         WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
126         SELECT \
127             * \
128         FROM " + GetAllDuplicateImageAssetsCTE() + " \
129         UNION \
130         SELECT \
131             * \
132         FROM " + GetAllDuplicateVideoAssetsCTE() + " \
133         ORDER BY \
134             " + NORMALIZED_PHOTOS + "." + NORMALIZED_TITLE + ", \
135             " + NORMALIZED_PHOTOS + ".size, \
136             " + NORMALIZED_PHOTOS + ".orientation \
137         ";
138 
139     return SQL_QUERY_ALL_DUPLICATE_ASSETS;
140 }
141 
// SQL CASE expression that maps an album's lpath to a rank from 0 to 5. It is used as an
// ascending ORDER BY key when ranking the members of a duplicate group, so a smaller rank
// sorts first. Any lpath that is not listed falls into the ELSE branch (5).
const std::string ALBUM_PRIORITY_EXPRESSION { "\
    CASE \
        WHEN lpath = '/DCIM/Camera' THEN \
        0 \
        WHEN lpath = '/Pictures/Screenshots' THEN \
        1 \
        WHEN lpath = '/Pictures/Screenrecords' THEN \
        2 \
        WHEN lpath = '/Pictures/WeiXin' THEN \
        3 \
        WHEN lpath IN ( '/Pictures/WeChat', '/tencent/MicroMsg/WeChat', '/Tencent/MicroMsg/WeiXin' ) THEN \
        4 \
        ELSE 5 \
    END " };
156 
157 const std::string TITLE_PRIORITY_EXPRESSION = "\
158     CASE \
159         WHEN title = " + NORMALIZED_TITLE + " THEN \
160         0 \
161         ELSE 1 \
162     END ";
163 
GetDuplicateImageToDeleteCTE()164 static std::string GetDuplicateImageToDeleteCTE()
165 {
166     static const std::string SQL_DUPLICATE_IMG_TO_DELETE_CTE = "\
167       SELECT\
168         " + SELECT_COLUMNS + ", \
169         " + NORMALIZED_TITLE + ", \
170         ROW_NUMBER( ) OVER (\
171           PARTITION BY " + NORMALIZED_TITLE + ", \
172           size, \
173           orientation \
174         ORDER BY \
175         CASE \
176           WHEN album_id != NULL THEN \
177           0 ELSE 1 \
178         END ASC, \
179         " + ALBUM_PRIORITY_EXPRESSION + " ASC, \
180         " + TITLE_PRIORITY_EXPRESSION + " ASC \
181         ) AS row_num \
182       FROM \
183         " + NORMALIZED_PHOTOS + " \
184         LEFT JOIN PhotoAlbum ON " + NORMALIZED_PHOTOS + ".owner_album_id = PhotoAlbum.album_id \
185       WHERE \
186         media_type = 1 ";
187 
188     return SQL_DUPLICATE_IMG_TO_DELETE_CTE;
189 }
190 
GetDuplicateVideoToDeleteCTE()191 static std::string GetDuplicateVideoToDeleteCTE()
192 {
193     static const std::string SQL_DUPLICATE_VID_TO_DELETE_CTE = "\
194       SELECT\
195         " + SELECT_COLUMNS + ", \
196         " + NORMALIZED_TITLE + ", \
197         ROW_NUMBER( ) OVER (\
198           PARTITION BY " + NORMALIZED_TITLE + ", \
199           size \
200         ORDER BY \
201         CASE \
202           WHEN album_id != NULL THEN \
203           0 ELSE 1 \
204         END ASC, \
205         " + ALBUM_PRIORITY_EXPRESSION + " ASC, \
206         " + TITLE_PRIORITY_EXPRESSION + " ASC \
207         ) AS row_num \
208       FROM \
209         " + NORMALIZED_PHOTOS + " \
210         LEFT JOIN PhotoAlbum ON " + NORMALIZED_PHOTOS + ".owner_album_id = PhotoAlbum.album_id \
211       WHERE \
212         media_type = 2 ";
213 
214     return SQL_DUPLICATE_VID_TO_DELETE_CTE;
215 }
216 
GetDuplicateAssetsToDeleteSql()217 static std::string GetDuplicateAssetsToDeleteSql()
218 {
219     static const std::string SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE = "\
220         WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
221         SELECT \
222             " + SELECT_COLUMNS + ", \
223             " + NORMALIZED_TITLE + " \
224         FROM ( " + GetDuplicateImageToDeleteCTE() + " ) \
225         WHERE \
226             row_num > 1 \
227         UNION \
228         SELECT \
229             " + SELECT_COLUMNS + ", \
230             " + NORMALIZED_TITLE + " \
231         FROM ( " + GetDuplicateVideoToDeleteCTE() + " ) \
232         WHERE \
233             row_num > 1 \
234         ORDER BY \
235             " + NORMALIZED_TITLE + ", \
236             size, \
237             orientation \
238     ";
239 
240     return SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE;
241 }
242 
GetDuplicateAssetsToDeleteCountSql()243 static std::string GetDuplicateAssetsToDeleteCountSql()
244 {
245     static const std::string SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE = "\
246         WITH " + NORMALIZED_PHOTOS + " AS (" + GetNormalizedPhotosSubquery() + ") " + " \
247         SELECT \
248             count(*) \
249         FROM (SELECT file_id FROM (" + GetDuplicateImageToDeleteCTE() + ") WHERE row_num > 1 \
250             UNION \
251             SELECT file_id FROM (" + GetDuplicateVideoToDeleteCTE() + ") WHERE row_num > 1) ";
252 
253     return SQL_QUERY_DUPLICATE_ASSETS_TO_DELETE;
254 }
255 
GetSelectColumns(const std::unordered_set<std::string> & columns)256 std::string DuplicatePhotoOperation::GetSelectColumns(const std::unordered_set<std::string> &columns)
257 {
258     CHECK_AND_RETURN_RET(!columns.empty(), ASTERISK);
259 
260     std::string selectColumns;
261     bool first = true;
262     for (const std::string &column : columns) {
263         if (!first) {
264             selectColumns += ", ";
265         } else {
266             first = false;
267         }
268         selectColumns += column;
269     }
270 
271     return selectColumns;
272 }
273 
// Appends paging clauses to a statement and records their bind values.
//
// @param sql       Statement text; " LIMIT ?" and optionally " OFFSET ?" are appended to it.
// @param bindArgs  Receives the limit and offset values, in that order, for the placeholders.
// @param limit     Row limit; a negative value means no paging at all, in which case neither
//                  clause is added and offset is ignored.
// @param offset    Row offset; only applied when limit is non-negative and offset is
//                  non-negative.
static void AppendLimitOffsetClause(std::string &sql, std::vector<NativeRdb::ValueObject>& bindArgs,
    int limit, int offset)
{
    if (limit < 0) {
        return;
    }
    sql.append(" ").append(LIMIT_CLAUSE);
    bindArgs.push_back(limit);

    if (offset < 0) {
        return;
    }
    sql.append(" ").append(OFFSET_CLAUSE);
    bindArgs.push_back(offset);
}
286 
GetAllDuplicateAssets(const NativeRdb::RdbPredicates & predicates,const std::vector<std::string> & columns)287 std::shared_ptr<NativeRdb::ResultSet> DuplicatePhotoOperation::GetAllDuplicateAssets(
288     const NativeRdb::RdbPredicates& predicates, const std::vector<std::string>& columns)
289 {
290     int limit = predicates.GetLimit();
291     int offset = predicates.GetOffset();
292     bool isQueryCount = find(columns.begin(), columns.end(), MEDIA_COLUMN_COUNT) != columns.end();
293     MEDIA_INFO_LOG("Limit: %{public}d, Offset: %{public}d, isQueryCount: %{public}d", limit, offset, isQueryCount);
294     auto rdbStore = MediaLibraryUnistoreManager::GetInstance().GetRdbStore();
295     CHECK_AND_RETURN_RET_LOG(rdbStore != nullptr, nullptr, "GetAllDuplicateAssets failed, rdbStore is nullptr");
296 
297     MediaLibraryTracer tracer;
298     if (isQueryCount) {
299         tracer.Start("QueryAllDuplicateAssets_count");
300         std::call_once(onceFlag_, [&]() { rdbStore->ExecuteSql(IDX_DUPLICATE_ASSETS); });
301         return rdbStore->QueryByStep(GetQueryAllDuplicateAssetsCountSql());
302     }
303 
304     tracer.Start("QueryAllDuplicateAssets_records");
305     std::string sql = GetQueryAllDuplicateAssetsSql();
306     std::vector<NativeRdb::ValueObject> bindArgs {};
307     AppendLimitOffsetClause(sql, bindArgs, limit, offset);
308     return rdbStore->QueryByStep(sql, bindArgs);
309 }
310 
GetDuplicateAssetsToDelete(const NativeRdb::RdbPredicates & predicates,const std::vector<std::string> & columns)311 std::shared_ptr<NativeRdb::ResultSet> DuplicatePhotoOperation::GetDuplicateAssetsToDelete(
312     const NativeRdb::RdbPredicates& predicates, const std::vector<std::string>& columns)
313 {
314     int limit = predicates.GetLimit();
315     int offset = predicates.GetOffset();
316     bool isQueryCount = find(columns.begin(), columns.end(), MEDIA_COLUMN_COUNT) != columns.end();
317     MEDIA_INFO_LOG("Limit: %{public}d, Offset: %{public}d, isQueryCount: %{public}d", limit, offset, isQueryCount);
318     auto rdbStore = MediaLibraryUnistoreManager::GetInstance().GetRdbStore();
319     CHECK_AND_RETURN_RET_LOG(rdbStore != nullptr, nullptr, "GetAllDuplicateAssets failed, rdbStore is nullptr");
320 
321     MediaLibraryTracer tracer;
322     if (isQueryCount) {
323         tracer.Start("QueryCanDelDuplicateAssets_count");
324         return rdbStore->QueryByStep(GetDuplicateAssetsToDeleteCountSql());
325     }
326 
327     tracer.Start("QueryCanDelDuplicateAssets_records");
328     std::unordered_set<std::string> columnSet{ "file_id", "title", "size", "orientation" };
329     columnSet.insert(columns.begin(), columns.end());
330     std::string sql = GetDuplicateAssetsToDeleteSql();
331     std::string selectColumns = GetSelectColumns(columnSet);
332     MediaFileUtils::ReplaceAll(sql, SELECT_COLUMNS, selectColumns);
333     std::vector<NativeRdb::ValueObject> bindArgs {};
334     AppendLimitOffsetClause(sql, bindArgs, limit, offset);
335     return rdbStore->QueryByStep(sql, bindArgs);
336 }
337 } // namespace Media
338 } // namespace OHOS
339