1 /*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "lang_id/lang-id-wrapper.h"
18
19 #include <fcntl.h>
20
21 #include "lang_id/fb_model/lang-id-from-fb.h"
22 #include "lang_id/lang-id.h"
23
24 namespace libtextclassifier3 {
25
26 namespace langid {
27
LoadFromPath(const std::string & langid_model_path)28 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromPath(
29 const std::string& langid_model_path) {
30 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
31 libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFile(langid_model_path);
32 return langid_model;
33 }
34
LoadFromDescriptor(const int langid_fd)35 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> LoadFromDescriptor(
36 const int langid_fd) {
37 std::unique_ptr<libtextclassifier3::mobile::lang_id::LangId> langid_model =
38 libtextclassifier3::mobile::lang_id::GetLangIdFromFlatbufferFileDescriptor(
39 langid_fd);
40 return langid_model;
41 }
42
GetPredictions(const libtextclassifier3::mobile::lang_id::LangId * model,const std::string & text)43 std::vector<std::pair<std::string, float>> GetPredictions(
44 const libtextclassifier3::mobile::lang_id::LangId* model, const std::string& text) {
45 return GetPredictions(model, text.data(), text.size());
46 }
47
GetPredictions(const libtextclassifier3::mobile::lang_id::LangId * model,const char * text,int text_size)48 std::vector<std::pair<std::string, float>> GetPredictions(
49 const libtextclassifier3::mobile::lang_id::LangId* model, const char* text,
50 int text_size) {
51 std::vector<std::pair<std::string, float>> prediction_results;
52 if (model == nullptr) {
53 return prediction_results;
54 }
55
56 const float noise_threshold =
57 model->GetFloatProperty("text_classifier_langid_noise_threshold", -1.0f);
58
59 // Speed up the things by specifying the max results we want. For example, if
60 // the noise threshold is 0.1, we don't need more than 10 results.
61 const int max_results =
62 noise_threshold < 0.01
63 ? -1 // -1 means FindLanguages returns all predictions
64 : static_cast<int>(1 / noise_threshold) + 1;
65
66 libtextclassifier3::mobile::lang_id::LangIdResult langid_result;
67 model->FindLanguages(text, text_size, &langid_result, max_results);
68 for (int i = 0; i < langid_result.predictions.size(); i++) {
69 const auto& prediction = langid_result.predictions[i];
70 if (prediction.second >= noise_threshold && prediction.first != "und") {
71 prediction_results.push_back({prediction.first, prediction.second});
72 }
73 }
74 return prediction_results;
75 }
76
GetLanguageTags(const libtextclassifier3::mobile::lang_id::LangId * model,const std::string & text)77 std::string GetLanguageTags(const libtextclassifier3::mobile::lang_id::LangId* model,
78 const std::string& text) {
79 const std::vector<std::pair<std::string, float>>& predictions =
80 GetPredictions(model, text);
81 const float threshold =
82 model->GetFloatProperty("text_classifier_langid_threshold", -1.0f);
83 std::string detected_language_tags = "";
84 bool first_accepted_language = true;
85 for (int i = 0; i < predictions.size(); i++) {
86 const auto& prediction = predictions[i];
87 if (threshold >= 0.f && prediction.second < threshold) {
88 continue;
89 }
90 if (first_accepted_language) {
91 first_accepted_language = false;
92 } else {
93 detected_language_tags += ",";
94 }
95 detected_language_tags += prediction.first;
96 }
97 return detected_language_tags;
98 }
99
100 } // namespace langid
101
102 } // namespace libtextclassifier3
103