1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 5 * use this file except in compliance with the License. You may obtain a copy of 6 * the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 12 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 13 * License for the specific language governing permissions and limitations under 14 * the License. 15 */ 16 17 package com.android.inputmethod.research; 18 19 import com.android.inputmethod.latin.Dictionary; 20 import com.android.inputmethod.latin.Suggest; 21 22 import java.util.Random; 23 24 public class MainLogBuffer extends LogBuffer { 25 // The size of the n-grams logged. E.g. N_GRAM_SIZE = 2 means to sample bigrams. 26 private static final int N_GRAM_SIZE = 2; 27 // The number of words between n-grams to omit from the log. 28 private static final int DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES = 18; 29 30 private final ResearchLog mResearchLog; 31 private Suggest mSuggest; 32 33 // The minimum periodicity with which n-grams can be sampled. E.g. mWinWordPeriod is 10 if 34 // every 10th bigram is sampled, i.e., words 1-8 are not, but the bigram at words 9 and 10, etc. 35 // for 11-18, and the bigram at words 19 and 20. If an n-gram is not safe (e.g. it contains a 36 // number in the middle or an out-of-vocabulary word), then sampling is delayed until a safe 37 // n-gram does appear. 38 /* package for test */ int mMinWordPeriod; 39 40 // Counter for words left to suppress before an n-gram can be sampled. Reset to mMinWordPeriod 41 // after a sample is taken. 42 /* package for test */ int mWordsUntilSafeToSample; 43 MainLogBuffer(final ResearchLog researchLog)44 public MainLogBuffer(final ResearchLog researchLog) { 45 super(N_GRAM_SIZE); 46 mResearchLog = researchLog; 47 mMinWordPeriod = DEFAULT_NUMBER_OF_WORDS_BETWEEN_SAMPLES + N_GRAM_SIZE; 48 final Random random = new Random(); 49 mWordsUntilSafeToSample = random.nextInt(mMinWordPeriod); 50 } 51 setSuggest(Suggest suggest)52 public void setSuggest(Suggest suggest) { 53 mSuggest = suggest; 54 } 55 56 @Override shiftIn(final LogUnit newLogUnit)57 public void shiftIn(final LogUnit newLogUnit) { 58 super.shiftIn(newLogUnit); 59 if (newLogUnit.hasWord()) { 60 if (mWordsUntilSafeToSample > 0) { 61 mWordsUntilSafeToSample--; 62 } 63 } 64 } 65 resetWordCounter()66 public void resetWordCounter() { 67 mWordsUntilSafeToSample = mMinWordPeriod; 68 } 69 70 /** 71 * Determines whether the content of the MainLogBuffer can be safely uploaded in its complete 72 * form and still protect the user's privacy. 73 * 74 * The size of the MainLogBuffer is just enough to hold one n-gram, its corrections, and any 75 * non-character data that is typed between words. The decision about privacy is made based on 76 * the buffer's entire content. If it is decided that the privacy risks are too great to upload 77 * the contents of this buffer, a censored version of the LogItems may still be uploaded. E.g., 78 * the screen orientation and other characteristics about the device can be uploaded without 79 * revealing much about the user. 80 */ isSafeToLog()81 public boolean isSafeToLog() { 82 // Check that we are not sampling too frequently. Having sampled recently might disclose 83 // too much of the user's intended meaning. 84 if (mWordsUntilSafeToSample > 0) { 85 return false; 86 } 87 if (mSuggest == null || !mSuggest.hasMainDictionary()) { 88 // Main dictionary is unavailable. Since we cannot check it, we cannot tell if a word 89 // is out-of-vocabulary or not. Therefore, we must judge the entire buffer contents to 90 // potentially pose a privacy risk. 91 return false; 92 } 93 // Reload the dictionary in case it has changed (e.g., because the user has changed 94 // languages). 95 final Dictionary dictionary = mSuggest.getMainDictionary(); 96 if (dictionary == null) { 97 return false; 98 } 99 // Check each word in the buffer. If any word poses a privacy threat, we cannot upload the 100 // complete buffer contents in detail. 101 final int length = mLogUnits.size(); 102 for (int i = 0; i < length; i++) { 103 final LogUnit logUnit = mLogUnits.get(i); 104 final String word = logUnit.getWord(); 105 if (word == null) { 106 // Digits outside words are a privacy threat. 107 if (logUnit.hasDigit()) { 108 return false; 109 } 110 } else { 111 // Words not in the dictionary are a privacy threat. 112 if (!(dictionary.isValidWord(word))) { 113 return false; 114 } 115 } 116 } 117 // All checks have passed; this buffer's content can be safely uploaded. 118 return true; 119 } 120 121 @Override onShiftOut(LogUnit logUnit)122 protected void onShiftOut(LogUnit logUnit) { 123 if (mResearchLog != null) { 124 mResearchLog.publish(logUnit, false /* isIncludingPrivateData */); 125 } 126 } 127 } 128