// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v2";

// Provides text analysis operations such as sentiment analysis and entity
// recognition.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, probability, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all features in one call.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1 [(google.api.field_behavior) = REQUIRED];

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // Optional. The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.<br>
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language_code = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to
  // [AnalyzeSentiment][google.cloud.language.v2.LanguageService.AnalyzeSentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set at `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as probability and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The
  // values below list the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    // convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    // detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    // detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    // city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the
  // [Type][google.cloud.language.v2.Entity.Type] enum.
  map<string, string> metadata = 3;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // The aggregate sentiment expressed for this entity in the provided
  // document.
  //
  // NOTE(review): this comment previously stated that the field is populated
  // for `AnalyzeEntitySentiment` calls or when
  // `AnnotateTextRequest.Features.extract_entity_sentiment` is set to true.
  // Neither is defined in this file; confirm when this field is populated.
  Sentiment sentiment = 6;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 1;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // The sentiment expressed for this mention of the entity in the provided
  // document.
  //
  // NOTE(review): this comment previously stated that the field is populated
  // for `AnalyzeEntitySentiment` calls or when
  // `AnnotateTextRequest.Features.extract_entity_sentiment` is set to true.
  // Neither is defined in this file; confirm when this field is populated.
  Sentiment sentiment = 3;

  // Probability score associated with the entity.
  //
  // The score shows the probability of the entity mention being the entity
  // type. The score is in (0, 1] range.
  float probability = 4;
}

// Represents a text span in the input document.
message TextSpan {
  // The content of the text span, which is a substring of the document.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v2.EncodingType] specified in the API
  // request.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 4;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The document moderation request message.
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document moderation response message.
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The request message for the text annotation API, which can perform multiple
// analysis types in one call.
message AnnotateTextRequest {
  // All available features.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Optional. Extract entities.
    bool extract_entities = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Extract document-level sentiment.
    bool extract_document_sentiment = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Classify the full document into categories.
    bool classify_text = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Moderate the document for harmful and sensitive categories.
    bool moderate_text = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
  repeated Sentence sentences = 1;

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 2;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 3;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 4;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 5;

  // Harmful and sensitive categories identified in the input document.
  repeated ClassificationCategory moderation_categories = 6;

  // Whether the language is officially supported by all requested features.
  // The API may still return a response when the language is not supported, but
  // it is on a best effort basis.
  bool language_supported = 7;
}