// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.language.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "cloud.google.com/go/language/apiv2/languagepb;languagepb";
option java_multiple_files = true;
option java_outer_classname = "LanguageServiceProto";
option java_package = "com.google.cloud.language.v2";

// Provides text analysis operations such as sentiment analysis, entity
// recognition, text classification, and text moderation.
service LanguageService {
  option (google.api.default_host) = "language.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-language,"
      "https://www.googleapis.com/auth/cloud-platform";

  // Analyzes the sentiment of the provided text.
  rpc AnalyzeSentiment(AnalyzeSentimentRequest)
      returns (AnalyzeSentimentResponse) {
    option (google.api.http) = {
      post: "/v2/documents:analyzeSentiment"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Finds named entities (currently proper names and common nouns) in the text
  // along with entity types, probability, mentions for each entity, and
  // other properties.
  rpc AnalyzeEntities(AnalyzeEntitiesRequest)
      returns (AnalyzeEntitiesResponse) {
    option (google.api.http) = {
      post: "/v2/documents:analyzeEntities"
      body: "*"
    };
    option (google.api.method_signature) = "document,encoding_type";
    option (google.api.method_signature) = "document";
  }

  // Classifies a document into categories.
  rpc ClassifyText(ClassifyTextRequest) returns (ClassifyTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:classifyText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // Moderates a document for harmful and sensitive categories.
  rpc ModerateText(ModerateTextRequest) returns (ModerateTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:moderateText"
      body: "*"
    };
    option (google.api.method_signature) = "document";
  }

  // A convenience method that provides all features in one call. The analyses
  // to run are selected through `AnnotateTextRequest.features`.
  rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) {
    option (google.api.http) = {
      post: "/v2/documents:annotateText"
      body: "*"
    };
    option (google.api.method_signature) = "document,features,encoding_type";
    option (google.api.method_signature) = "document,features";
  }
}

// Represents the input to API methods.
message Document {
  // The document types enum.
  enum Type {
    // The content type is not specified.
    TYPE_UNSPECIFIED = 0;

    // Plain text
    PLAIN_TEXT = 1;

    // HTML
    HTML = 2;
  }

  // Required. If the type is not set or is `TYPE_UNSPECIFIED`,
  // returns an `INVALID_ARGUMENT` error.
  Type type = 1;

  // The source of the document: a string containing the content or a
  // Google Cloud Storage URI. At most one of the members can be set.
  oneof source {
    // The content of the input in string format.
    // Cloud audit logging exempt since it is based on user data.
    string content = 2;

    // The Google Cloud Storage URI where the file content is located.
    // This URI must be of the form: gs://bucket_name/object_name. For more
    // details, see https://cloud.google.com/storage/docs/reference-uris.
    // NOTE: Cloud Storage object versioning is not supported.
    string gcs_content_uri = 3;
  }

  // Optional. The language of the document (if not specified, the language is
  // automatically detected). Both ISO and BCP-47 language codes are
  // accepted.
  //
  // [Language
  // Support](https://cloud.google.com/natural-language/docs/languages) lists
  // currently supported languages for each API method. If the language (either
  // specified by the caller or automatically detected) is not supported by the
  // called API method, an `INVALID_ARGUMENT` error is returned.
  string language_code = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Represents a sentence in the input document.
message Sentence {
  // The sentence text.
  TextSpan text = 1;

  // For calls to
  // [AnalyzeSentiment][google.cloud.language.v2.LanguageService.AnalyzeSentiment]
  // or if
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment]
  // is set to true, this field will contain the sentiment for the sentence.
  Sentiment sentiment = 2;
}

// Represents the text encoding that the caller uses to process the output.
// Providing an `EncodingType` is recommended because the API provides the
// beginning offsets for various outputs, such as tokens and mentions, and
// languages that natively use different text encodings may access offsets
// differently.
enum EncodingType {
  // If `EncodingType` is not specified, encoding-dependent information (such as
  // `begin_offset`) will be set to `-1`.
  NONE = 0;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-8 encoding of the input. C++ and Go are examples of languages
  // that use this encoding natively.
  UTF8 = 1;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-16 encoding of the input. Java and JavaScript are examples of
  // languages that use this encoding natively.
  UTF16 = 2;

  // Encoding-dependent information (such as `begin_offset`) is calculated based
  // on the UTF-32 encoding of the input. Python is an example of a language
  // that uses this encoding natively.
  UTF32 = 3;
}

// Represents a phrase in the text that is a known entity, such as
// a person, an organization, or location. The API associates information, such
// as probability and mentions, with entities.
message Entity {
  // The type of the entity. For most entity types, the associated metadata is a
  // Wikipedia URL (`wikipedia_url`) and Knowledge Graph MID (`mid`). The values
  // below list the associated fields for entities that have different
  // metadata.
  enum Type {
    // Unknown
    UNKNOWN = 0;

    // Person
    PERSON = 1;

    // Location
    LOCATION = 2;

    // Organization
    ORGANIZATION = 3;

    // Event
    EVENT = 4;

    // Artwork
    WORK_OF_ART = 5;

    // Consumer product
    CONSUMER_GOOD = 6;

    // Other types of entities
    OTHER = 7;

    // Phone number
    //
    // The metadata lists the phone number, formatted according to local
    // convention, plus whichever additional elements appear in the text:
    //
    // * `number` - the actual number, broken down into sections as per local
    //   convention
    // * `national_prefix` - country code, if detected
    // * `area_code` - region or area code, if detected
    // * `extension` - phone extension (to be dialed after connection), if
    //   detected
    PHONE_NUMBER = 9;

    // Address
    //
    // The metadata identifies the street number and locality plus whichever
    // additional elements appear in the text:
    //
    // * `street_number` - street number
    // * `locality` - city or town
    // * `street_name` - street/route name, if detected
    // * `postal_code` - postal code, if detected
    // * `country` - country, if detected
    // * `broad_region` - administrative area, such as the state, if detected
    // * `narrow_region` - smaller administrative area, such as county, if
    //   detected
    // * `sublocality` - used in Asian addresses to demark a district within a
    //   city, if detected
    ADDRESS = 10;

    // Date
    //
    // The metadata identifies the components of the date:
    //
    // * `year` - four digit year, if detected
    // * `month` - two digit month number, if detected
    // * `day` - two digit day number, if detected
    DATE = 11;

    // Number
    //
    // The metadata is the number itself.
    NUMBER = 12;

    // Price
    //
    // The metadata identifies the `value` and `currency`.
    PRICE = 13;
  }

  // The representative name for the entity.
  string name = 1;

  // The entity type.
  Type type = 2;

  // Metadata associated with the entity.
  //
  // For most entity types, the metadata is a Wikipedia URL (`wikipedia_url`)
  // and Knowledge Graph MID (`mid`), if they are available. For the metadata
  // associated with other entity types, see the documentation of the
  // [Type][google.cloud.language.v2.Entity.Type] enum values.
  map<string, string> metadata = 3;

  // The mentions of this entity in the input document. The API currently
  // supports proper noun mentions.
  repeated EntityMention mentions = 5;

  // NOTE(review): neither an `AnalyzeEntitySentiment` RPC nor a
  // `Features.extract_entity_sentiment` flag is declared in this file, so the
  // conditions described below cannot be met through the API surface shown
  // here. Confirm whether this field is ever populated in v2.

  // For calls to `AnalyzeEntitySentiment` or if
  // `AnnotateTextRequest.Features.extract_entity_sentiment` is set to true,
  // this field will contain the aggregate sentiment expressed for this entity
  // in the provided document.
  Sentiment sentiment = 6;
}

// Represents the feeling associated with the entire text or entities in
// the text.
message Sentiment {
  // A non-negative number in the [0, +inf) range, which represents
  // the absolute magnitude of sentiment regardless of score (positive or
  // negative).
  float magnitude = 1;

  // Sentiment score between -1.0 (negative sentiment) and 1.0
  // (positive sentiment).
  float score = 2;
}

// Represents a mention for an entity in the text. Currently, proper noun
// mentions are supported.
message EntityMention {
  // The supported types of mentions.
  enum Type {
    // Unknown
    TYPE_UNKNOWN = 0;

    // Proper name
    PROPER = 1;

    // Common noun (or noun compound)
    COMMON = 2;
  }

  // The mention text.
  TextSpan text = 1;

  // The type of the entity mention.
  Type type = 2;

  // NOTE(review): neither an `AnalyzeEntitySentiment` RPC nor a
  // `Features.extract_entity_sentiment` flag is declared in this file, so the
  // conditions described below cannot be met through the API surface shown
  // here. Confirm whether this field is ever populated in v2.

  // For calls to `AnalyzeEntitySentiment` or if
  // `AnnotateTextRequest.Features.extract_entity_sentiment` is set to true,
  // this field will contain the sentiment expressed for this mention of the
  // entity in the provided document.
  Sentiment sentiment = 3;

  // Probability score associated with the entity.
  //
  // The score shows the probability of the entity mention being the entity
  // type. The score is in (0, 1] range.
  float probability = 4;
}

// Represents a text span in the input document.
message TextSpan {
  // The content of the text span, which is a substring of the document.
  string content = 1;

  // The API calculates the beginning offset of the content in the original
  // document according to the
  // [EncodingType][google.cloud.language.v2.EncodingType] specified in the API
  // request. As documented on `EncodingType.NONE`, the offset is `-1` when no
  // encoding type is specified.
  int32 begin_offset = 2;
}

// Represents a category returned from the text classifier. The same message is
// also used for the moderation categories returned by text moderation.
message ClassificationCategory {
  // The name of the category representing the document.
  string name = 1;

  // The classifier's confidence of the category. Number represents how certain
  // the classifier is that this category represents the given text.
  float confidence = 2;
}

// The sentiment analysis request message.
message AnalyzeSentimentRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate sentence offsets.
  EncodingType encoding_type = 2;
}

// The sentiment analysis response message.
message AnalyzeSentimentResponse {
  // The overall sentiment of the input document.
  Sentiment document_sentiment = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See the
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // The sentiment for all the sentences in the document.
  repeated Sentence sentences = 3;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 4;
}

// The entity analysis request message.
message AnalyzeEntitiesRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 2;
}

// The entity analysis response message.
message AnalyzeEntitiesResponse {
  // The recognized entities in the input document.
  repeated Entity entities = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See the
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The document classification request message.
message ClassifyTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document classification response message.
message ClassifyTextResponse {
  // Categories representing the input document.
  repeated ClassificationCategory categories = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See the
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The document moderation request message.
message ModerateTextRequest {
  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];
}

// The document moderation response message.
message ModerateTextResponse {
  // Harmful and sensitive categories representing the input document.
  repeated ClassificationCategory moderation_categories = 1;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See the
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 2;

  // Whether the language is officially supported. The API may still return a
  // response when the language is not supported, but it is on a best effort
  // basis.
  bool language_supported = 3;
}

// The request message for the text annotation API, which can perform multiple
// analysis types in one call.
message AnnotateTextRequest {
  // All available features.
  // Setting each one to true will enable that specific analysis for the input.
  message Features {
    // Optional. Extract entities.
    bool extract_entities = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Extract document-level sentiment.
    bool extract_document_sentiment = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Classify the full document into categories.
    bool classify_text = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Moderate the document for harmful and sensitive categories.
    bool moderate_text = 5 [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Input document.
  Document document = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The enabled features.
  Features features = 2 [(google.api.field_behavior) = REQUIRED];

  // The encoding type used by the API to calculate offsets.
  EncodingType encoding_type = 3;
}

// The text annotations response message.
message AnnotateTextResponse {
  // Sentences in the input document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
  repeated Sentence sentences = 1;

  // NOTE(review): this comment previously also listed
  // `AnnotateTextRequest.Features.extract_entity_sentiment` as a trigger, but
  // no such flag is declared in `Features` in this file. Confirm and restore
  // the reference if the flag is added.

  // Entities, along with their semantic information, in the input document.
  // Populated if the user enables
  // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v2.AnnotateTextRequest.Features.extract_entities].
  repeated Entity entities = 2;

  // The overall sentiment for the document. Populated if the user enables
  // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v2.AnnotateTextRequest.Features.extract_document_sentiment].
  Sentiment document_sentiment = 3;

  // The language of the text, which will be the same as the language specified
  // in the request or, if not specified, the automatically-detected language.
  // See the
  // [Document.language_code][google.cloud.language.v2.Document.language_code]
  // field for more details.
  string language_code = 4;

  // Categories identified in the input document.
  repeated ClassificationCategory categories = 5;

  // Harmful and sensitive categories identified in the input document.
  repeated ClassificationCategory moderation_categories = 6;

  // Whether the language is officially supported by all requested features.
  // The API may still return a response when the language is not supported, but
  // it is on a best effort basis.
  bool language_supported = 7;
}