• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/ruby
2
3require 'google/protobuf'
4require 'utf8_pb'
5require 'test/unit'
6
# Test helper that can divert warning messages into an in-memory list.
#
# The state lives in a class variable on purpose: +warn+ and +capture+ may run
# with different receivers, and a class variable is resolved against this
# module in both cases.
module CaptureWarnings
  # Buffer for the capture in progress; nil while nothing is being captured.
  @@warnings = nil

  module_function

  # Appends +message+ to the active buffer. When no capture is in progress the
  # call is forwarded unchanged to the next +warn+ implementation via super.
  def warn(message, category: nil, **kwargs)
    return super unless @@warnings

    @@warnings << message
  end

  # Runs the given block with capturing switched on.
  #
  # Returns the list of messages recorded while the block ran. The buffer is
  # always cleared afterwards, even when the block raises.
  def capture
    @@warnings = []
    yield
    # Read the buffer back before the ensure clause resets it.
    @@warnings
  ensure
    @@warnings = nil
  end
end
28
# Hook the capturing +warn+ into Warning so Warning.warn reaches it first.
Warning.extend(CaptureWarnings)
30
# Shared test cases, one per kind of string slot on a TestUtf8 message.
#
# The including class must supply:
#   bad_utf8_string  - returns the problematic string to store
#   assert_bad_utf8  - takes a block and checks how the store is handled
module Utf8Test
  # Assigns the bad string to a singular string field.
  def test_scalar
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.optional_string = bad_utf8_string
    end
  end

  # Appends the bad string to a repeated string field.
  def test_repeated
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.repeated_string << bad_utf8_string
    end
  end

  # Uses the bad string as a map key.
  def test_map_key
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.map_string_string[bad_utf8_string] = "abc"
    end
  end

  # Uses the bad string as a map value.
  def test_map_value
    message = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 do
      message.map_string_string["abc"] = bad_utf8_string
    end
  end
end
52
53# Tests the case of string objects that are marked UTF-8, but contain invalid
54# UTF-8.
55#
56# For now these only warn, but in the next major version they will throw an
57# exception.
class MarkedUtf8Test < Test::Unit::TestCase
  include Utf8Test

  # Runs the block while capturing warnings and expects exactly one, carrying
  # the invalid-UTF-8 message.
  def assert_bad_utf8(&block)
    captured = CaptureWarnings.capture(&block)
    assert_equal 1, captured.length
    assert_match(/String is invalid UTF-8. This will be an error in a future version./, captured[0])
  end

  # Returns a string literal holding the single byte 0x80, after asserting
  # that Ruby reports its encoding as invalid.
  def bad_utf8_string
    "\x80".tap do |invalid|
      assert_false invalid.valid_encoding?
    end
  end
end
73
74# This test doesn't work in JRuby because JRuby appears to have a bug where
75# the "valid" bit on a string's data is not invalidated properly when the
76# string is modified: https://github.com/jruby/jruby/issues/8316
unless defined?(JRUBY_VERSION)
  # Covers string objects that are marked UTF-8 and start out as valid UTF-8,
  # but are then modified to hold invalid UTF-8.  This may put the string into
  # a state of "unknown" validity.
  #
  # For now these only warn, but in the next major version they will raise an
  # exception.
  class MarkedModifiedUtf8Test < Test::Unit::TestCase
    include Utf8Test

    # Runs the block while capturing warnings and expects exactly one,
    # carrying the invalid-UTF-8 message.
    def assert_bad_utf8(&block)
      captured = CaptureWarnings.capture(&block)
      assert_equal 1, captured.length
      assert_match(/String is invalid UTF-8. This will be an error in a future version./, captured[0])
    end

    # Builds a string that is checked as valid first and only afterwards has
    # its first character replaced with the byte 0x80.
    def bad_utf8_string
      text = " "
      # The validity check must happen before the mutation below.
      assert_true text.valid_encoding?
      text[0] = "\x80"
      text
    end
  end
end
101
102# Tests the case of string objects that are marked with a non-UTF-8 encoding,
103# but contain invalid UTF-8.
104#
105# This case will raise Encoding::UndefinedConversionError.
class MarkedNonUtf8Test < Test::Unit::TestCase
  include Utf8Test

  # Expects the block to raise Encoding::UndefinedConversionError.
  def assert_bad_utf8
    assert_raises(Encoding::UndefinedConversionError) do
      yield
    end
  end

  # Returns the byte 0x80 tagged as ASCII-8BIT, after asserting that the
  # string is valid in that encoding.
  def bad_utf8_string
    "\x80".force_encoding(Encoding::ASCII_8BIT).tap do |binary|
      assert_true binary.valid_encoding?
    end
  end
end
119
120# Tests the case of string objects that are marked with a non-UTF-8 encoding,
121# but are invalid even in their source encoding.
122#
# This case will raise Encoding::InvalidByteSequenceError.
# Deliberately not named MarkedNonUtf8Test: that name is already used by the
# Encoding::UndefinedConversionError case earlier in this file, and reusing it
# would reopen that class and override its helpers so that case never ran.
class MarkedInvalidNonUtf8Test < Test::Unit::TestCase
  include Utf8Test

  # Expects the block to raise Encoding::InvalidByteSequenceError.
  def assert_bad_utf8(&block)
    assert_raises(Encoding::InvalidByteSequenceError, &block)
  end

  # Returns the byte 0x80 tagged as Encoding::ASCII, after asserting that the
  # string is invalid in that encoding.
  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII)
    assert_false str.valid_encoding?
    str
  end
end
137