#!/usr/bin/ruby

require 'google/protobuf'
require 'utf8_pb'
require 'test/unit'

# Hook that lets a test collect warnings instead of having them printed.
#
# The module is mixed into Ruby's `Warning` module (see `Warning.extend`
# below), so every warning routed through `Warning.warn` passes through
# `warn` here.
module CaptureWarnings
  # Buffer of captured messages; nil while no capture is in progress.
  #
  # A class variable is used deliberately: `warn` runs with `self == Warning`
  # (via `extend`), while `capture` is invoked as `CaptureWarnings.capture`.
  # Both need to see the same storage, which an instance variable on either
  # object would not provide.
  @@warnings = nil

  module_function

  # Records `message` when a capture is active; otherwise defers to the
  # next `warn` implementation in the ancestor chain.
  def warn(message, category: nil, **kwargs)
    if @@warnings
      @@warnings << message
    else
      super
    end
  end

  # Runs the given block while capturing warnings.
  #
  # Returns the array of warning messages emitted during the block. Capturing
  # is always switched off again afterwards, even if the block raises.
  def capture
    @@warnings = []
    yield
    @@warnings
  ensure
    @@warnings = nil
  end
end

Warning.extend CaptureWarnings

# Shared test cases. Each including class supplies:
#   - bad_utf8_string: builds the problematic string for that scenario
#   - assert_bad_utf8: asserts how protobuf reacts when the block stores it
module Utf8Test
  def test_scalar
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.optional_string = bad_utf8_string() }
  end

  def test_repeated
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.repeated_string << bad_utf8_string() }
  end

  def test_map_key
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.map_string_string[bad_utf8_string()] = "abc" }
  end

  def test_map_value
    msg = Utf8TestProtos::TestUtf8.new
    assert_bad_utf8 { msg.map_string_string["abc"] = bad_utf8_string() }
  end
end

# Tests the case of string objects that are marked UTF-8, but contain invalid
# UTF-8.
#
# For now these only warn, but in the next major version they will throw an
# exception.
class MarkedUtf8Test < Test::Unit::TestCase
  # Expects exactly one "invalid UTF-8" warning to be emitted by the block.
  def assert_bad_utf8(&block)
    warnings = CaptureWarnings.capture(&block)
    assert_equal 1, warnings.length
    assert_match(/String is invalid UTF-8. This will be an error in a future version./, warnings[0])
  end

  # A string that is invalid UTF-8 from the moment it is created.
  def bad_utf8_string
    str = "\x80"
    assert_false str.valid_encoding?
    str
  end

  include Utf8Test
end

# This test doesn't work in JRuby because JRuby appears to have a bug where
# the "valid" bit on a string's data is not invalidated properly when the
# string is modified: https://github.com/jruby/jruby/issues/8316
if !defined? JRUBY_VERSION
  # Tests the case of string objects that are marked UTF-8, and initially contain
  # valid UTF-8, but are later modified to be invalid UTF-8.  This may put the
  # string into a state of "unknown" validity.
  #
  # For now these only warn, but in the next major version they will throw an
  # exception.
  class MarkedModifiedUtf8Test < Test::Unit::TestCase
    # Expects exactly one "invalid UTF-8" warning to be emitted by the block.
    def assert_bad_utf8(&block)
      warnings = CaptureWarnings.capture(&block)
      assert_equal 1, warnings.length
      assert_match(/String is invalid UTF-8. This will be an error in a future version./, warnings[0])
    end

    # A string that starts out valid and is then mutated in place to contain
    # an invalid byte. The validity check happens *before* the mutation on
    # purpose, so the string is not re-validated afterwards by this helper.
    def bad_utf8_string
      str = " "
      assert_true str.valid_encoding?
      str[0] = "\x80"
      str
    end

    include Utf8Test
  end
end

# Tests the case of string objects that are marked with a non-UTF-8 encoding,
# but contain invalid UTF-8.
#
# This case will raise Encoding::UndefinedConversionError.
class MarkedNonUtf8Test < Test::Unit::TestCase
  # Expects the block to raise Encoding::UndefinedConversionError.
  def assert_bad_utf8(&block)
    assert_raises(Encoding::UndefinedConversionError, &block)
  end

  # A binary (ASCII-8BIT) string: valid in its own encoding, but the byte has
  # no UTF-8 equivalent.
  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII_8BIT)
    assert_true str.valid_encoding?
    str
  end

  include Utf8Test
end

# Tests the case of string objects that are marked with a non-UTF-8 encoding,
# but are invalid even in their source encoding.
#
# This case will raise Encoding::InvalidByteSequenceError
#
# NOTE: this class must not share a name with MarkedNonUtf8Test above. Reusing
# the name reopens that class and replaces its helper methods, so the
# UndefinedConversionError scenario would silently never run.
class MarkedInvalidNonUtf8Test < Test::Unit::TestCase
  # Expects the block to raise Encoding::InvalidByteSequenceError.
  def assert_bad_utf8(&block)
    assert_raises(Encoding::InvalidByteSequenceError, &block)
  end

  # A US-ASCII string containing a byte outside the 7-bit range, i.e. invalid
  # in its declared encoding.
  def bad_utf8_string
    str = "\x80".force_encoding(Encoding::ASCII)
    assert_false str.valid_encoding?
    str
  end

  include Utf8Test
end