diff --git a/bench/micro/file/write.rb b/bench/micro/file/write.rb index 1955e6072c32..a3e36bb39787 100644 --- a/bench/micro/file/write.rb +++ b/bench/micro/file/write.rb @@ -10,8 +10,8 @@ kilobyte = 'x' * 1024 -if defined?(Truffle::Ropes.flatten_rope) - kilobyte = Truffle::Ropes.flatten_rope(kilobyte) +if defined?(Truffle::Debug.flatten_string) + kilobyte = Truffle::Debug.flatten_string(kilobyte) end benchmark 'core-write-kilobyte' do @@ -20,8 +20,8 @@ gigabyte = 'x' * 1024 * 1024 * 1024 -if defined?(Truffle::Ropes.flatten_rope) - gigabyte = Truffle::Ropes.flatten_rope(gigabyte) +if defined?(Truffle::Debug.flatten_string) + gigabyte = Truffle::Debug.flatten_string(gigabyte) end benchmark 'core-write-gigabyte' do diff --git a/bench/micro/string/flatten.rb b/bench/micro/string/flatten.rb deleted file mode 100644 index 87f59b8ca5bb..000000000000 --- a/bench/micro/string/flatten.rb +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) 2020 Oracle and/or its affiliates. All rights reserved. This -# code is released under a tri EPL/GPL/LGPL license. You can use it, -# redistribute it and/or modify it under the terms of the: -# -# Eclipse Public License version 2.0, or -# GNU General Public License version 2, or -# GNU Lesser General Public License version 2.1. 
- -if RUBY_ENGINE == 'truffleruby' - str = "x" - 100.times do - str = "ab#{str}yz" - end - - flat = Truffle::Ropes.flatten_rope(str) - # Truffle::Ropes.debug_print_rope(str) - - benchmark "core-string-flatten" do - flat = Truffle::Ropes.flatten_rope(str) - end -end diff --git a/bench/micro/string/substring.rb b/bench/micro/string/substring.rb index 993e306b3ae4..d73e9a629049 100644 --- a/bench/micro/string/substring.rb +++ b/bench/micro/string/substring.rb @@ -9,7 +9,7 @@ side = 512 * 1024 big_string = ("a".b * side + "é".b + "z".b * side)[1...-1] result = big_string.byteslice(4, 8) -# Truffle::Ropes.debug_print_rope(big_string, false) +# Truffle::Debug.tstring_to_debug_string(big_string) benchmark "core-string-many-substrings-of-large-substring" do i = 0 diff --git a/doc/contributor/truffle-string.md b/doc/contributor/truffle-string.md new file mode 100644 index 000000000000..7915ccdd6914 --- /dev/null +++ b/doc/contributor/truffle-string.md @@ -0,0 +1,37 @@ +# TruffleString in TruffleRuby + +TruffleRuby uses `TruffleString` to represent Ruby Strings, but wraps them in either a RubyString or an ImmutableRubyString object. + +## Encodings Compatibility + +The notion of encodings compatibility is mostly the same between Ruby and TruffleString but differs in one point: +* An empty Ruby String is always considered compatible with any other Ruby String of any encoding. +* TruffleString does not consider whether a string is empty or not, and only looks at their encodings and code ranges. + +As a result, to use TruffleString equality nodes, one needs to: +1. Compute the compatible encoding with `NegotiateCompatibleStringEncodingNode` or `Primitive.encoding_ensure_compatible_str`. +2. Check if both sides are empty, and if so return true before using TruffleString equality nodes. + +`StringHelperNodes.StringEqualInternalNode` is a good example showing what is needed. 
+ +An example which would throw without empty checks is comparing an empty ISO-2022-JP (a dummy, non-ascii-compatible, fixed-width encoding) string with an empty US-ASCII string: + +```bash +$ jt ruby -e '"".force_encoding("ISO-2022-JP") == ""' +the given string is not compatible to the expected encoding "ISO_2022_JP", did you forget to convert it? (java.lang.IllegalArgumentException) +``` + +## Logical vs Physical Byte Offsets + +We categorize a byte offset into a `TruffleString` as either *logical* or *physical*. +A physical byte offset includes the offset from the `InternalByteArray` (`InternalByteArray#getOffset()`). +A logical byte offset does not include that and is the semantic byte offset from the start of the string. +Physical offsets are quite difficult to use and they are error-prone as they can be passed by mistake to a method taking a logical offset. +So avoid physical offsets as much as possible, and therefore avoid `InternalByteArray#getArray()`. + +## Tests + +This is a good set of tests to run when touching String code: +``` +jt test integration strict-encoding-checks +``` diff --git a/lib/cext/ABI_check.txt b/lib/cext/ABI_check.txt index 7f8f011eb73d..45a4fb75db86 100644 --- a/lib/cext/ABI_check.txt +++ b/lib/cext/ABI_check.txt @@ -1 +1 @@ -7 +8 diff --git a/lib/truffle/truffle/cext.rb b/lib/truffle/truffle/cext.rb index 7f2e97dc62d6..2b7773d1dc3e 100644 --- a/lib/truffle/truffle/cext.rb +++ b/lib/truffle/truffle/cext.rb @@ -692,10 +692,6 @@ def rb_thread_alone Thread.list.count == 1 ? 1 : 0 end - def rb_intern(str) - str.intern - end - def rb_int_positive_pow(a, b) a ** b end @@ -809,14 +805,6 @@ def rb_enc_get_index(obj) enc end - def rb_intern_str(string) - string.intern - end - - def rb_intern3(string, enc) - string.force_encoding(enc).intern - end - def rb_str_append(str, to_append) Primitive.string_append(str, to_append) end @@ -1766,7 +1754,7 @@ def rb_gv_get(name) end def rb_reg_match(re, str) - result = str ? 
Truffle::RegexpOperations.match(re, str, 0) : nil + result = Truffle::RegexpOperations.match(re, str, 0) Primitive.regexp_last_match_set(rb_get_special_vars(), result) result.begin(0) if result diff --git a/spec/ruby/core/file/shared/fnmatch.rb b/spec/ruby/core/file/shared/fnmatch.rb index 00682bb64cec..94f22144b060 100644 --- a/spec/ruby/core/file/shared/fnmatch.rb +++ b/spec/ruby/core/file/shared/fnmatch.rb @@ -159,10 +159,10 @@ end it "does not match leading periods in filenames with wildcards by default" do - File.send(@method, '*', '.profile').should == false - File.send(@method, '*', 'home/.profile').should == true - File.send(@method, '*/*', 'home/.profile').should == true - File.send(@method, '*/*', 'dave/.profile', File::FNM_PATHNAME).should == false + File.should_not.send(@method, '*', '.profile') + File.should.send(@method, '*', 'home/.profile') + File.should.send(@method, '*/*', 'home/.profile') + File.should_not.send(@method, '*/*', 'dave/.profile', File::FNM_PATHNAME) end it "matches patterns with leading periods to dotfiles by default" do diff --git a/spec/ruby/core/regexp/shared/quote.rb b/spec/ruby/core/regexp/shared/quote.rb index 33bdfd997939..953310276692 100644 --- a/spec/ruby/core/regexp/shared/quote.rb +++ b/spec/ruby/core/regexp/shared/quote.rb @@ -17,6 +17,11 @@ Regexp.send(@method, str).should == '\+\[\]\(' end + it "works for broken strings" do + Regexp.send(@method, "a.\x85b.".force_encoding("US-ASCII")).should =="a\\.\x85b\\.".force_encoding("US-ASCII") + Regexp.send(@method, "a.\x80".force_encoding("UTF-8")).should == "a\\.\x80".force_encoding("UTF-8") + end + it "sets the encoding of the result to US-ASCII if there are only US-ASCII characters present in the input String" do str = "abc".force_encoding("euc-jp") Regexp.send(@method, str).encoding.should == Encoding::US_ASCII diff --git a/spec/ruby/core/string/capitalize_spec.rb b/spec/ruby/core/string/capitalize_spec.rb index 8afaefc02107..751f4160a67f 100644 --- 
a/spec/ruby/core/string/capitalize_spec.rb +++ b/spec/ruby/core/string/capitalize_spec.rb @@ -10,6 +10,7 @@ "hello".capitalize.should == "Hello" "HELLO".capitalize.should == "Hello" "123ABC".capitalize.should == "123abc" + "abcdef"[1...-1].capitalize.should == "Bcde" end describe "full Unicode case mapping" do @@ -37,7 +38,7 @@ end it "handles non-ASCII substrings properly" do - "garçon"[1..-1].capitalize(:ascii).should == "Arçon" + "garçon"[1...-1].capitalize(:ascii).should == "Arço" end end diff --git a/spec/ruby/core/string/delete_prefix_spec.rb b/spec/ruby/core/string/delete_prefix_spec.rb index a063e443d8d8..17ce18bccad8 100644 --- a/spec/ruby/core/string/delete_prefix_spec.rb +++ b/spec/ruby/core/string/delete_prefix_spec.rb @@ -21,6 +21,10 @@ r.should == s end + it "does not remove partial bytes, only full characters" do + "\xe3\x81\x82".delete_prefix("\xe3").should == "\xe3\x81\x82" + end + it "doesn't set $~" do $~ = nil diff --git a/spec/ruby/core/string/delete_suffix_spec.rb b/spec/ruby/core/string/delete_suffix_spec.rb index 3d3274bc5b7e..0705c732463a 100644 --- a/spec/ruby/core/string/delete_suffix_spec.rb +++ b/spec/ruby/core/string/delete_suffix_spec.rb @@ -21,6 +21,10 @@ r.should == s end + it "does not remove partial bytes, only full characters" do + "\xe3\x81\x82".delete_suffix("\x82").should == "\xe3\x81\x82" + end + it "doesn't set $~" do $~ = nil diff --git a/spec/ruby/core/string/downcase_spec.rb b/spec/ruby/core/string/downcase_spec.rb index 86d848088906..f0a15f1e25f1 100644 --- a/spec/ruby/core/string/downcase_spec.rb +++ b/spec/ruby/core/string/downcase_spec.rb @@ -27,6 +27,10 @@ it "does not downcase non-ASCII characters" do "CÅR".downcase(:ascii).should == "cÅr" end + + it "works with substrings" do + "prefix TÉ"[-2..-1].downcase(:ascii).should == "tÉ" + end end describe "full Unicode case mapping adapted for Turkic languages" do diff --git a/spec/ruby/core/string/include_spec.rb b/spec/ruby/core/string/include_spec.rb index 
e32eb17c29e2..23e1e134ec73 100644 --- a/spec/ruby/core/string/include_spec.rb +++ b/spec/ruby/core/string/include_spec.rb @@ -13,6 +13,20 @@ StringSpecs::MyString.new("hello").include?(StringSpecs::MyString.new("lo")).should == true end + it "returns true if both strings are empty" do + "".should.include?("") + "".force_encoding("EUC-JP").should.include?("") + "".should.include?("".force_encoding("EUC-JP")) + "".force_encoding("EUC-JP").should.include?("".force_encoding("EUC-JP")) + end + + it "returns true if the RHS is empty" do + "a".should.include?("") + "a".force_encoding("EUC-JP").should.include?("") + "a".should.include?("".force_encoding("EUC-JP")) + "a".force_encoding("EUC-JP").should.include?("".force_encoding("EUC-JP")) + end + it "tries to convert other to string using to_str" do other = mock('lo') other.should_receive(:to_str).and_return("lo") diff --git a/spec/ruby/core/string/inspect_spec.rb b/spec/ruby/core/string/inspect_spec.rb index 8bfd465144d6..8bf3d3161fba 100644 --- a/spec/ruby/core/string/inspect_spec.rb +++ b/spec/ruby/core/string/inspect_spec.rb @@ -19,6 +19,21 @@ ].should be_computed_by(:inspect) end + it "returns a string with special characters replaced with \\ notation for UTF-16" do + pairs = [ + ["\a", '"\\a"'], + ["\b", '"\\b"'], + ["\t", '"\\t"'], + ["\n", '"\\n"'], + ["\v", '"\\v"'], + ["\f", '"\\f"'], + ["\r", '"\\r"'], + ["\e", '"\\e"'] + ].map { |str, result| [str.encode('UTF-16LE'), result] } + + pairs.should be_computed_by(:inspect) + end + it "returns a string with \" and \\ escaped with a backslash" do [ ["\"", '"\\""'], ["\\", '"\\\\"'] @@ -311,6 +326,11 @@ "\xF0\x9F".inspect.should == '"\\xF0\\x9F"' end + it "works for broken US-ASCII strings" do + s = "©".force_encoding("US-ASCII") + s.inspect.should == '"\xC2\xA9"' + end + describe "when default external is UTF-8" do before :each do @extenc, Encoding.default_external = Encoding.default_external, Encoding::UTF_8 diff --git a/spec/ruby/core/string/lstrip_spec.rb 
b/spec/ruby/core/string/lstrip_spec.rb index 02bc6b4322f6..75434613f18b 100644 --- a/spec/ruby/core/string/lstrip_spec.rb +++ b/spec/ruby/core/string/lstrip_spec.rb @@ -10,6 +10,14 @@ " hello world ".lstrip.should == "hello world " "\n\r\t\n\v\r hello world ".lstrip.should == "hello world " "hello".lstrip.should == "hello" + " こにちわ".lstrip.should == "こにちわ" + end + + it "works with lazy substrings" do + " hello "[1...-1].lstrip.should == "hello " + " hello world "[1...-1].lstrip.should == "hello world " + "\n\r\t\n\v\r hello world "[1...-1].lstrip.should == "hello world " + " こにちわ "[1...-1].lstrip.should == "こにちわ" end ruby_version_is '3.0' do @@ -27,20 +35,26 @@ a.should == "hello " end + it "returns nil if no modifications were made" do + a = "hello" + a.lstrip!.should == nil + a.should == "hello" + end + + it "makes a string empty if it is only whitespace" do + "".lstrip!.should == nil + " ".lstrip.should == "" + " ".lstrip.should == "" + end + ruby_version_is '3.0' do - it "strips leading \\0" do + it "removes leading NULL bytes and whitespace" do a = "\000 \000hello\000 \000" a.lstrip! a.should == "hello\000 \000" end end - it "returns nil if no modifications were made" do - a = "hello" - a.lstrip!.should == nil - a.should == "hello" - end - it "raises a FrozenError on a frozen instance that is modified" do -> { " hello ".freeze.lstrip! }.should raise_error(FrozenError) end @@ -51,9 +65,13 @@ -> { "".freeze.lstrip! }.should raise_error(FrozenError) end - it "raises an ArgumentError if the first codepoint is invalid" do + it "raises an ArgumentError if the first non-space codepoint is invalid" do s = "\xDFabc".force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false -> { s.lstrip! }.should raise_error(ArgumentError) + + s = " \xDFabc".force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.lstrip! 
}.should raise_error(ArgumentError) end end diff --git a/spec/ruby/core/string/ord_spec.rb b/spec/ruby/core/string/ord_spec.rb index cfc630a1249d..4cf26990fede 100644 --- a/spec/ruby/core/string/ord_spec.rb +++ b/spec/ruby/core/string/ord_spec.rb @@ -25,4 +25,9 @@ it "raises an ArgumentError if called on an empty String" do -> { ''.ord }.should raise_error(ArgumentError) end + + it "raises ArgumentError if the character is broken" do + s = "©".force_encoding("US-ASCII") + -> { s.ord }.should raise_error(ArgumentError, "invalid byte sequence in US-ASCII") + end end diff --git a/spec/ruby/core/string/reverse_spec.rb b/spec/ruby/core/string/reverse_spec.rb index bade4685d996..4206b8af90b7 100644 --- a/spec/ruby/core/string/reverse_spec.rb +++ b/spec/ruby/core/string/reverse_spec.rb @@ -29,6 +29,14 @@ it "reverses a string with multi byte characters" do "微軟正黑體".reverse.should == "體黑正軟微" end + + it "works with a broken string" do + str = "微軟\xDF\xDE正黑體".force_encoding(Encoding::UTF_8) + + str.valid_encoding?.should be_false + + str.reverse.should == "體黑正\xDE\xDF軟微" + end end describe "String#reverse!" do @@ -55,4 +63,13 @@ str.reverse! str.should == "體黑正軟微" end + + it "works with a broken string" do + str = "微軟\xDF\xDE正黑體".force_encoding(Encoding::UTF_8) + + str.valid_encoding?.should be_false + str.reverse! 
+ + str.should == "體黑正\xDE\xDF軟微" + end end diff --git a/spec/ruby/core/string/rstrip_spec.rb b/spec/ruby/core/string/rstrip_spec.rb index dc34b12719ed..ad8d6da37fd4 100644 --- a/spec/ruby/core/string/rstrip_spec.rb +++ b/spec/ruby/core/string/rstrip_spec.rb @@ -11,6 +11,14 @@ " hello world \n\r\t\n\v\r".rstrip.should == " hello world" "hello".rstrip.should == "hello" "hello\x00".rstrip.should == "hello" + "こにちわ ".rstrip.should == "こにちわ" + end + + it "works with lazy substrings" do + " hello "[1...-1].rstrip.should == " hello" + " hello world "[1...-1].rstrip.should == " hello world" + " hello world \n\r\t\n\v\r"[1...-1].rstrip.should == " hello world" + " こにちわ "[1...-1].rstrip.should == "こにちわ" end it "returns a copy of self with all trailing whitespace and NULL bytes removed" do @@ -37,6 +45,20 @@ a.should == "hello" end + it "makes a string empty if it is only whitespace" do + "".rstrip!.should == nil + " ".rstrip.should == "" + " ".rstrip.should == "" + end + + ruby_version_is '3.0' do + it "removes trailing NULL bytes and whitespace" do + a = "\000 goodbye \000" + a.rstrip! + a.should == "\000 goodbye" + end + end + it "raises a FrozenError on a frozen instance that is modified" do -> { " hello ".freeze.rstrip! }.should raise_error(FrozenError) end @@ -47,9 +69,18 @@ -> { "".freeze.rstrip! }.should raise_error(FrozenError) end - it "raises an ArgumentError if the last codepoint is invalid" do + it "raises an ArgumentError if the last non-space codepoint is invalid" do s = "abc\xDF".force_encoding(Encoding::UTF_8) s.valid_encoding?.should be_false -> { s.rstrip! }.should raise_error(ArgumentError) + + s = "abc\xDF ".force_encoding(Encoding::UTF_8) + s.valid_encoding?.should be_false + -> { s.rstrip! 
}.should raise_error(ArgumentError) + end + + it "removes broken codepoints" do + " abc \x80 ".rstrip!.should == " abc" + " abc \x80".rstrip!.should == " abc" end end diff --git a/spec/ruby/core/string/setbyte_spec.rb b/spec/ruby/core/string/setbyte_spec.rb index 03e5bad88b7c..77bff6403850 100644 --- a/spec/ruby/core/string/setbyte_spec.rb +++ b/spec/ruby/core/string/setbyte_spec.rb @@ -36,6 +36,12 @@ str.valid_encoding?.should be_true str.setbyte(2,253) str.valid_encoding?.should be_false + + str = "ABC" + str.setbyte(0, 0x20) # ' ' + str.should.valid_encoding? + str.setbyte(0, 0xE3) + str.should_not.valid_encoding? end it "regards a negative index as counting from the end of the String" do diff --git a/spec/ruby/core/string/split_spec.rb b/spec/ruby/core/string/split_spec.rb index 7ef34c65daae..b57f66081663 100644 --- a/spec/ruby/core/string/split_spec.rb +++ b/spec/ruby/core/string/split_spec.rb @@ -455,6 +455,14 @@ a.should == ["Chunky", "Bacon"] end + it "yields each split substring with default pattern for a lazy substring" do + a = [] + returned_object = "chunky bacon"[1...-1].split { |str| a << str.capitalize } + + returned_object.should == "hunky baco" + a.should == ["Hunky", "Baco"] + end + it "yields each split substring with default pattern for a non-ASCII string" do a = [] returned_object = "l'été arrive bientôt".split { |str| a << str } @@ -463,6 +471,14 @@ a.should == ["l'été", "arrive", "bientôt"] end + it "yields each split substring with default pattern for a non-ASCII lazy substring" do + a = [] + returned_object = "l'été arrive bientôt"[1...-1].split { |str| a << str } + + returned_object.should == "'été arrive bientô" + a.should == ["'été", "arrive", "bientô"] + end + it "yields the string when limit is 1" do a = [] returned_object = "chunky bacon".split("", 1) { |str| a << str.capitalize } diff --git a/spec/ruby/core/string/start_with_spec.rb b/spec/ruby/core/string/start_with_spec.rb index aaed197ff372..3833289f96d4 100644 --- 
a/spec/ruby/core/string/start_with_spec.rb +++ b/spec/ruby/core/string/start_with_spec.rb @@ -5,4 +5,14 @@ describe "String#start_with?" do it_behaves_like :start_with, :to_s + + # Here and not in the shared examples because this is invalid as a Symbol + it "does not check that we are not starting to match at the head of a character" do + "\xA9".should.start_with?("\xA9") # A9 is not a character head for UTF-8 + end + + it "does not check we are matching only part of a character" do + "\xe3\x81\x82".size.should == 1 + "\xe3\x81\x82".should.start_with?("\xe3") + end end diff --git a/spec/ruby/core/string/strip_spec.rb b/spec/ruby/core/string/strip_spec.rb index e841db54ce40..662f13b03219 100644 --- a/spec/ruby/core/string/strip_spec.rb +++ b/spec/ruby/core/string/strip_spec.rb @@ -35,6 +35,12 @@ a.should == "hello" end + it "makes a string empty if it is only whitespace" do + "".strip!.should == nil + " ".strip.should == "" + " ".strip.should == "" + end + ruby_version_is '3.0' do it "removes leading and trailing NULL bytes and whitespace" do a = "\000 goodbye \000" diff --git a/spec/ruby/core/string/swapcase_spec.rb b/spec/ruby/core/string/swapcase_spec.rb index 417f6c6d8d76..6307a1eaafe6 100644 --- a/spec/ruby/core/string/swapcase_spec.rb +++ b/spec/ruby/core/string/swapcase_spec.rb @@ -28,6 +28,10 @@ it "does not swapcase non-ASCII characters" do "aßet".swapcase(:ascii).should == "AßET" end + + it "works with substrings" do + "prefix aTé"[-3..-1].swapcase(:ascii).should == "Até" + end end describe "full Unicode case mapping adapted for Turkic languages" do diff --git a/spec/ruby/core/string/upcase_spec.rb b/spec/ruby/core/string/upcase_spec.rb index b2b34190feaa..209fe73b6ec9 100644 --- a/spec/ruby/core/string/upcase_spec.rb +++ b/spec/ruby/core/string/upcase_spec.rb @@ -27,6 +27,10 @@ it "does not upcase non-ASCII characters" do "aßet".upcase(:ascii).should == "AßET" end + + it "works with substrings" do + "prefix té"[-2..-1].upcase(:ascii).should == "Té" + end 
end describe "full Unicode case mapping adapted for Turkic languages" do diff --git a/spec/ruby/optional/capi/encoding_spec.rb b/spec/ruby/optional/capi/encoding_spec.rb index e18108c022fc..ae557b03d76a 100644 --- a/spec/ruby/optional/capi/encoding_spec.rb +++ b/spec/ruby/optional/capi/encoding_spec.rb @@ -128,10 +128,16 @@ describe "rb_enc_mbc_to_codepoint" do it "returns the correct codepoint for the given character and size" do - @s.rb_enc_mbc_to_codepoint("é", 2).should == 0x00E9 - @s.rb_enc_mbc_to_codepoint("éa", 2).should == 0x00E9 - @s.rb_enc_mbc_to_codepoint("éa", 1).should == 0xC3 - @s.rb_enc_mbc_to_codepoint("éa", 3).should == 0x00E9 + @s.rb_enc_mbc_to_codepoint("é").should == 0xE9 + end + + it "returns 0 if p == e" do + @s.rb_enc_mbc_to_codepoint("").should == 0 + end + + it "returns the raw byte if incomplete character in UTF-8" do + @s.rb_enc_mbc_to_codepoint("\xC3").should == 0xC3 + @s.rb_enc_mbc_to_codepoint("\x80").should == 0x80 end end @@ -630,6 +636,7 @@ it "returns the correct case fold for the given string" do @s.ONIGENC_MBC_CASE_FOLD("lower").should == ["l", 1] @s.ONIGENC_MBC_CASE_FOLD("Upper").should == ["u", 1] + @s.ONIGENC_MBC_CASE_FOLD("ABC"[1..-1]).should == ["b", 1] end it "works with other encodings" do diff --git a/spec/ruby/optional/capi/ext/encoding_spec.c b/spec/ruby/optional/capi/ext/encoding_spec.c index 68c4161bab0c..c49f6cde7e6e 100644 --- a/spec/ruby/optional/capi/ext/encoding_spec.c +++ b/spec/ruby/optional/capi/ext/encoding_spec.c @@ -120,10 +120,9 @@ static VALUE encoding_spec_rb_enc_from_index(VALUE self, VALUE index) { return rb_str_new2(rb_enc_from_index(NUM2INT(index))->name); } -static VALUE encoding_spec_rb_enc_mbc_to_codepoint(VALUE self, VALUE str, VALUE offset) { - int o = FIX2INT(offset); +static VALUE encoding_spec_rb_enc_mbc_to_codepoint(VALUE self, VALUE str) { char *p = RSTRING_PTR(str); - char *e = p + o; + char *e = RSTRING_END(str); return INT2FIX(rb_enc_mbc_to_codepoint(p, e, rb_enc_get(str))); } @@ -341,7 
+340,7 @@ void Init_encoding_spec(void) { rb_define_method(cls, "rb_enc_isalnum", encoding_spec_rb_enc_isalnum, 2); rb_define_method(cls, "rb_enc_isspace", encoding_spec_rb_enc_isspace, 2); rb_define_method(cls, "rb_enc_from_index", encoding_spec_rb_enc_from_index, 1); - rb_define_method(cls, "rb_enc_mbc_to_codepoint", encoding_spec_rb_enc_mbc_to_codepoint, 2); + rb_define_method(cls, "rb_enc_mbc_to_codepoint", encoding_spec_rb_enc_mbc_to_codepoint, 1); rb_define_method(cls, "rb_enc_mbcput", encoding_spec_rb_enc_mbcput, 2); rb_define_method(cls, "rb_enc_from_encoding", encoding_spec_rb_enc_from_encoding, 1); rb_define_method(cls, "rb_enc_get", encoding_spec_rb_enc_get, 1); diff --git a/spec/ruby/shared/string/end_with.rb b/spec/ruby/shared/string/end_with.rb index 5f2a01123565..0e4c1386e8e7 100644 --- a/spec/ruby/shared/string/end_with.rb +++ b/spec/ruby/shared/string/end_with.rb @@ -38,7 +38,7 @@ it "uses only the needed arguments" do find = mock('h') find.should_not_receive(:to_str) - "hello".send(@method).should.end_with?("o",find) + "hello".send(@method).should.end_with?("o", find) end it "works for multibyte strings" do @@ -51,4 +51,11 @@ "あれ".send(@method).end_with?(pat) end.should raise_error(Encoding::CompatibilityError) end + + it "checks that we are starting to match at the head of a character" do + "\xC3\xA9".send(@method).should_not.end_with?("\xA9") + "\xe3\x81\x82".send(@method).should_not.end_with?("\x82") + "ab".force_encoding("UTF-16BE").send(@method).should_not.end_with?( + "b".force_encoding("UTF-16BE")) + end end diff --git a/spec/ruby/shared/string/start_with.rb b/spec/ruby/shared/string/start_with.rb index d8d6e13f6ae6..6932a017b6d6 100644 --- a/spec/ruby/shared/string/start_with.rb +++ b/spec/ruby/shared/string/start_with.rb @@ -69,4 +69,8 @@ Regexp.last_match.should be_nil $1.should be_nil end + + it "does not check that we are not matching part of a character" do + "\xC3\xA9".send(@method).should.start_with?("\xC3") + end end diff --git 
a/spec/tags/core/string/rstrip_tags.txt b/spec/tags/core/string/rstrip_tags.txt new file mode 100644 index 000000000000..c794b417778d --- /dev/null +++ b/spec/tags/core/string/rstrip_tags.txt @@ -0,0 +1 @@ +fails:String#rstrip! removes broken codepoints diff --git a/spec/tags/truffle/splitting_tags.txt b/spec/tags/truffle/splitting_tags.txt new file mode 100644 index 000000000000..74b1e2811626 --- /dev/null +++ b/spec/tags/truffle/splitting_tags.txt @@ -0,0 +1 @@ +slow:Critical methods whic must split are under 100 AST nodes diff --git a/spec/tags/truffle/string/string_tags.txt b/spec/tags/truffle/string/string_tags.txt deleted file mode 100644 index 1fa55f246d1e..000000000000 --- a/spec/tags/truffle/string/string_tags.txt +++ /dev/null @@ -1,2 +0,0 @@ -slow:String has critical methods of under 100 AST nodes - diff --git a/spec/truffle/ropes/dump_string_spec.rb b/spec/truffle/ropes/dump_string_spec.rb deleted file mode 100644 index 1188ede19baa..000000000000 --- a/spec/truffle/ropes/dump_string_spec.rb +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2015, 2019 Oracle and/or its affiliates. All rights reserved. This -# code is released under a tri EPL/GPL/LGPL license. You can use it, -# redistribute it and/or modify it under the terms of the: -# -# Eclipse Public License version 2.0, or -# GNU General Public License version 2, or -# GNU Lesser General Public License version 2.1. 
- -require_relative '../../ruby/spec_helper' - -describe "Truffle::Ropes.dump_string" do - - it "returns a String" do - Truffle::Ropes.dump_string('foo').should be_kind_of(String) - end - - it "returns a sequence of escaped bytes in lower case" do - Truffle::Ropes.dump_string('foo').should =~ /(\\x[0-9a-f][0-9a-f])+/ - end - - it "returns correct bytes for the given string" do - Truffle::Ropes.dump_string('foo').should == "\\x66\\x6f\\x6f" - end - -end diff --git a/spec/truffle/ropes/substring_spec.rb b/spec/truffle/ropes/substring_spec.rb index 1f18acf78eab..6acbe915dbe9 100644 --- a/spec/truffle/ropes/substring_spec.rb +++ b/spec/truffle/ropes/substring_spec.rb @@ -10,8 +10,7 @@ require_relative '../../ruby/spec_helper' describe "A substring" do - - describe "of a UTF-8 valid SubtringRope" do + describe "of a UTF-8 valid SubstringRope" do it "correctly computes the index and consider offset as a byte offset" do complex = ("ééé" + "ascii)")[2..-1] complex[-1,1].should == ")" @@ -23,22 +22,4 @@ complex.end_with?(")").should == true end end - - describe "of a substring of a complex Rope" do - it "computes the bytes of that Rope to avoid flattening repetitively" do - side = 512 - big_string = ("a".b * side + "é".b + "z".b * side) - substring = big_string[1...-1] - Truffle::Ropes.should.bytes?(big_string) - Truffle::Ropes.should_not.bytes?(substring) - - subsubstring = substring.byteslice(4, 8) - Truffle::Ropes.should.bytes?(big_string) - Truffle::Ropes.should_not.bytes?(subsubstring) - - # done last as it computes the bytes as a side effect - subsubstring.should == "aaaaaaaa" - end - end - end diff --git a/spec/truffle/splitting_spec.rb b/spec/truffle/splitting_spec.rb new file mode 100644 index 000000000000..b95f7606334e --- /dev/null +++ b/spec/truffle/splitting_spec.rb @@ -0,0 +1,56 @@ +# Copyright (c) 2021 Oracle and/or its affiliates. All rights reserved. This +# code is released under a tri EPL/GPL/LGPL license. 
You can use it, +# redistribute it and/or modify it under the terms of the: +# +# Eclipse Public License version 2.0, or +# GNU General Public License version 2, or +# GNU Lesser General Public License version 2.1. + +require_relative '../ruby/spec_helper' + +describe 'Critical methods whic must split' do + it 'are under 100 AST nodes' do + code = <<-'EOF' +require 'strscan' + +methods = [ + String.instance_method(:sub), + String.instance_method(:sub!), + String.instance_method(:gsub), + String.instance_method(:gsub!), + + Truffle::StringOperations.method(:gsub_match_and_replace), + Truffle::StringOperations.method(:gsub_internal_hash), + Truffle::StringOperations.method(:gsub_internal_replacement), + Truffle::StringOperations.method(:gsub_internal_core_check_encoding), + Truffle::StringOperations.method(:gsub_internal_matches), + Truffle::StringOperations.method(:gsub_new_offset), + Truffle::StringOperations.method(:gsub_regexp_matches), + Truffle::StringOperations.method(:gsub_string_matches), + Truffle::StringOperations.method(:gsub_other_matches), + Truffle::StringOperations.method(:gsub_internal_yield_matches), + + Regexp.instance_method(:=~), + Regexp.instance_method(:match), + Regexp.instance_method(:match?), + Truffle::RegexpOperations.method(:match), + Truffle::RegexpOperations.method(:match?), + Truffle::RegexpOperations.method(:search_region), + Truffle::RegexpOperations.method(:match_in_region), + + String.instance_method(:[]), + Truffle::StringOperations.method(:subpattern), + + StringScanner.instance_method(:scan_internal), +] + +methods.each do |meth| + puts "#{Truffle::Debug.ast_size(meth)}: #{meth}" +end +EOF + out = ruby_exe(code) + out.lines.each do |line| + line.should =~ /^\d\d: .+$/ + end + end +end diff --git a/spec/truffle/string/string_spec.rb b/spec/truffle/string/string_spec.rb deleted file mode 100644 index 79b18e6c1ef5..000000000000 --- a/spec/truffle/string/string_spec.rb +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) 2021 Oracle 
and/or its affiliates. All rights reserved. This -# code is released under a tri EPL/GPL/LGPL license. You can use it, -# redistribute it and/or modify it under the terms of the: -# -# Eclipse Public License version 2.0, or -# GNU General Public License version 2, or -# GNU Lesser General Public License version 2.1. - -require_relative '../../ruby/spec_helper' - -describe 'String' do - it 'has critical methods of under 100 AST nodes' do - cmd = <<-EOF -require 'strscan' -puts Truffle::Debug.ast_size(String.instance_method(:sub)) < 100 -puts Truffle::Debug.ast_size(String.instance_method(:sub!)) < 100 -puts Truffle::Debug.ast_size(String.instance_method(:gsub)) < 100 -puts Truffle::Debug.ast_size(String.instance_method(:gsub!)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_match_and_replace)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_internal_hash)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_internal_replacement)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_internal_core_check_encoding)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_internal_matches)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_new_offset)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_regexp_matches)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_string_matches)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_other_matches)) < 100 -puts Truffle::Debug.ast_size(Truffle::StringOperations.method(:gsub_internal_yield_matches)) < 100 -puts Truffle::Debug.ast_size(StringScanner.instance_method(:scan_internal)) < 100 -EOF - ruby_exe(cmd).should == "true\n" * 15; - end -end diff --git a/spec/truffle/string/truncate_spec.rb b/spec/truffle/string/truncate_spec.rb index b69dd487db54..716f56743a14 100644 --- 
a/spec/truffle/string/truncate_spec.rb +++ b/spec/truffle/string/truncate_spec.rb @@ -1,3 +1,5 @@ +# truffleruby_primitives: true + # Copyright (c) 2016, 2019 Oracle and/or its affiliates. All rights reserved. This # code is released under a tri EPL/GPL/LGPL license. You can use it, # redistribute it and/or modify it under the terms of the: @@ -9,24 +11,24 @@ require_relative '../../ruby/spec_helper' -describe "Truffle::StringOperations.truncate" do +describe "Primitive.string_truncate" do it "should truncate if the new byte length is shorter than the current length" do str = "abcdef" - Truffle::StringOperations.truncate(str, 3) + Primitive.string_truncate(str, 3) str.should == "abc" end it "should raise an error if the new byte length is greater than the current length" do -> do - Truffle::StringOperations.truncate("abc", 10) + Primitive.string_truncate("abc", 10) end.should raise_error(ArgumentError) end it "should raise an error if the new byte length is negative" do -> do - Truffle::StringOperations.truncate("abc", -1) + Primitive.string_truncate("abc", -1) end.should raise_error(ArgumentError) end end diff --git a/src/main/.checkstyle_checks.xml b/src/main/.checkstyle_checks.xml index 0acefa57d2c0..824fcbb1f5c4 100644 --- a/src/main/.checkstyle_checks.xml +++ b/src/main/.checkstyle_checks.xml @@ -1,9 +1,6 @@ - diff --git a/src/main/c/cext/encoding.c b/src/main/c/cext/encoding.c index 62e9430601c7..907a8a21e715 100644 --- a/src/main/c/cext/encoding.c +++ b/src/main/c/cext/encoding.c @@ -75,17 +75,19 @@ unsigned int rb_enc_codepoint_len(const char *p, const char *e, int *len_p, rb_e if (len <= 0) { rb_raise(rb_eArgError, "empty string"); } - VALUE array = RUBY_CEXT_INVOKE("rb_enc_codepoint_len", rb_str_new(p, len), rb_enc_from_encoding(encoding)); - if (len_p) *len_p = polyglot_as_i32(polyglot_invoke(rb_tr_unwrap(array), "[]", 0)); - return (unsigned int)polyglot_as_i32(polyglot_invoke(rb_tr_unwrap(array), "[]", 1)); + VALUE array = 
RUBY_CEXT_INVOKE("rb_enc_codepoint_len", rb_tr_temporary_native_string(p, len, encoding)); + if (len_p) { + *len_p = polyglot_as_i32(polyglot_invoke(rb_tr_unwrap(array), "[]", 0)); + } + return (unsigned int) polyglot_as_i32(polyglot_invoke(rb_tr_unwrap(array), "[]", 1)); } int rb_enc_mbc_to_codepoint(char *p, char *e, rb_encoding *enc) { int length = e - p; - return polyglot_as_i32(polyglot_invoke(RUBY_CEXT, "rb_enc_mbc_to_codepoint", - rb_tr_unwrap(rb_enc_from_encoding(enc)), - rb_tr_unwrap(rb_str_new(p, length)), - length)); + if (length <= 0) { + return 0; + } + return polyglot_as_i32(RUBY_CEXT_INVOKE_NO_WRAP("rb_enc_mbc_to_codepoint", rb_tr_temporary_native_string(p, length, enc))); } int rb_tr_code_to_mbclen(OnigCodePoint code, OnigEncodingType *encoding) { @@ -93,7 +95,7 @@ int rb_tr_code_to_mbclen(OnigCodePoint code, OnigEncodingType *encoding) { } int rb_enc_codelen(int c, rb_encoding *enc) { - int n = ONIGENC_CODE_TO_MBCLEN(enc,c); + int n = ONIGENC_CODE_TO_MBCLEN(enc, c); if (n == 0) { rb_raise(rb_eArgError, "invalid codepoint 0x%x in %s", c, rb_enc_name(enc)); } @@ -223,26 +225,25 @@ int rb_enc_get_index(VALUE obj) { } char* rb_enc_left_char_head(char *start, char *p, char *end, rb_encoding *enc) { - int length = start-end; + int length = start - end; int position = polyglot_as_i32(polyglot_invoke(RUBY_CEXT, "rb_enc_left_char_head", rb_tr_unwrap(rb_enc_from_encoding(enc)), rb_tr_unwrap(rb_str_new(start, length)), - 0, - p-start, - length)); - return start+position; + p - start)); + return start + position; +} + +int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) { + int length = e-p; + return polyglot_as_i32(RUBY_CEXT_INVOKE_NO_WRAP("rb_enc_mbclen", rb_tr_temporary_native_string(p, length, enc))); } int rb_enc_precise_mbclen(const char *p, const char *e, rb_encoding *enc) { - int length = e - p; if (e <= p) { return ONIGENC_CONSTRUCT_MBCLEN_NEEDMORE(1); } - return polyglot_as_i32(polyglot_invoke(RUBY_CEXT, "rb_enc_precise_mbclen", - 
rb_tr_unwrap(rb_enc_from_encoding(enc)), - rb_tr_unwrap(rb_str_new(p, length)), - 0, - length)); + int length = e - p; + return polyglot_as_i32(RUBY_CEXT_INVOKE_NO_WRAP("rb_enc_precise_mbclen", rb_tr_temporary_native_string(p, length, enc))); } int rb_enc_dummy_p(rb_encoding *enc) { @@ -257,15 +258,6 @@ int rb_enc_mbminlen(rb_encoding *enc) { return polyglot_as_i32(RUBY_CEXT_INVOKE_NO_WRAP("rb_enc_mbminlen", rb_enc_from_encoding(enc))); } -int rb_enc_mbclen(const char *p, const char *e, rb_encoding *enc) { - int length = e-p; - return polyglot_as_i32(polyglot_invoke(RUBY_CEXT, "rb_enc_mbclen", - rb_tr_unwrap(rb_enc_from_encoding(enc)), - rb_tr_unwrap(rb_str_new(p, length)), - 0, - length)); -} - int rb_define_dummy_encoding(const char *name) { return polyglot_as_i32(RUBY_CEXT_INVOKE_NO_WRAP("rb_define_dummy_encoding", rb_str_new_cstr(name))); } @@ -275,9 +267,14 @@ int rb_enc_str_asciionly_p(VALUE str) { return polyglot_as_boolean(RUBY_INVOKE_NO_WRAP(str, "ascii_only?")); } +VALUE rb_tr_temporary_native_string(const char *ptr, long len, rb_encoding *enc) { + return rb_tr_wrap(polyglot_invoke(RUBY_CEXT, + "rb_tr_temporary_native_string", ptr, len, rb_tr_unwrap(rb_enc_from_encoding(enc)))); +} + #undef rb_enc_str_new VALUE rb_enc_str_new(const char *ptr, long len, rb_encoding *enc) { - return RUBY_INVOKE(rb_str_new(ptr, len), "force_encoding", rb_enc_from_encoding(enc)); + return RUBY_INVOKE(rb_str_new(ptr, len), "force_encoding", rb_enc_from_encoding(enc)); // TODO: do it more directly } #undef rb_enc_str_new_cstr @@ -306,9 +303,9 @@ VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc) { void rb_enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) 
{ va_list args; va_start(args, fmt); - VALUE mesg = rb_vsprintf(fmt, args); + VALUE mesg = rb_enc_vsprintf(enc, fmt, args); va_end(args); - rb_exc_raise(rb_exc_new_str(exc, RUBY_INVOKE(mesg, "force_encoding", rb_enc_from_encoding(enc)))); + rb_exc_raise(rb_exc_new_str(exc, mesg)); } #define castchar(from) (char)((from) & 0xff) @@ -357,17 +354,16 @@ int rb_uv_to_utf8(char buf[6], unsigned long uv) { rb_raise(rb_eRangeError, "pack(U): value out of range"); } -void write_p(const UChar** p, int offset) { +static void advance_p(const UChar** p, int offset) { *p = *p + offset; } int rb_tr_enc_mbc_case_fold(rb_encoding *enc, int flag, const UChar** p, const UChar* end, UChar* result) { int length = end - *p; VALUE result_str = rb_tr_wrap(polyglot_invoke(RUBY_CEXT, "rb_tr_enc_mbc_case_fold", - rb_tr_unwrap(rb_enc_from_encoding(enc)), flag, - rb_tr_unwrap(rb_str_new((char *)*p, length)), - write_p, + rb_tr_unwrap(rb_tr_temporary_native_string((char *)*p, length, enc)), + advance_p, p)); int result_len = RSTRING_LEN(result_str); if (result_len > 0) { diff --git a/src/main/c/cext/string.c b/src/main/c/cext/string.c index 5383e47e1433..d4b2f582e2f3 100644 --- a/src/main/c/cext/string.c +++ b/src/main/c/cext/string.c @@ -116,7 +116,7 @@ VALUE rb_str_inspect(VALUE string) { } ID rb_intern_str(VALUE string) { - return SYM2ID(RUBY_CEXT_INVOKE("rb_intern_str", string)); + return SYM2ID(RUBY_INVOKE(string, "intern")); } VALUE rb_str_cat(VALUE string, const char *to_concat, long length) { @@ -140,8 +140,7 @@ VALUE rb_str_buf_append(VALUE string, VALUE other) { } VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc) { - VALUE other = rb_enc_str_new(ptr, len, enc); - return rb_str_concat(str, other); + return rb_str_concat(str, rb_tr_temporary_native_string(ptr, len, enc)); } #undef rb_str_cat_cstr @@ -317,7 +316,7 @@ VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts) { } VALUE rb_usascii_str_new(const char *ptr, long len) { - return 
RUBY_INVOKE(rb_str_new(ptr, len), "force_encoding", rb_enc_from_encoding(rb_usascii_encoding())); + return rb_enc_str_new(ptr, len, rb_usascii_encoding()); } VALUE rb_usascii_str_new_static(const char *ptr, long len) { @@ -325,7 +324,7 @@ VALUE rb_usascii_str_new_static(const char *ptr, long len) { } VALUE rb_usascii_str_new_cstr(const char *ptr) { - return RUBY_INVOKE(rb_str_new_cstr(ptr), "force_encoding", rb_enc_from_encoding(rb_usascii_encoding())); + return rb_usascii_str_new(ptr, strlen(ptr)); } VALUE rb_str_times(VALUE string, VALUE times) { diff --git a/src/main/c/cext/symbol.c b/src/main/c/cext/symbol.c index 5b7b3bb6c18a..b65bd2b4c987 100644 --- a/src/main/c/cext/symbol.c +++ b/src/main/c/cext/symbol.c @@ -20,15 +20,15 @@ ID rb_to_id(VALUE name) { #undef rb_intern ID rb_intern(const char *string) { - return SYM2ID(RUBY_CEXT_INVOKE("rb_intern", rb_str_new_cstr(string))); + return rb_intern2(string, strlen(string)); } ID rb_intern2(const char *string, long length) { - return SYM2ID(RUBY_CEXT_INVOKE("rb_intern", rb_str_new(string, length))); + return SYM2ID(RUBY_INVOKE(rb_tr_temporary_native_string(string, length, rb_ascii8bit_encoding()), "intern")); } ID rb_intern3(const char *name, long len, rb_encoding *enc) { - return SYM2ID(RUBY_CEXT_INVOKE("rb_intern3", rb_str_new(name, len), rb_enc_from_encoding(enc))); + return SYM2ID(RUBY_INVOKE(rb_tr_temporary_native_string(name, len, enc), "intern")); } VALUE rb_sym2str(VALUE string) { @@ -72,8 +72,7 @@ ID rb_check_id(volatile VALUE *namep) { } VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc) { - VALUE str = rb_enc_str_new(ptr, len, enc); - return RUBY_CEXT_INVOKE("rb_check_symbol_cstr", str); + return RUBY_CEXT_INVOKE("rb_check_symbol_cstr", rb_tr_temporary_native_string(ptr, len, enc)); } VALUE rb_sym_to_s(VALUE sym) { diff --git a/src/main/c/cext/truffleruby-impl.h b/src/main/c/cext/truffleruby-impl.h index 31041cb64ee8..1c2f809010aa 100644 --- a/src/main/c/cext/truffleruby-impl.h +++ 
b/src/main/c/cext/truffleruby-impl.h @@ -20,4 +20,12 @@ #define rb_boolean(c) ((c) ? Qtrue : Qfalse) +// Private functions + extern bool (*rb_tr_is_native_object)(VALUE value); + +// Create a native MutableTruffleString from ptr and len without copying. +// The returned RubyString is only valid as long as ptr is valid (typically only as long as the caller is on the stack), +// so this must be only used as an argument to an internal Truffle::CExt method which does not return or store +// the RubyString but only run some operation on it. +VALUE rb_tr_temporary_native_string(const char *ptr, long len, rb_encoding *enc); diff --git a/src/main/java/org/truffleruby/RubyContext.java b/src/main/java/org/truffleruby/RubyContext.java index 1adade7b6933..77a68e85c0e3 100644 --- a/src/main/java/org/truffleruby/RubyContext.java +++ b/src/main/java/org/truffleruby/RubyContext.java @@ -512,11 +512,11 @@ private void dispose() { Signals.restoreDefaultHandlers(); - if (options.ROPE_PRINT_INTERN_STATS) { - RubyLanguage.LOGGER.info("ropes re-used: " + language.ropeCache.getRopesReusedCount()); - RubyLanguage.LOGGER.info("rope byte arrays re-used: " + language.ropeCache.getByteArrayReusedCount()); - RubyLanguage.LOGGER.info("rope bytes saved: " + language.ropeCache.getRopeBytesSaved()); - RubyLanguage.LOGGER.info("total ropes interned: " + language.ropeCache.totalRopes()); + if (options.PRINT_INTERNED_TSTRING_STATS) { + RubyLanguage.LOGGER.info("tstrings re-used: " + language.tstringCache.getTStringsReusedCount()); + RubyLanguage.LOGGER.info("tstring byte arrays re-used: " + language.tstringCache.getByteArrayReusedCount()); + RubyLanguage.LOGGER.info("tstring bytes saved: " + language.tstringCache.getTStringBytesSaved()); + RubyLanguage.LOGGER.info("total tstrings interned: " + language.tstringCache.totalTStrings()); } if (options.CEXTS_TO_NATIVE_STATS) { diff --git a/src/main/java/org/truffleruby/RubyFileTypeDetector.java b/src/main/java/org/truffleruby/RubyFileTypeDetector.java 
index ee46a7df71fb..73bca2c574e1 100644 --- a/src/main/java/org/truffleruby/RubyFileTypeDetector.java +++ b/src/main/java/org/truffleruby/RubyFileTypeDetector.java @@ -17,10 +17,10 @@ import java.util.regex.Pattern; import org.jcodings.Encoding; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.core.encoding.EncodingManager; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.parser.lexer.RubyLexer; import com.oracle.truffle.api.TruffleFile; @@ -76,9 +76,10 @@ public Charset findEncoding(TruffleFile file) throws IOException { encodingCommentLine = firstLine; } if (encodingCommentLine != null) { - Rope encodingCommentRope = StringOperations.encodeRope(encodingCommentLine, UTF8Encoding.INSTANCE); + var encodingComment = new TStringWithEncoding(TStringUtils.utf8TString(encodingCommentLine), + Encodings.UTF_8); Charset[] encodingHolder = new Charset[1]; - RubyLexer.parseMagicComment(encodingCommentRope, (name, value) -> { + RubyLexer.parseMagicComment(encodingComment, (name, value) -> { if (RubyLexer.isMagicEncodingComment(name)) { Encoding encoding = EncodingManager.getEncoding(value); if (encoding != null) { diff --git a/src/main/java/org/truffleruby/RubyLanguage.java b/src/main/java/org/truffleruby/RubyLanguage.java index 497ab9c880fa..667ba50dda9f 100644 --- a/src/main/java/org/truffleruby/RubyLanguage.java +++ b/src/main/java/org/truffleruby/RubyLanguage.java @@ -30,8 +30,9 @@ import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.source.SourceSection; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.options.OptionDescriptors; -import org.jcodings.Encoding; import 
org.truffleruby.builtins.PrimitiveManager; import org.truffleruby.cext.ValueWrapperManager; import org.truffleruby.collections.SharedIndicesMap; @@ -71,10 +72,8 @@ import org.truffleruby.core.regexp.RegexpTable; import org.truffleruby.core.regexp.RubyMatchData; import org.truffleruby.core.regexp.RubyRegexp; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.PathToRopeCache; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeCache; +import org.truffleruby.core.string.PathToTStringCache; +import org.truffleruby.core.string.TStringCache; import org.truffleruby.core.string.CoreStrings; import org.truffleruby.core.string.FrozenStringLiterals; import org.truffleruby.core.string.RubyString; @@ -146,7 +145,8 @@ RubyLanguage.MIME_TYPE_MAIN_SCRIPT }, defaultMimeType = RubyLanguage.MIME_TYPE, dependentLanguages = { "nfi", "llvm", "regex" }, - fileTypeDetectors = RubyFileTypeDetector.class) + fileTypeDetectors = RubyFileTypeDetector.class, + needsAllEncodings = true) @ProvidedTags({ CoverageManager.LineTag.class, TraceManager.CallTag.class, @@ -210,7 +210,7 @@ public static final class ThreadLocalState { public final CoreStrings coreStrings; public final CoreSymbols coreSymbols; public final PrimitiveManager primitiveManager; - public final RopeCache ropeCache; + public final TStringCache tstringCache; public final RegexpTable regexpTable; public final SymbolTable symbolTable; public final KeywordArgumentsDescriptorManager keywordArgumentsDescriptorManager = new KeywordArgumentsDescriptorManager(); @@ -229,7 +229,7 @@ public static final class ThreadLocalState { @CompilationFinal public CoverageManager coverageManager; private final AtomicLong nextObjectID = new AtomicLong(ObjectSpaceManager.INITIAL_LANGUAGE_OBJECT_ID); - private final PathToRopeCache pathToRopeCache = new PathToRopeCache(this); + private final PathToTStringCache pathToTStringCache = new PathToTStringCache(this); public final SharedIndicesMap 
globalVariablesMap = new SharedIndicesMap(); private final LanguageArray globalVariableNeverAliasedAssumptions = new LanguageArray<>( @@ -323,10 +323,10 @@ public RubyLanguage() { coreStrings = new CoreStrings(this); coreSymbols = new CoreSymbols(); primitiveManager = new PrimitiveManager(); - ropeCache = new RopeCache(coreSymbols); - symbolTable = new SymbolTable(ropeCache, coreSymbols); + tstringCache = new TStringCache(coreSymbols); + symbolTable = new SymbolTable(tstringCache, coreSymbols); regexpTable = new RegexpTable(); - frozenStringLiterals = new FrozenStringLiterals(ropeCache); + frozenStringLiterals = new FrozenStringLiterals(tstringCache); } public RubyThread getCurrentThread() { @@ -360,8 +360,8 @@ public RubySymbol getSymbol(String string) { } @TruffleBoundary - public RubySymbol getSymbol(Rope rope, RubyEncoding encoding) { - return symbolTable.getSymbol(rope, encoding); + public RubySymbol getSymbol(AbstractTruffleString name, RubyEncoding encoding) { + return symbolTable.getSymbol(name, encoding); } public Assumption getTracingAssumption() { @@ -508,7 +508,7 @@ protected RootCallTarget parse(ParsingRequest request) { final RubySource rubySource = new RubySource( source, parsingParameters.getPath(), - parsingParameters.getRope()); + parsingParameters.getTStringWithEnc()); final ParserContext parserContext = MIME_TYPE_MAIN_SCRIPT.equals(source.getMimeType()) ? 
ParserContext.TOP_LEVEL_FIRST : ParserContext.TOP_LEVEL; @@ -701,12 +701,8 @@ public AllocationReporter getAllocationReporter() { return allocationReporter; } - public ImmutableRubyString getFrozenStringLiteral(byte[] bytes, Encoding encoding, CodeRange codeRange) { - return frozenStringLiterals.getFrozenStringLiteral(bytes, encoding, codeRange); - } - - public ImmutableRubyString getFrozenStringLiteral(Rope rope) { - return frozenStringLiterals.getFrozenStringLiteral(rope); + public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEncoding encoding) { + return frozenStringLiterals.getFrozenStringLiteral(tstring, encoding); } public long getNextObjectID() { @@ -719,8 +715,8 @@ public long getNextObjectID() { return id; } - public PathToRopeCache getPathToRopeCache() { - return pathToRopeCache; + public PathToTStringCache getPathToTStringCache() { + return pathToTStringCache; } private static Shape createShape(Class layoutClass) { diff --git a/src/main/java/org/truffleruby/builtins/BuiltinsClasses.java b/src/main/java/org/truffleruby/builtins/BuiltinsClasses.java index 87f09cde8e3a..ec97bc25dff8 100644 --- a/src/main/java/org/truffleruby/builtins/BuiltinsClasses.java +++ b/src/main/java/org/truffleruby/builtins/BuiltinsClasses.java @@ -104,12 +104,8 @@ import org.truffleruby.core.regexp.RegexpNodesFactory; import org.truffleruby.core.regexp.TruffleRegexpNodesBuiltins; import org.truffleruby.core.regexp.TruffleRegexpNodesFactory; -import org.truffleruby.core.rope.TruffleRopesNodesBuiltins; -import org.truffleruby.core.rope.TruffleRopesNodesFactory; import org.truffleruby.core.string.StringNodesBuiltins; import org.truffleruby.core.string.StringNodesFactory; -import org.truffleruby.core.string.TruffleStringNodesBuiltins; -import org.truffleruby.core.string.TruffleStringNodesFactory; import org.truffleruby.core.support.ByteArrayNodesBuiltins; import org.truffleruby.core.support.ByteArrayNodesFactory; import 
org.truffleruby.core.support.CustomRandomizerNodesBuiltins; @@ -249,9 +245,7 @@ public static void setupBuiltinsLazy(CoreMethodNodeManager coreManager) { TruffleMonitorNodesBuiltins.setup(coreManager); TrufflePosixNodesBuiltins.setup(coreManager); TruffleRegexpNodesBuiltins.setup(coreManager); - TruffleRopesNodesBuiltins.setup(coreManager); TruffleRubyNodesBuiltins.setup(coreManager); - TruffleStringNodesBuiltins.setup(coreManager); TruffleSystemNodesBuiltins.setup(coreManager); TruffleThreadNodesBuiltins.setup(coreManager); TypeNodesBuiltins.setup(coreManager); @@ -332,9 +326,7 @@ public static void setupBuiltinsLazyPrimitives(PrimitiveManager primitiveManager TruffleMonitorNodesBuiltins.setupPrimitives(primitiveManager); TrufflePosixNodesBuiltins.setupPrimitives(primitiveManager); TruffleRegexpNodesBuiltins.setupPrimitives(primitiveManager); - TruffleRopesNodesBuiltins.setupPrimitives(primitiveManager); TruffleRubyNodesBuiltins.setupPrimitives(primitiveManager); - TruffleStringNodesBuiltins.setupPrimitives(primitiveManager); TruffleSystemNodesBuiltins.setupPrimitives(primitiveManager); TruffleThreadNodesBuiltins.setupPrimitives(primitiveManager); TypeNodesBuiltins.setupPrimitives(primitiveManager); @@ -416,9 +408,7 @@ public static List>> getCoreN TruffleMonitorNodesFactory.getFactories(), TrufflePosixNodesFactory.getFactories(), TruffleRegexpNodesFactory.getFactories(), - TruffleRopesNodesFactory.getFactories(), TruffleRubyNodesFactory.getFactories(), - TruffleStringNodesFactory.getFactories(), TruffleSystemNodesFactory.getFactories(), TruffleThreadNodesFactory.getFactories(), TypeNodesFactory.getFactories(), diff --git a/src/main/java/org/truffleruby/cext/CExtNodes.java b/src/main/java/org/truffleruby/cext/CExtNodes.java index 1254a6a6aab6..45fbccd71ee4 100644 --- a/src/main/java/org/truffleruby/cext/CExtNodes.java +++ b/src/main/java/org/truffleruby/cext/CExtNodes.java @@ -15,10 +15,14 @@ import com.oracle.truffle.api.TruffleSafepoint; import 
com.oracle.truffle.api.object.DynamicObjectLibrary; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.MutableTruffleString; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; import org.jcodings.Encoding; import org.jcodings.IntHolder; -import org.jcodings.specific.USASCIIEncoding; import org.truffleruby.Layouts; +import org.truffleruby.RubyLanguage; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreMethodNode; @@ -29,7 +33,6 @@ import org.truffleruby.cext.CExtNodesFactory.CallWithCExtLockNodeFactory; import org.truffleruby.cext.CExtNodesFactory.StringToNativeNodeGen; import org.truffleruby.cext.UnwrapNode.UnwrapCArrayNode; -import org.truffleruby.core.CoreLibrary; import org.truffleruby.core.MarkingService.ExtensionCallStack; import org.truffleruby.core.MarkingServiceNodes; import org.truffleruby.core.array.ArrayToObjectArrayNode; @@ -38,6 +41,7 @@ import org.truffleruby.core.cast.HashCastNode; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.exception.ErrnoErrorNode; import org.truffleruby.core.exception.ExceptionOperations; import org.truffleruby.core.format.BytesResult; @@ -59,20 +63,13 @@ import org.truffleruby.core.numeric.FixnumOrBignumNode; import org.truffleruby.core.numeric.RubyBignum; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.Bytes; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.NativeRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringCachingGuards; -import 
org.truffleruby.core.string.StringNodes; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.string.StringSupport; import org.truffleruby.core.support.TypeNodes; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.core.thread.ThreadManager; +import org.truffleruby.extra.ffi.Pointer; import org.truffleruby.extra.ffi.RubyPointer; import org.truffleruby.interop.InteropNodes; import org.truffleruby.interop.ToJavaStringNode; @@ -107,7 +104,6 @@ import org.truffleruby.language.supercall.CallSuperMethodNode; import org.truffleruby.language.yield.CallBlockNode; import org.truffleruby.parser.IdentifierType; -import org.truffleruby.utils.Utils; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -115,7 +111,6 @@ import com.oracle.truffle.api.Truffle; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.CreateCast; -import com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.ReportPolymorphism; import com.oracle.truffle.api.dsl.Specialization; @@ -135,6 +130,17 @@ @CoreModule("Truffle::CExt") public class CExtNodes { + public static Pointer newNativeStringPointer(int capacity, RubyLanguage language) { + return Pointer.mallocAutoRelease(capacity + 1, language); + } + + private static long getNativeStringCapacity(Pointer pointer) { + final long nativeBufferSize = pointer.getSize(); + assert nativeBufferSize > 0; + // Do not count the extra byte for \0, like MRI. 
+ return nativeBufferSize - 1; + } + @Primitive(name = "call_with_c_mutex_and_frame") public abstract static class CallWithCExtLockAndFrameNode extends PrimitiveArrayArgumentsNode { @@ -598,20 +604,15 @@ protected int long2fix(int num) { return num; } - @Specialization(guards = "fitsIntoInteger(num)") + @Specialization(guards = "fitsInInteger(num)") protected int long2fixInRange(long num) { return (int) num; } - @Specialization(guards = "!fitsIntoInteger(num)") + @Specialization(guards = "!fitsInInteger(num)") protected int long2fixOutOfRange(long num) { throw new RaiseException(getContext(), coreExceptions().rangeErrorConvertToInt(num, this)); } - - protected boolean fitsIntoInteger(long num) { - return CoreLibrary.fitsIntoInteger(num); - } - } @CoreMethod(names = "rb_enc_coderange_clear", onSingleton = true, required = 1) @@ -620,9 +621,8 @@ public abstract static class RbEncCodeRangeClear extends CoreMethodArrayArgument @Specialization protected RubyString clearCodeRange(RubyString string, @Cached StringToNativeNode stringToNativeNode) { - final NativeRope nativeRope = stringToNativeNode.executeToNative(string); - nativeRope.clearCodeRange(); - string.setRope(nativeRope); + stringToNativeNode.executeToNative(string); + string.clearCodeRange(); return string; } @@ -639,42 +639,32 @@ protected int codeToMbcLen(int code, RubyEncoding encoding) { } - @CoreMethod(names = "rb_enc_codepoint_len", onSingleton = true, required = 2) + @CoreMethod(names = "rb_enc_codepoint_len", onSingleton = true, required = 1) public abstract static class RbEncCodePointLenNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "strings.isRubyString(string)") - protected RubyArray rbEncCodePointLen(Object string, RubyEncoding encoding, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached RopeNodes.BytesNode bytesNode, - @Cached RopeNodes.CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached RopeNodes.CodeRangeNode codeRangeNode, - 
@Cached ConditionProfile sameEncodingProfile, + @Specialization(guards = "strings.isRubyString(string)", limit = "1") + protected RubyArray rbEncCodePointLen(Object string, + @Cached RubyStringLibrary strings, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Cached TruffleString.CodePointAtByteIndexNode codePointAtByteIndexNode, @Cached BranchProfile errorProfile) { - final Rope rope = strings.getRope(string); - final byte[] bytes = bytesNode.execute(rope); - final CodeRange ropeCodeRange = codeRangeNode.execute(rope); - final Encoding enc = encoding.jcoding; - - final CodeRange cr; - if (sameEncodingProfile.profile(enc == rope.getEncoding())) { - cr = ropeCodeRange; - } else { - cr = CodeRange.CR_UNKNOWN; - } + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var tencoding = encoding.tencoding; - final int r = calculateCharacterLengthNode.characterLength(enc, cr, new Bytes(bytes)); + final int r = byteLengthOfCodePointNode.execute(tstring, 0, tencoding, ErrorHandling.RETURN_NEGATIVE); if (!StringSupport.MBCLEN_CHARFOUND_P(r)) { errorProfile.enter(); throw new RaiseException( getContext(), - coreExceptions().argumentError(Utils.concat("invalid byte sequence in ", enc), this)); + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); } - final int len_p = StringSupport.MBCLEN_CHARFOUND_LEN(r); - final int codePoint = StringSupport.preciseCodePoint(enc, ropeCodeRange, bytes, 0, bytes.length); + int codePoint = codePointAtByteIndexNode.execute(tstring, 0, tencoding, ErrorHandling.RETURN_NEGATIVE); + assert codePoint != -1; - return createArray(new Object[]{ len_p, codePoint }); + return createArray(new int[]{ StringSupport.MBCLEN_CHARFOUND_LEN(r), codePoint }); } } @@ -705,23 +695,34 @@ public abstract static class RbStrNewNulNode extends CoreMethodArrayArgumentsNod @Specialization protected RubyString rbStrNewNul(int byteLength, - @Cached StringNodes.MakeStringNode makeStringNode) { 
- final Rope rope = NativeRope.newBuffer(getLanguage(), byteLength, byteLength); - - return makeStringNode.fromRope(rope, Encodings.BINARY); + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { + final Pointer pointer = Pointer.callocAutoRelease(byteLength + 1, getLanguage()); + var nativeTString = fromNativePointerNode.execute(pointer, 0, byteLength, Encodings.BINARY.tencoding, + false); + return createMutableString(nativeTString, Encodings.BINARY); } } + @CoreMethod(names = "rb_tr_temporary_native_string", onSingleton = true, required = 3, lowerFixnum = 2) + public abstract static class TemporaryNativeStringNode extends CoreMethodArrayArgumentsNode { + + @Specialization + protected RubyString temporaryNativeString(Object pointer, int byteLength, RubyEncoding encoding, + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { + var nativeTString = fromNativePointerNode.execute(pointer, 0, byteLength, encoding.tencoding, false); + return createMutableString(nativeTString, encoding); + } + } + @CoreMethod(names = "rb_str_capacity", onSingleton = true, required = 1) public abstract static class RbStrCapacityNode extends CoreMethodArrayArgumentsNode { @Specialization protected long capacity(Object string, @Cached StringToNativeNode stringToNativeNode) { - return stringToNativeNode.executeToNative(string).getCapacity(); + return getNativeStringCapacity(stringToNativeNode.executeToNative(string)); } - } @CoreMethod(names = "rb_str_set_len", onSingleton = true, required = 2, lowerFixnum = 2) @@ -729,26 +730,19 @@ public abstract static class RbStrSetLenNode extends CoreMethodArrayArgumentsNod @Specialization protected RubyString strSetLen(RubyString string, int newByteLength, + @Cached RubyStringLibrary libString, @Cached StringToNativeNode stringToNativeNode, - @Cached ConditionProfile asciiOnlyProfile) { - final NativeRope nativeRope = stringToNativeNode.executeToNative(string); - - final CodeRange newCodeRange; - final int 
newCharacterLength; - if (asciiOnlyProfile.profile(nativeRope.getRawCodeRange() == CodeRange.CR_7BIT)) { - newCodeRange = CodeRange.CR_7BIT; - newCharacterLength = newByteLength; - } else { - newCodeRange = CodeRange.CR_UNKNOWN; - newCharacterLength = NativeRope.UNKNOWN_CHARACTER_LENGTH; - } + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { + var pointer = stringToNativeNode.executeToNative(string); - final NativeRope newNativeRope = nativeRope.withByteLength(newByteLength, newCharacterLength, newCodeRange); - string.setRope(newNativeRope); + pointer.writeByte(newByteLength, (byte) 0); // Like MRI + + var newNativeTString = fromNativePointerNode.execute(pointer, 0, newByteLength, + libString.getTEncoding(string), false); + string.setTString(newNativeTString); return string; } - } @CoreMethod(names = "rb_str_resize", onSingleton = true, required = 2, lowerFixnum = 2) @@ -756,20 +750,25 @@ public abstract static class RbStrResizeNode extends CoreMethodArrayArgumentsNod @Specialization protected RubyString rbStrResize(RubyString string, int newByteLength, - @Cached StringToNativeNode stringToNativeNode) { - final NativeRope nativeRope = stringToNativeNode.executeToNative(string); + @Cached RubyStringLibrary libString, + @Cached StringToNativeNode stringToNativeNode, + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { + var pointer = stringToNativeNode.executeToNative(string); + var tencoding = libString.getTEncoding(string); + int byteLength = string.tstring.byteLength(tencoding); - if (nativeRope.byteLength() == newByteLength) { + if (byteLength == newByteLength) { // Like MRI's rb_str_resize() - nativeRope.clearCodeRange(); + string.clearCodeRange(); return string; } else { - final NativeRope newRope = nativeRope.resize(getLanguage(), newByteLength); + var newNativeTString = TrStrCapaResizeNode.resize(pointer, newByteLength, newByteLength, tencoding, + fromNativePointerNode, getLanguage()); + 
string.setTString(newNativeTString); // Like MRI's rb_str_resize() - newRope.clearCodeRange(); + string.clearCodeRange(); - string.setRope(newRope); return string; } } @@ -780,18 +779,33 @@ public abstract static class TrStrCapaResizeNode extends CoreMethodArrayArgument @Specialization protected RubyString trStrCapaResize(RubyString string, int newCapacity, - @Cached StringToNativeNode stringToNativeNode) { - final NativeRope nativeRope = stringToNativeNode.executeToNative(string); + @Cached RubyStringLibrary libString, + @Cached StringToNativeNode stringToNativeNode, + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode) { + var pointer = stringToNativeNode.executeToNative(string); + var tencoding = libString.getTEncoding(string); - if (nativeRope.getCapacity() == newCapacity) { + if (getNativeStringCapacity(pointer) == newCapacity) { return string; } else { - final NativeRope newRope = nativeRope.expandCapacity(getLanguage(), newCapacity); - string.setRope(newRope); + int byteLength = string.tstring.byteLength(tencoding); + var newNativeTString = resize(pointer, newCapacity, byteLength, tencoding, fromNativePointerNode, + getLanguage()); + string.setTString(newNativeTString); + return string; } } + static MutableTruffleString resize(Pointer pointer, int newCapacity, int newByteLength, + TruffleString.Encoding tencoding, MutableTruffleString.FromNativePointerNode fromNativePointerNode, + RubyLanguage language) { + final Pointer newPointer = newNativeStringPointer(newCapacity, language); + newPointer.writeBytes(0, pointer, 0, Math.min(pointer.getSize(), newCapacity)); + newPointer.writeByte(newCapacity, (byte) 0); // Like MRI + + return fromNativePointerNode.execute(newPointer, 0, newByteLength, tencoding, false); + } } @CoreMethod(names = "rb_keyword_given_p", onSingleton = true) @@ -989,7 +1003,7 @@ protected Object iterBreakValue(Object value) { @CoreMethod(names = "rb_sourcefile", onSingleton = true) public abstract static class 
SourceFileNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -997,7 +1011,7 @@ protected RubyString sourceFile() { final SourceSection sourceSection = getTopUserSourceSection("rb_sourcefile"); final String file = getLanguage().getSourcePath(sourceSection.getSource()); - return makeStringNode.executeMake(file, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, file, Encodings.UTF_8); } @TruffleBoundary @@ -1173,36 +1187,46 @@ public static StringToNativeNode create() { return StringToNativeNodeGen.create(); } - public abstract NativeRope executeToNative(Object string); + public abstract Pointer executeToNative(Object string); @Specialization - protected NativeRope toNative(RubyString string, + protected Pointer toNative(RubyString string, + @Cached RubyStringLibrary libString, @Cached ConditionProfile convertProfile, - @Cached RopeNodes.BytesNode bytesNode, - @Cached RopeNodes.CharacterLengthNode characterLengthNode, - @Cached RopeNodes.CodeRangeNode codeRangeNode) { - final Rope currentRope = string.rope; + @Cached TruffleString.CopyToNativeMemoryNode copyToNativeMemoryNode, + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode, + @Cached TruffleString.GetInternalNativePointerNode getInternalNativePointerNode) { + var tstring = string.tstring; + var tencoding = libString.getTEncoding(string); - final NativeRope nativeRope; + final Pointer pointer; - if (convertProfile.profile(currentRope instanceof NativeRope)) { - nativeRope = (NativeRope) currentRope; + if (convertProfile.profile(tstring.isNative())) { + assert tstring.isMutable(); + pointer = (Pointer) getInternalNativePointerNode.execute(tstring, tencoding); } else { - nativeRope = new NativeRope( - getLanguage(), - 
bytesNode.execute(currentRope), - currentRope.getEncoding(), - characterLengthNode.execute(currentRope), - codeRangeNode.execute(currentRope)); - string.setRope(nativeRope); + int byteLength = tstring.byteLength(tencoding); + pointer = allocateAndCopyToNative(tstring, tencoding, byteLength, copyToNativeMemoryNode, + getLanguage()); + + var nativeTString = fromNativePointerNode.execute(pointer, 0, byteLength, tencoding, false); + string.setTString(nativeTString); } - return nativeRope; + return pointer; } @Specialization - protected NativeRope toNativeImmutable(ImmutableRubyString string) { - return string.getNativeRope(getLanguage()); + protected Pointer toNativeImmutable(ImmutableRubyString string) { + return string.getNativeString(getLanguage()); + } + + public static Pointer allocateAndCopyToNative(AbstractTruffleString tstring, TruffleString.Encoding tencoding, + int capacity, TruffleString.CopyToNativeMemoryNode copyToNativeMemoryNode, RubyLanguage language) { + final Pointer pointer = newNativeStringPointer(capacity, language); + copyToNativeMemoryNode.execute(tstring, 0, pointer, 0, capacity, tencoding); + pointer.writeByte(capacity, (byte) 0); // Like MRI + return pointer; } } @@ -1213,29 +1237,25 @@ public abstract static class StringPointerToNativeNode extends PrimitiveArrayArg @Specialization protected long toNative(Object string, @Cached StringToNativeNode stringToNativeNode) { - final NativeRope nativeRope = stringToNativeNode.executeToNative(string); - - return nativeRope.getNativePointer().getAddress(); + return stringToNativeNode.executeToNative(string).getAddress(); } - } @CoreMethod(names = "string_to_ffi_pointer", onSingleton = true, required = 1) - public abstract static class StringToPointerNode extends CoreMethodArrayArgumentsNode { + public abstract static class StringToFFIPointerNode extends CoreMethodArrayArgumentsNode { @Specialization protected RubyPointer toNative(Object string, @Cached StringToNativeNode stringToNativeNode) { - final 
NativeRope nativeRope = stringToNativeNode.executeToNative(string); + var pointer = stringToNativeNode.executeToNative(string); final RubyPointer instance = new RubyPointer( coreLibrary().truffleFFIPointerClass, getLanguage().truffleFFIPointerShape, - nativeRope.getNativePointer()); + pointer); AllocationTracing.trace(instance, this); return instance; } - } @Primitive(name = "string_is_native?") @@ -1243,7 +1263,7 @@ public abstract static class StringPointerIsNativeNode extends PrimitiveArrayArg @Specialization protected boolean isNative(RubyString string) { - return string.rope instanceof NativeRope; + return string.tstring.isNative(); } @TruffleBoundary @@ -1271,8 +1291,8 @@ protected Object debug(Object... objects) { final String representation; if (libString.isRubyString(object)) { - final Rope rope = libString.getRope(object); - final byte[] bytes = rope.getBytes(); + var tstring = libString.getTString(object); + final byte[] bytes = TStringUtils.getBytesOrCopy(tstring, libString.getEncoding(object)); final StringBuilder builder = new StringBuilder(); for (int i = 0; i < bytes.length; i++) { @@ -1282,10 +1302,9 @@ protected Object debug(Object... 
objects) { builder.append(String.format("%02x", bytes[i])); } - representation = RopeOperations.decodeRope(rope) + " (" + builder.toString() + ")"; + representation = tstring.toString() + " (" + builder.toString() + ")"; } else if (RubyGuards.isRubyValue(object)) { - representation = object.toString() + " (" + - RubyStringLibrary.getUncached().getJavaString(callToS(object)) + ")"; + representation = object.toString() + " (" + RubyGuards.getJavaString(callToS(object)) + ")"; } else { representation = object.toString(); } @@ -1358,43 +1377,47 @@ protected Object executeThrow(CapturedException captured, } } - @CoreMethod(names = "rb_tr_enc_mbc_case_fold", onSingleton = true, required = 5, lowerFixnum = 2) + @CoreMethod(names = "rb_tr_enc_mbc_case_fold", onSingleton = true, required = 4, lowerFixnum = 1) public abstract static class RbTrMbcCaseFoldNode extends CoreMethodArrayArgumentsNode { @Specialization(guards = "strings.isRubyString(string)", limit = "getCacheLimit()") - protected Object rbTrEncMbcCaseFold(RubyEncoding enc, int flags, Object string, Object write_p, Object p, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @CachedLibrary("write_p") InteropLibrary receivers, - @Cached RopeNodes.BytesNode getBytes, - @Cached TranslateInteropExceptionNode translateInteropExceptionNode) { - final byte[] bytes = getBytes.execute(strings.getRope(string)); + protected Object rbTrEncMbcCaseFold(int flags, Object string, Object advance_p, Object p, + @Cached RubyStringLibrary strings, + @CachedLibrary("advance_p") InteropLibrary receivers, + @Cached TranslateInteropExceptionNode translateInteropExceptionNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var bytes = TStringUtils.getBytesOrFail(tstring, encoding, byteArrayNode); + final byte[] to = new byte[bytes.length]; final 
IntHolder intHolder = new IntHolder(); intHolder.value = 0; - final int resultLength = enc.jcoding - .mbcCaseFold(flags, bytes, intHolder, bytes.length, to); - InteropNodes.execute(write_p, new Object[]{ p, intHolder.value }, receivers, translateInteropExceptionNode); + + final int resultLength = encoding.jcoding.mbcCaseFold(flags, bytes, intHolder, bytes.length, to); + + InteropNodes.execute(advance_p, new Object[]{ p, intHolder.value }, receivers, + translateInteropExceptionNode); + final byte[] result = new byte[resultLength]; if (resultLength > 0) { System.arraycopy(to, 0, result, 0, resultLength); } - return StringOperations.createString( - this, - RopeOperations.create(result, USASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN), - Encodings.US_ASCII); + + return createString(fromByteArrayNode, result, Encodings.US_ASCII); } protected int getCacheLimit() { return getLanguage().options.DISPATCH_CACHE; } - } @CoreMethod(names = "rb_tr_code_to_mbc", onSingleton = true, required = 2, lowerFixnum = 2) public abstract static class RbTrMbcPutNode extends CoreMethodArrayArgumentsNode { - @Specialization - protected Object rbTrEncMbcPut(RubyEncoding enc, int code) { + protected Object rbTrEncMbcPut(RubyEncoding enc, int code, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final Encoding encoding = enc.jcoding; final byte buf[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN]; final int resultLength = encoding.codeToMbc(code, buf, 0); @@ -1402,121 +1425,82 @@ protected Object rbTrEncMbcPut(RubyEncoding enc, int code) { if (resultLength > 0) { System.arraycopy(buf, 0, result, 0, resultLength); } - return StringOperations.createString( - this, - RopeOperations.create(result, USASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN), - Encodings.US_ASCII); + return createString(fromByteArrayNode, result, Encodings.US_ASCII); } - } @CoreMethod(names = "rb_enc_mbmaxlen", onSingleton = true, required = 1) public abstract static class RbEncMaxLenNode extends 
CoreMethodArrayArgumentsNode { - @Specialization protected Object rbEncMaxLen(RubyEncoding value) { return value.jcoding.maxLength(); } - } @CoreMethod(names = "rb_enc_mbminlen", onSingleton = true, required = 1) public abstract static class RbEncMinLenNode extends CoreMethodArrayArgumentsNode { - @Specialization protected Object rbEncMinLen(RubyEncoding value) { return value.jcoding.minLength(); } - } - @CoreMethod(names = "rb_enc_mbclen", onSingleton = true, required = 4, lowerFixnum = { 3, 4 }) + @CoreMethod(names = "rb_enc_mbclen", onSingleton = true, required = 1) public abstract static class RbEncMbLenNode extends CoreMethodArrayArgumentsNode { - - @Specialization(guards = "strings.isRubyString(string)") - protected Object rbEncMbLen(RubyEncoding enc, Object string, int p, int e, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached RopeNodes.BytesNode getBytes, - @Cached RopeNodes.CodeRangeNode codeRangeNode, - @Cached ConditionProfile sameEncodingProfile) { - final Encoding encoding = enc.jcoding; - final Rope rope = strings.getRope(string); - final Encoding ropeEncoding = rope.getEncoding(); - - return StringSupport.characterLength( - encoding, - sameEncodingProfile.profile(encoding == ropeEncoding) - ? 
codeRangeNode.execute(rope) - : CodeRange.CR_UNKNOWN, - getBytes.execute(strings.getRope(string)), - p, - e, - true); + @Specialization(guards = "strings.isRubyString(string)", limit = "1") + protected Object rbEncMbLen(Object string, + @Cached RubyStringLibrary strings, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode) { + var tstring = strings.getTString(string); + var tencoding = strings.getTEncoding(string); + return byteLengthOfCodePointNode.execute(tstring, 0, tencoding, ErrorHandling.BEST_EFFORT); } + } + @CoreMethod(names = "rb_enc_precise_mbclen", onSingleton = true, required = 1) + public abstract static class RbEncPreciseMbclenNode extends CoreMethodArrayArgumentsNode { + @Specialization(guards = "strings.isRubyString(string)", limit = "1") + protected int rbEncPreciseMbclen(Object string, + @Cached RubyStringLibrary strings, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode) { + var tstring = strings.getTString(string); + var tencoding = strings.getTEncoding(string); + return byteLengthOfCodePointNode.execute(tstring, 0, tencoding, ErrorHandling.RETURN_NEGATIVE); + } } - @CoreMethod(names = "rb_enc_left_char_head", onSingleton = true, required = 5, lowerFixnum = { 3, 4, 5 }) + @CoreMethod(names = "rb_enc_left_char_head", onSingleton = true, required = 3, lowerFixnum = 3) public abstract static class RbEncLeftCharHeadNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") - protected Object rbEncLeftCharHead(RubyEncoding enc, Object string, int start, int p, int end, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return enc.jcoding.leftAdjustCharHead( - strings.getRope(string).getBytes(), - start, - p, - end); + @Specialization(guards = "strings.isRubyString(string)", limit = "1") + protected Object rbEncLeftCharHead(RubyEncoding enc, Object string, int p, + @Cached RubyStringLibrary strings) { + byte[] bytes = 
TStringUtils.getBytesOrFail(strings.getTString(string), strings.getEncoding(string)); + return enc.jcoding.leftAdjustCharHead(bytes, 0, p, bytes.length); } - } - @CoreMethod(names = "rb_enc_mbc_to_codepoint", onSingleton = true, required = 3, lowerFixnum = 3) + @CoreMethod(names = "rb_enc_mbc_to_codepoint", onSingleton = true, required = 1) public abstract static class RbEncMbcToCodepointNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "strings.isRubyString(string)") - protected int rbEncMbcToCodepoint(RubyEncoding enc, Object string, int end, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final Rope rope = strings.getRope(string); - return StringSupport.mbcToCode(enc.jcoding, rope, 0, end); - } - } - - @CoreMethod(names = "rb_enc_precise_mbclen", onSingleton = true, required = 4, lowerFixnum = { 3, 4 }) - public abstract static class RbEncPreciseMbclenNode extends CoreMethodArrayArgumentsNode { - - @Child private RopeNodes.CodeRangeNode codeRangeNode; - - @Specialization(guards = "strings.isRubyString(string)") - protected int rbEncPreciseMbclen(RubyEncoding enc, Object string, int p, int end, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached RopeNodes.CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached RopeNodes.GetBytesObjectNode getBytesObject, - @Cached ConditionProfile sameEncodingProfile) { - final Encoding encoding = enc.jcoding; - final Rope rope = strings.getRope(string); - final CodeRange cr; - if (sameEncodingProfile.profile(encoding == rope.getEncoding())) { - cr = codeRange(rope); + @Specialization(guards = "strings.isRubyString(string)", limit = "1") + protected int rbEncMbcToCodepoint(Object string, + @Cached RubyStringLibrary strings, + @Cached TruffleString.CodePointAtByteIndexNode codePointAtByteIndexNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Cached ConditionProfile brokenProfile) { + var tstring = 
strings.getTString(string); + var encoding = strings.getEncoding(string); + int codepoint = codePointAtByteIndexNode.execute(tstring, 0, encoding.tencoding, + ErrorHandling.RETURN_NEGATIVE); + if (brokenProfile.profile(codepoint == -1)) { + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + return StringSupport.mbcToCode(encoding.jcoding, byteArray.getArray(), byteArray.getOffset(), + byteArray.getEnd()); } else { - cr = CodeRange.CR_UNKNOWN; + return codepoint; } - - final int length = calculateCharacterLengthNode - .characterLength(encoding, cr, getBytesObject.getRange(rope, p, end)); - assert end - p >= length; // assert this condition not reached: https://github.com/ruby/ruby/blob/46a5d1b4a63f624f2c5c5b6f710cc1a176c88b02/encoding.c#L1046 - return length; } - - private CodeRange codeRange(Rope rope) { - if (codeRangeNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - codeRangeNode = insert(RopeNodes.CodeRangeNode.create()); - } - - return codeRangeNode.execute(rope); - } - } @Primitive(name = "cext_wrap") @@ -1700,11 +1684,11 @@ protected Object wrapFunction() { @CoreMethod(names = "rb_check_symbol_cstr", onSingleton = true, required = 1) public abstract static class RbCheckSymbolCStrNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected Object checkSymbolCStr(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { final RubySymbol sym = getLanguage().symbolTable.getSymbolIfExists( - strings.getRope(string), + strings.getTString(string), strings.getEncoding(string)); return sym == null ? 
nil : sym; } @@ -1721,34 +1705,38 @@ protected RubyArray rbAryNewFromValues(Object cArray, } @CoreMethod(names = "rb_tr_sprintf_types", onSingleton = true, required = 1) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism public abstract static class RBSprintfFormatNode extends CoreMethodArrayArgumentsNode { + @Child protected TruffleString.GetInternalByteArrayNode byteArrayNode = TruffleString.GetInternalByteArrayNode + .create(); + @Specialization( guards = { "libFormat.isRubyString(format)", - "equalNode.execute(libFormat.getRope(format), cachedFormatRope)" }, + "equalNode.execute(libFormat, format, cachedFormat, cachedEncoding)" }, limit = "2") protected Object typesCached(VirtualFrame frame, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, - @Cached("libFormat.getRope(format)") Rope cachedFormatRope, - @Cached("compileArgTypes(format, libFormat)") RubyArray cachedTypes, - @Cached RopeNodes.EqualNode equalNode) { + @Cached RubyStringLibrary libFormat, + @Cached("asTruffleStringUncached(format)") TruffleString cachedFormat, + @Cached("libFormat.getEncoding(format)") RubyEncoding cachedEncoding, + @Cached("compileArgTypes(cachedFormat, cachedEncoding, byteArrayNode)") RubyArray cachedTypes, + @Cached StringHelperNodes.EqualSameEncodingNode equalNode) { return cachedTypes; } - @Specialization(guards = "libFormat.isRubyString(format)") - protected RubyArray typesUncachd(VirtualFrame frame, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat) { - return compileArgTypes(format, libFormat); + @Specialization(guards = "libFormat.isRubyString(format)", limit = "1") + protected RubyArray typesUncached(VirtualFrame frame, Object format, + @Cached RubyStringLibrary libFormat) { + return compileArgTypes(libFormat.getTString(format), libFormat.getEncoding(format), byteArrayNode); } @TruffleBoundary - protected RubyArray compileArgTypes(Object format, 
RubyStringLibrary libFormat) { + protected RubyArray compileArgTypes(AbstractTruffleString format, RubyEncoding encoding, + TruffleString.GetInternalByteArrayNode byteArrayNode) { try { return new RBSprintfCompiler(getLanguage(), this) - .typeList(libFormat.getRope(format), getContext(), getLanguage()); + .typeList(format, encoding, byteArrayNode, getContext(), getLanguage()); } catch (InvalidFormatException e) { throw new RaiseException(getContext(), coreExceptions().argumentError(e.getMessage(), this)); } @@ -1756,11 +1744,10 @@ protected RubyArray compileArgTypes(Object format, RubyStringLibrary libFormat) } @CoreMethod(names = "rb_tr_sprintf", onSingleton = true, required = 3) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism public abstract static class RBSprintfNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode; + @Child private TruffleString.FromByteArrayNode fromByteArrayNode; private final BranchProfile exceptionProfile = BranchProfile.create(); private final ConditionProfile resizeProfile = ConditionProfile.create(); @@ -1768,24 +1755,23 @@ public abstract static class RBSprintfNode extends CoreMethodArrayArgumentsNode @Specialization( guards = { "libFormat.isRubyString(format)", - "equalNode.execute(libFormat.getRope(format), cachedFormatRope)" }, + "equalNode.execute(libFormat, format, cachedFormat, cachedEncoding)" }, limit = "2") - protected RubyString formatCached(VirtualFrame frame, Object format, Object stringReader, RubyArray argArray, + protected RubyString formatCached(Object format, Object stringReader, RubyArray argArray, @Cached TranslateInteropExceptionNode translateInteropExceptionNode, @Cached ArrayToObjectArrayNode arrayToObjectArrayNode, @Cached WrapNode wrapNode, @Cached UnwrapNode unwrapNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, - @Cached("libFormat.getRope(format)") Rope cachedFormatRope, - 
@Cached("cachedFormatRope.byteLength()") int cachedFormatLength, - @Cached("create(compileFormat(format, libFormat, stringReader))") DirectCallNode formatNode, - @Cached RopeNodes.EqualNode equalNode) { + @Cached RubyStringLibrary libFormat, + @Cached("asTruffleStringUncached(format)") TruffleString cachedFormat, + @Cached("libFormat.getEncoding(format)") RubyEncoding cachedEncoding, + @Cached("cachedFormat.byteLength(cachedEncoding.tencoding)") int cachedFormatLength, + @Cached("create(compileFormat(cachedFormat, cachedEncoding, stringReader))") DirectCallNode formatNode, + @Cached StringHelperNodes.EqualSameEncodingNode equalNode) { final BytesResult result; final Object[] arguments = arrayToObjectArrayNode.executeToObjectArray(argArray); try { - result = (BytesResult) formatNode - .call( - new Object[]{ arguments, arguments.length, null }); + result = (BytesResult) formatNode.call(new Object[]{ arguments, arguments.length, null }); } catch (FormatException e) { exceptionProfile.enter(); throw FormatExceptionTranslator.translate(getContext(), this, e); @@ -1796,27 +1782,27 @@ protected RubyString formatCached(VirtualFrame frame, Object format, Object stri @Specialization( guards = "libFormat.isRubyString(format)", - replaces = "formatCached") - protected RubyString formatUncached(VirtualFrame frame, Object format, Object stringReader, RubyArray argArray, + replaces = "formatCached", limit = "1") + protected RubyString formatUncached(Object format, Object stringReader, RubyArray argArray, @Cached TranslateInteropExceptionNode translateInteropExceptionNode, @Cached WrapNode wrapNode, @Cached UnwrapNode unwrapNode, @Cached IndirectCallNode formatNode, @Cached ArrayToObjectArrayNode arrayToObjectArrayNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat) { - final BytesResult result; + @Cached RubyStringLibrary libFormat) { + var tstring = libFormat.getTString(format); + var encoding = libFormat.getEncoding(format); final Object[] arguments = 
arrayToObjectArrayNode.executeToObjectArray(argArray); + final BytesResult result; try { - result = (BytesResult) formatNode - .call( - compileFormat(format, libFormat, stringReader), - new Object[]{ arguments, arguments.length, null }); + result = (BytesResult) formatNode.call(compileFormat(tstring, encoding, stringReader), + new Object[]{ arguments, arguments.length, null }); } catch (FormatException e) { exceptionProfile.enter(); throw FormatExceptionTranslator.translate(getContext(), this, e); } - return finishFormat(libFormat.getRope(format).byteLength(), result); + return finishFormat(tstring.byteLength(encoding.tencoding), result); } private RubyString finishFormat(int formatLength, BytesResult result) { @@ -1826,23 +1812,20 @@ private RubyString finishFormat(int formatLength, BytesResult result) { bytes = Arrays.copyOf(bytes, result.getOutputLength()); } - if (makeStringNode == null) { + if (fromByteArrayNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(StringNodes.MakeStringNode.create()); + fromByteArrayNode = insert(TruffleString.FromByteArrayNode.create()); } - return makeStringNode - .executeMake( - bytes, - result.getEncoding().getEncodingForLength(formatLength), - result.getStringCodeRange()); + return createString(fromByteArrayNode, bytes, result.getEncoding().getEncodingForLength(formatLength)); } @TruffleBoundary - protected RootCallTarget compileFormat(Object format, RubyStringLibrary libFormat, Object stringReader) { + protected RootCallTarget compileFormat(AbstractTruffleString format, RubyEncoding encoding, + Object stringReader) { try { return new RBSprintfCompiler(getLanguage(), this) - .compile(libFormat.getRope(format), stringReader); + .compile(format, encoding, stringReader); } catch (InvalidFormatException e) { throw new RaiseException(getContext(), coreExceptions().argumentError(e.getMessage(), this)); } diff --git a/src/main/java/org/truffleruby/cext/UnwrapNode.java 
b/src/main/java/org/truffleruby/cext/UnwrapNode.java index 6beb23771d55..9bb4826c7149 100644 --- a/src/main/java/org/truffleruby/cext/UnwrapNode.java +++ b/src/main/java/org/truffleruby/cext/UnwrapNode.java @@ -85,19 +85,19 @@ protected Object unwrapTaggedObject(long handle, return wrapper.getObject(); } - @TruffleBoundary - private void raiseError(long handle) { - throw CompilerDirectives.shouldNotReachHere("dead handle 0x" + Long.toHexString(handle)); - } - @Fallback - @TruffleBoundary protected ValueWrapper unWrapUnexpectedHandle(long handle) { // Avoid throwing a specialization exception when given an uninitialized or corrupt // handle. + CompilerDirectives.transferToInterpreterAndInvalidate(); throw CompilerDirectives.shouldNotReachHere("corrupt handle 0x" + Long.toHexString(handle)); } + @TruffleBoundary + private void raiseError(long handle) { + throw CompilerDirectives.shouldNotReachHere("dead handle 0x" + Long.toHexString(handle)); + } + public static UnwrapNativeNode create() { return UnwrapNativeNodeGen.create(); } diff --git a/src/main/java/org/truffleruby/cext/WrapNode.java b/src/main/java/org/truffleruby/cext/WrapNode.java index 13fb940d51d8..581775510f09 100644 --- a/src/main/java/org/truffleruby/cext/WrapNode.java +++ b/src/main/java/org/truffleruby/cext/WrapNode.java @@ -12,10 +12,9 @@ import static org.truffleruby.cext.ValueWrapperManager.LONG_TAG; import static org.truffleruby.cext.ValueWrapperManager.UNSET_HANDLE; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.Layouts; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.language.ImmutableRubyObject; import org.truffleruby.language.Nil; import org.truffleruby.language.NotProvided; @@ -71,13 +70,10 @@ protected ValueWrapper wrapUndef(NotProvided value) { } @Specialization - protected ValueWrapper wrapWrappedValue(ValueWrapper value) { - throw new 
RaiseException( - getContext(), - coreExceptions().argumentError( - RopeOperations.encodeAscii("Wrapping wrapped object", UTF8Encoding.INSTANCE), - Encodings.UTF_8, - this)); + protected ValueWrapper wrapWrappedValue(ValueWrapper value, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + var message = createString(fromJavaStringNode, "Wrapping wrapped object", Encodings.UTF_8); + throw new RaiseException(getContext(), coreExceptions().argumentError(message, this, null)); } @Specialization diff --git a/src/main/java/org/truffleruby/collections/ByteArrayBuilder.java b/src/main/java/org/truffleruby/collections/ByteArrayBuilder.java index c9b93afb06bc..6c04091b41cd 100644 --- a/src/main/java/org/truffleruby/collections/ByteArrayBuilder.java +++ b/src/main/java/org/truffleruby/collections/ByteArrayBuilder.java @@ -9,14 +9,19 @@ */ package org.truffleruby.collections; -import org.truffleruby.core.rope.RopeConstants; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.array.ArrayUtils; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringWithEncoding; import java.nio.charset.StandardCharsets; import java.util.Arrays; public class ByteArrayBuilder { - private static final byte[] EMPTY_BYTES = RopeConstants.EMPTY_BYTES; + private static final byte[] EMPTY_BYTES = ArrayUtils.EMPTY_BYTES; private byte[] bytes = EMPTY_BYTES; private int length; @@ -28,6 +33,12 @@ public ByteArrayBuilder(int size) { bytes = new byte[size]; } + public static ByteArrayBuilder create(InternalByteArray bytes) { + final ByteArrayBuilder builder = new ByteArrayBuilder(bytes.getLength()); + builder.append(bytes.getArray(), bytes.getOffset(), bytes.getLength()); + return builder; + } + public static ByteArrayBuilder createUnsafeBuilder(byte[] wrap) { final ByteArrayBuilder builder = new ByteArrayBuilder(); 
builder.unsafeReplace(wrap, wrap.length); @@ -70,6 +81,26 @@ public void append(byte[] appendBytes, int appendStart, int appendLength) { length += appendLength; } + public void append(InternalByteArray bytes) { + append(bytes.getArray(), bytes.getOffset(), bytes.getLength()); + } + + public void append(InternalByteArray bytes, int appendStart, int appendLength) { + append(bytes.getArray(), bytes.getOffset() + appendStart, appendLength); + } + + public void append(TStringWithEncoding other) { + append(other.getInternalByteArray()); + } + + public void append(TStringWithEncoding other, int appendStart, int appendLength) { + append(other.getInternalByteArray(), appendStart, appendLength); + } + + public void append(TruffleString other, RubyEncoding enc) { + append(other.getInternalByteArrayUncached(enc.tencoding)); + } + public void unsafeReplace(byte[] bytes, int size) { this.bytes = bytes; this.length = size; @@ -103,6 +134,14 @@ public String toString() { return new String(bytes, 0, length, StandardCharsets.ISO_8859_1); } + public TruffleString toTString(RubyEncoding encoding) { + return TStringUtils.fromByteArray(getBytes(), encoding); + } + + public TStringWithEncoding toTStringWithEnc(RubyEncoding encoding) { + return new TStringWithEncoding(TStringUtils.fromByteArray(getBytes(), encoding), encoding); + } + // TODO CS 14-Feb-17 review all uses of this method public byte[] getUnsafeBytes() { return bytes; diff --git a/src/main/java/org/truffleruby/collections/WeakValueCache.java b/src/main/java/org/truffleruby/collections/WeakValueCache.java index 7780b4388787..6a365b4f1c28 100644 --- a/src/main/java/org/truffleruby/collections/WeakValueCache.java +++ b/src/main/java/org/truffleruby/collections/WeakValueCache.java @@ -48,7 +48,7 @@ * Callers must hold to the returned value. The entry will stay in the map as long as the value is referenced. 
*/ public class WeakValueCache implements ReHashable { - private Map> map; + private ConcurrentHashMap> map; private final ReferenceQueue deadRefs = new ReferenceQueue<>(); @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/CoreLibrary.java b/src/main/java/org/truffleruby/core/CoreLibrary.java index 36919fd7cde5..e45ca6503541 100644 --- a/src/main/java/org/truffleruby/core/CoreLibrary.java +++ b/src/main/java/org/truffleruby/core/CoreLibrary.java @@ -24,8 +24,8 @@ import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.exception.AbstractTruffleException; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.collections.Pair; -import org.jcodings.specific.USASCIIEncoding; import org.jcodings.transcode.EConvFlags; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; @@ -38,16 +38,15 @@ import org.truffleruby.core.basicobject.RubyBasicObject; import org.truffleruby.core.binding.RubyBinding; import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.klass.ClassNodes; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.module.ModuleNodes; import org.truffleruby.core.module.RubyModule; import org.truffleruby.core.numeric.BigIntegerOps; import org.truffleruby.core.numeric.RubyBignum; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.debug.BindingLocalVariablesObject; import org.truffleruby.debug.GlobalVariablesObject; import org.truffleruby.debug.TopScopeObject; @@ -722,8 +721,7 @@ private void setConstant(RubyModule module, String name, Object value) { } private ImmutableRubyString frozenUSASCIIString(String string) { - return language.getFrozenStringLiteral( - StringOperations.encodeRope(string, 
USASCIIEncoding.INSTANCE, CodeRange.CR_7BIT)); + return language.getFrozenStringLiteral(TStringUtils.usAsciiString(string), Encodings.US_ASCII); } private RubyClass defineClass(String name) { @@ -761,7 +759,7 @@ public void loadRubyCoreLibraryAndPostBoot() { state = State.LOADED; } - final Pair sourceRopePair = loadCoreFileSource(language.coreLoadPath + file); + var sourceRopePair = loadCoreFileSource(language.coreLoadPath + file); final Source source = sourceRopePair.getLeft(); final RootCallTarget callTarget = context.getCodeLoader().parseTopLevelWithCache(sourceRopePair, node); @@ -786,7 +784,7 @@ public void loadRubyCoreLibraryAndPostBoot() { } } - public Pair loadCoreFileSource(String path) throws IOException { + public Pair loadCoreFileSource(String path) throws IOException { if (path.startsWith(RubyLanguage.RESOURCE_SCHEME)) { if (TruffleOptions.AOT || ParserCache.INSTANCE != null) { final RootParseNode rootParseNode = ParserCache.INSTANCE.get(path); @@ -815,10 +813,11 @@ private void afterLoadCoreLibrary() { // Initialize $0 so it is set to a String as RubyGems expect, also when not run from the RubyLauncher // NOTE(norswap, Nov. 2nd 2020): Okay for language access to be slow, currently only used during initialization. 
- RubyString dollarZeroValue = StringOperations.createString( - context, - language, - StringOperations.encodeRope("-", USASCIIEncoding.INSTANCE, CodeRange.CR_7BIT), + RubyString dollarZeroValue = new RubyString( + stringClass, + language.stringShape, + false, + TruffleString.fromCodePointUncached('-', TruffleString.Encoding.US_ASCII), Encodings.US_ASCII); int index = language.getGlobalVariableIndex("$0"); context.getGlobalVariableStorage(index).setValueInternal(dollarZeroValue); diff --git a/src/main/java/org/truffleruby/core/GCNodes.java b/src/main/java/org/truffleruby/core/GCNodes.java index 13fdd4689f01..a00bd8b5b232 100644 --- a/src/main/java/org/truffleruby/core/GCNodes.java +++ b/src/main/java/org/truffleruby/core/GCNodes.java @@ -18,6 +18,7 @@ import com.oracle.truffle.api.TruffleSafepoint; import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.SuppressFBWarnings; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -30,8 +31,6 @@ import org.truffleruby.collections.WeakValueCache; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.language.control.RaiseException; @CoreModule("GC") @@ -196,7 +195,7 @@ public abstract static class GCHeapStatsNode extends PrimitiveArrayArgumentsNode @TruffleBoundary @Specialization protected RubyArray heapStats( - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { String[] memoryPoolNames = new String[0]; Object[] memoryPools; @@ -225,8 +224,7 @@ protected RubyArray heapStats( Object[] memoryPoolNamesCast = new Object[memoryPoolNames.length]; for (int i = 0; i < memoryPoolNames.length; i++) { - memoryPoolNamesCast[i] = makeStringNode - .executeMake(memoryPoolNames[i], Encodings.UTF_8, 
CodeRange.CR_UNKNOWN); + memoryPoolNamesCast[i] = createString(fromJavaStringNode, memoryPoolNames[i], Encodings.UTF_8); } diff --git a/src/main/java/org/truffleruby/core/TruffleSystemNodes.java b/src/main/java/org/truffleruby/core/TruffleSystemNodes.java index 0225e0282bd0..970e5dc482ac 100644 --- a/src/main/java/org/truffleruby/core/TruffleSystemNodes.java +++ b/src/main/java/org/truffleruby/core/TruffleSystemNodes.java @@ -43,7 +43,7 @@ import java.util.Set; import java.util.logging.Level; -import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyLanguage; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -54,12 +54,12 @@ import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.interop.FromJavaStringNode; +import org.truffleruby.interop.ToJavaStringNode; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.control.RaiseException; import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.platform.Platform; @@ -77,7 +77,7 @@ public abstract class TruffleSystemNodes { @CoreMethod(names = "initial_environment_variables", onSingleton = true) public abstract static class InitEnvVarsNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -88,7 +88,7 @@ protected RubyArray envVars() { final Object[] store = new Object[size]; int i = 0; for 
(String variable : variables) { - store[i++] = makeStringNode.executeMake(variable, localeRubyEncoding, CodeRange.CR_UNKNOWN); + store[i++] = createString(fromJavaStringNode, variable, localeRubyEncoding); } return createArray(store); } @@ -98,12 +98,13 @@ protected RubyArray envVars() { @Primitive(name = "java_get_env") public abstract static class JavaGetEnv extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "strings.isRubyString(name)") + @Specialization(guards = "strings.isRubyString(name)", limit = "1") protected Object javaGetEnv(Object name, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode, @Cached FromJavaStringNode fromJavaStringNode, @Cached ConditionProfile nullValueProfile) { - final String javaName = strings.getJavaString(name); + final String javaName = toJavaStringNode.executeToJavaString(name); final String value = getEnv(javaName); if (nullValueProfile.profile(value == null)) { @@ -124,12 +125,12 @@ private String getEnv(String name) { public abstract static class SetTruffleWorkingDirNode extends PrimitiveArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "stringsDir.isRubyString(dir)") + @Specialization(guards = "stringsDir.isRubyString(dir)", limit = "1") protected Object setTruffleWorkingDir(Object dir, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsDir) { + @Cached RubyStringLibrary stringsDir) { TruffleFile truffleFile = getContext() .getEnv() - .getPublicTruffleFile(stringsDir.getJavaString(dir)); + .getPublicTruffleFile(RubyGuards.getJavaString(dir)); final TruffleFile canonicalFile; try { canonicalFile = truffleFile.getCanonicalFile(); @@ -149,10 +150,10 @@ protected Object setTruffleWorkingDir(Object dir, public abstract static class GetTruffleWorkingDirNode extends PrimitiveArrayArgumentsNode { @Specialization protected RubyString getTruffleWorkingDir( - @Cached MakeStringNode 
makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final String cwd = getContext().getFeatureLoader().getWorkingDirectory(); final RubyEncoding externalRubyEncoding = getContext().getEncodingManager().getDefaultExternalEncoding(); - return makeStringNode.executeMake(cwd, externalRubyEncoding, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, cwd, externalRubyEncoding); } } @@ -160,11 +161,11 @@ protected RubyString getTruffleWorkingDir( public abstract static class GetJavaPropertiesNode extends CoreMethodArrayArgumentsNode { @Specialization protected Object getJavaProperties( - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { String[] properties = getProperties(); Object[] array = new Object[properties.length]; for (int i = 0; i < properties.length; i++) { - array[i] = makeStringNode.executeMake(properties[i], Encodings.UTF_8, CodeRange.CR_UNKNOWN); + array[i] = createString(fromJavaStringNode, properties[i], Encodings.UTF_8); } return createArray(array); } @@ -178,16 +179,17 @@ private static String[] getProperties() { @CoreMethod(names = "get_java_property", onSingleton = true, required = 1) public abstract static class GetJavaPropertyNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); - @Specialization(guards = "strings.isRubyString(property)") + @Specialization(guards = "strings.isRubyString(property)", limit = "1") protected Object getJavaProperty(Object property, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - String value = getProperty(strings.getJavaString(property)); + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode) { + String value = getProperty(toJavaStringNode.executeToJavaString(property)); if (value == null) { 
return nil; } else { - return makeStringNode.executeMake(value, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, value, Encodings.UTF_8); } } @@ -200,11 +202,11 @@ private static String getProperty(String key) { @CoreMethod(names = "host_cpu", onSingleton = true) public abstract static class HostCPUNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString hostCPU() { - return makeStringNode.executeMake(BasicPlatform.getArchName(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, BasicPlatform.getArchName(), Encodings.UTF_8); } } @@ -212,11 +214,11 @@ protected RubyString hostCPU() { @CoreMethod(names = "host_os", onSingleton = true) public abstract static class HostOSNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString hostOS() { - return makeStringNode.executeMake(Platform.getOSName(), Encodings.UTF_8, CodeRange.CR_7BIT); + return createString(fromJavaStringNode, Platform.getOSName(), Encodings.UTF_8); } } @@ -224,19 +226,21 @@ protected RubyString hostOS() { @CoreMethod(names = "log", onSingleton = true, required = 2) public abstract static class LogNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = { "strings.isRubyString(message)", "level == cachedLevel" }) + @Specialization(guards = { "strings.isRubyString(message)", "level == cachedLevel" }, limit = "3") protected Object logCached(RubySymbol level, Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode, @Cached("level") 
RubySymbol cachedLevel, @Cached("getLevel(cachedLevel)") Level javaLevel) { - log(javaLevel, strings.getJavaString(message)); + log(javaLevel, toJavaStringNode.executeToJavaString(message)); return nil; } - @Specialization(guards = "strings.isRubyString(message)", replaces = "logCached") + @Specialization(guards = "strings.isRubyString(message)", replaces = "logCached", limit = "1") protected Object log(RubySymbol level, Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - log(getLevel(level), strings.getJavaString(message)); + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode) { + log(getLevel(level), toJavaStringNode.executeToJavaString(message)); return nil; } diff --git a/src/main/java/org/truffleruby/core/VMPrimitiveNodes.java b/src/main/java/org/truffleruby/core/VMPrimitiveNodes.java index 5172956e3bfd..3cfb2e74c3b2 100644 --- a/src/main/java/org/truffleruby/core/VMPrimitiveNodes.java +++ b/src/main/java/org/truffleruby/core/VMPrimitiveNodes.java @@ -46,6 +46,7 @@ import com.oracle.truffle.api.interop.UnsupportedMessageException; import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.builtins.CoreModule; @@ -64,14 +65,13 @@ import org.truffleruby.core.numeric.RubyBignum; import org.truffleruby.core.proc.ProcOperations; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.core.thread.RubyThread; import org.truffleruby.extra.ffi.Pointer; import org.truffleruby.interop.TranslateInteropExceptionNode; import org.truffleruby.language.RubyDynamicObject; +import org.truffleruby.language.RubyGuards; 
import org.truffleruby.language.SafepointAction; import org.truffleruby.language.arguments.ArgumentsDescriptor; import org.truffleruby.language.arguments.EmptyArgumentsDescriptor; @@ -268,12 +268,13 @@ protected Object doThrow(Object tag, Object value) { public abstract static class VMWatchSignalNode extends PrimitiveArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = { "libSignalString.isRubyString(signalString)", "libAction.isRubyString(action)" }) + @Specialization(guards = { "libSignalString.isRubyString(signalString)", "libAction.isRubyString(action)" }, + limit = "1") protected boolean watchSignalString(Object signalString, boolean isRubyDefaultHandler, Object action, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSignalString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libAction) { - final String actionString = libAction.getJavaString(action); - final String signalName = libSignalString.getJavaString(signalString); + @Cached RubyStringLibrary libSignalString, + @Cached RubyStringLibrary libAction) { + final String actionString = RubyGuards.getJavaString(action); + final String signalName = RubyGuards.getJavaString(signalString); switch (actionString) { case "DEFAULT": @@ -288,9 +289,9 @@ protected boolean watchSignalString(Object signalString, boolean isRubyDefaultHa } @TruffleBoundary - @Specialization(guards = "libSignalString.isRubyString(signalString)") + @Specialization(guards = "libSignalString.isRubyString(signalString)", limit = "1") protected boolean watchSignalProc(Object signalString, boolean isRubyDefaultHandler, RubyProc action, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSignalString) { + @Cached RubyStringLibrary libSignalString) { final RubyContext context = getContext(); if (getLanguage().getCurrentThread() != context.getThreadManager().getRootThread()) { @@ -298,7 +299,7 @@ protected boolean watchSignalProc(Object signalString, boolean isRubyDefaultHand 
SharedObjects.writeBarrier(getLanguage(), action); } - final String signalName = libSignalString.getJavaString(signalString); + final String signalName = RubyGuards.getJavaString(signalString); return registerHandler(signalName, signal -> { final RubyThread rootThread = context.getThreadManager().getRootThread(); @@ -383,9 +384,8 @@ public abstract static class VMGetConfigItemNode extends PrimitiveArrayArguments @TruffleBoundary @Specialization - protected Object get(Object key, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary library) { - final String keyString = library.getJavaString(key); + protected Object get(Object key) { + final String keyString = RubyGuards.getJavaString(key); final Object value = getContext().getNativeConfiguration().get(keyString); if (value == null) { @@ -403,14 +403,12 @@ public abstract static class VMGetConfigSectionNode extends PrimitiveArrayArgume @TruffleBoundary @Specialization protected Object getSection(Object section, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSection, - @Cached MakeStringNode makeStringNode, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode, @Cached CallBlockNode yieldNode) { for (Entry entry : getContext() .getNativeConfiguration() - .getSection(libSection.getJavaString(section))) { - final RubyString key = makeStringNode - .executeMake(entry.getKey(), Encodings.UTF_8, CodeRange.CR_7BIT); + .getSection(RubyGuards.getJavaString(section))) { + final RubyString key = createString(fromJavaStringNode, entry.getKey(), Encodings.UTF_8); // CR_7BIT yieldNode.yield(block, key, entry.getValue()); } @@ -439,10 +437,10 @@ public abstract static class VMDevUrandomBytes extends PrimitiveArrayArgumentsNo @Specialization(guards = "count >= 0") protected RubyString readRandomBytes(int count, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final byte[] bytes = getContext().getRandomSeedBytes(count); - return 
makeStringNode.executeMake(bytes, Encodings.BINARY, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } @Specialization(guards = "count < 0") @@ -450,9 +448,7 @@ protected RubyString negativeCount(int count) { throw new RaiseException( getContext(), getContext().getCoreExceptions().argumentError( - coreStrings().NEGATIVE_STRING_SIZE.getRope(), - Encodings.BINARY, - this)); + coreStrings().NEGATIVE_STRING_SIZE.createInstance(getContext()), this, null)); } } @@ -565,10 +561,11 @@ protected Object initStackOverflowClassesEagerly() { @Primitive(name = "should_not_reach_here") public abstract static class ShouldNotReachHereNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "libString.isRubyString(message)") + @Specialization(guards = "libString.isRubyString(message)", limit = "1") protected Object shouldNotReachHere(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - throw CompilerDirectives.shouldNotReachHere(libString.getJavaString(message)); + @Cached RubyStringLibrary libString) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + throw CompilerDirectives.shouldNotReachHere(RubyGuards.getJavaString(message)); } } diff --git a/src/main/java/org/truffleruby/core/adapters/OutputStreamAdapter.java b/src/main/java/org/truffleruby/core/adapters/OutputStreamAdapter.java index dfefa01e571a..a3713841f67f 100644 --- a/src/main/java/org/truffleruby/core/adapters/OutputStreamAdapter.java +++ b/src/main/java/org/truffleruby/core/adapters/OutputStreamAdapter.java @@ -14,9 +14,8 @@ import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.RubyString; import 
org.truffleruby.core.support.RubyIO; import org.truffleruby.language.dispatch.DispatchNode; @@ -36,8 +35,15 @@ public OutputStreamAdapter(RubyContext context, RubyLanguage language, RubyIO ob @Override public void write(int bite) { - DispatchNode.getUncached().call(object, "write", StringOperations.createString(context, language, - RopeOperations.create((byte) bite, encoding.jcoding, CodeRange.CR_UNKNOWN), encoding)); + DispatchNode.getUncached().call( + object, + "write", + new RubyString( + context.getCoreLibrary().stringClass, + language.stringShape, + false, + TStringUtils.fromByteArray(new byte[]{ (byte) bite }, encoding), + encoding)); } } diff --git a/src/main/java/org/truffleruby/core/array/ArrayNodes.java b/src/main/java/org/truffleruby/core/array/ArrayNodes.java index 9c52435a1875..94e81a764df3 100644 --- a/src/main/java/org/truffleruby/core/array/ArrayNodes.java +++ b/src/main/java/org/truffleruby/core/array/ArrayNodes.java @@ -18,6 +18,7 @@ import com.oracle.truffle.api.TruffleSafepoint; import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.profiles.LoopConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.collections.EconomicSet; import org.graalvm.collections.Equivalence; import org.truffleruby.Layouts; @@ -60,13 +61,8 @@ import org.truffleruby.core.numeric.FixnumLowerNode; import org.truffleruby.core.proc.RubyProc; import org.truffleruby.core.range.RangeNodes.NormalizedStartLengthNode; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringCachingGuards; -import org.truffleruby.core.string.StringNodes; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.support.TypeNodes; import org.truffleruby.core.symbol.RubySymbol; import 
org.truffleruby.extra.ffi.Pointer; @@ -1539,12 +1535,10 @@ public void accept(RubyArray array, RubyProc block, Object element, int index) { @NodeChild(value = "array", type = RubyNode.class) @NodeChild(value = "format", type = RubyBaseNodeWithExecute.class) @CoreMethod(names = "pack", required = 1) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism public abstract static class PackNode extends CoreMethodNode { - @Child private RopeNodes.MakeLeafRopeNode makeLeafRopeNode; - @Child private StringNodes.MakeStringNode makeStringNode; + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @Child private WriteObjectFieldNode writeAssociatedNode; private final BranchProfile exceptionProfile = BranchProfile.create(); @@ -1558,16 +1552,16 @@ protected ToStrNode coerceFormat(RubyBaseNodeWithExecute format) { @Specialization( guards = { "libFormat.isRubyString(format)", - "equalNode.execute(libFormat.getRope(format), cachedFormat)" }, + "equalNode.execute(libFormat, format, cachedFormat, cachedEncoding)" }, limit = "getCacheLimit()") protected RubyString packCached(RubyArray array, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, - @Cached("libFormat.getRope(format)") Rope cachedFormat, - @Cached("cachedFormat.byteLength()") int cachedFormatLength, - @Cached("create(compileFormat(libFormat.getRope(format)))") DirectCallNode callPackNode, - @Cached RopeNodes.EqualNode equalNode) { + @Cached RubyStringLibrary libFormat, + @Cached("asTruffleStringUncached(format)") TruffleString cachedFormat, + @Cached("libFormat.getEncoding(format)") RubyEncoding cachedEncoding, + @Cached("cachedFormat.byteLength(cachedEncoding.tencoding)") int cachedFormatLength, + @Cached("create(compileFormat(getJavaString(format)))") DirectCallNode callPackNode, + @Cached StringHelperNodes.EqualNode equalNode) { final BytesResult result; - try { result = (BytesResult) 
callPackNode.call( new Object[]{ array.getStore(), array.size, false, null }); @@ -1579,13 +1573,14 @@ protected RubyString packCached(RubyArray array, Object format, return finishPack(cachedFormatLength, result); } - @Specialization(guards = { "libFormat.isRubyString(format)" }, replaces = "packCached") + @Specialization(guards = { "libFormat.isRubyString(format)" }, replaces = "packCached", limit = "1") protected RubyString packUncached(RubyArray array, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, + @Cached RubyStringLibrary libFormat, + @Cached ToJavaStringNode toJavaStringNode, @Cached IndirectCallNode callPackNode) { - final BytesResult result; + final String formatRope = toJavaStringNode.executeToJavaString(format); - final Rope formatRope = libFormat.getRope(format); + final BytesResult result; try { result = (BytesResult) callPackNode.call( compileFormat(formatRope), @@ -1595,7 +1590,8 @@ protected RubyString packUncached(RubyArray array, Object format, throw FormatExceptionTranslator.translate(getContext(), this, e); } - return finishPack(formatRope.byteLength(), result); + int formatLength = libFormat.getTString(format).byteLength(libFormat.getTEncoding(format)); + return finishPack(formatLength, result); } private RubyString finishPack(int formatLength, BytesResult result) { @@ -1605,24 +1601,8 @@ private RubyString finishPack(int formatLength, BytesResult result) { bytes = Arrays.copyOf(bytes, result.getOutputLength()); } - if (makeLeafRopeNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - makeLeafRopeNode = insert(RopeNodes.MakeLeafRopeNode.create()); - } - - if (makeStringNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(StringNodes.MakeStringNode.create()); - } - final RubyEncoding rubyEncoding = result.getEncoding().getEncodingForLength(formatLength); - final RubyString string = makeStringNode.fromRope( - 
makeLeafRopeNode.executeMake( - bytes, - rubyEncoding.jcoding, - result.getStringCodeRange(), - result.getStringLength()), - rubyEncoding); + final RubyString string = createString(fromByteArrayNode, bytes, rubyEncoding); if (result.getAssociated() != null) { if (writeAssociatedNode == null) { @@ -1637,10 +1617,9 @@ private RubyString finishPack(int formatLength, BytesResult result) { } @TruffleBoundary - protected RootCallTarget compileFormat(Rope rope) { - final String javaString = RopeOperations.decodeRope(rope); + protected RootCallTarget compileFormat(String format) { try { - return new PackCompiler(getLanguage(), this).compile(javaString); + return new PackCompiler(getLanguage(), this).compile(format); } catch (DeferredRaiseException dre) { throw dre.getException(getContext()); } diff --git a/src/main/java/org/truffleruby/core/array/ArrayUtils.java b/src/main/java/org/truffleruby/core/array/ArrayUtils.java index 3e3fc7809a09..be9c773628f6 100644 --- a/src/main/java/org/truffleruby/core/array/ArrayUtils.java +++ b/src/main/java/org/truffleruby/core/array/ArrayUtils.java @@ -24,6 +24,7 @@ public abstract class ArrayUtils { public static final Object[] EMPTY_ARRAY = new Object[0]; + public static final byte[] EMPTY_BYTES = new byte[0]; public static boolean assertValidElements(Object[] array, int start, int count) { assert start <= array.length; diff --git a/src/main/java/org/truffleruby/core/basicobject/BasicObjectNodes.java b/src/main/java/org/truffleruby/core/basicobject/BasicObjectNodes.java index 5f418e3fbb94..a34d16282a01 100644 --- a/src/main/java/org/truffleruby/core/basicobject/BasicObjectNodes.java +++ b/src/main/java/org/truffleruby/core/basicobject/BasicObjectNodes.java @@ -13,6 +13,7 @@ import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.interop.UnsupportedMessageException; import com.oracle.truffle.api.object.Shape; +import com.oracle.truffle.api.strings.AbstractTruffleString; import org.truffleruby.Layouts; import 
org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -22,6 +23,7 @@ import org.truffleruby.core.basicobject.BasicObjectNodesFactory.ReferenceEqualNodeFactory; import org.truffleruby.core.cast.BooleanCastNode; import org.truffleruby.core.cast.NameToJavaStringNode; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.exception.ExceptionOperations.ExceptionFormatter; import org.truffleruby.core.exception.RubyException; import org.truffleruby.core.inlined.AlwaysInlinedMethodNode; @@ -30,9 +32,8 @@ import org.truffleruby.core.numeric.RubyBignum; import org.truffleruby.core.objectspace.ObjectSpaceManager; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.symbol.RubySymbol; +import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.interop.TranslateInteropExceptionNode; import org.truffleruby.language.ImmutableRubyObject; import org.truffleruby.language.LexicalScope; @@ -317,11 +318,13 @@ protected Object initialize(Frame callerFrame, Object self, Object[] rubyArgs, R @CoreMethod(names = "instance_eval", needsBlock = true, optional = 3, lowerFixnum = 3) public abstract static class InstanceEvalNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = { "strings.isRubyString(string)", "stringsFileName.isRubyString(fileName)" }) + @Specialization(guards = { "strings.isRubyString(string)", "stringsFileName.isRubyString(fileName)" }, + limit = "1") protected Object instanceEval( VirtualFrame frame, Object receiver, Object string, Object fileName, int line, Nil block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsFileName, + @Cached RubyStringLibrary strings, + @Cached RubyStringLibrary stringsFileName, + @Cached ToJavaStringNode toJavaStringNode, @Cached ReadCallerFrameNode 
callerFrameNode, @Cached IndirectCallNode callNode) { final MaterializedFrame callerFrame = callerFrameNode.execute(frame); @@ -329,17 +332,20 @@ protected Object instanceEval( return instanceEvalHelper( callerFrame, receiver, - strings.getRope(string), - stringsFileName.getRope(fileName), + strings.getTString(string), + strings.getEncoding(string), + toJavaStringNode.executeToJavaString(fileName), line, callNode); } - @Specialization(guards = { "strings.isRubyString(string)", "stringsFileName.isRubyString(fileName)" }) + @Specialization(guards = { "strings.isRubyString(string)", "stringsFileName.isRubyString(fileName)" }, + limit = "1") protected Object instanceEval( VirtualFrame frame, Object receiver, Object string, Object fileName, NotProvided line, Nil block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsFileName, + @Cached RubyStringLibrary strings, + @Cached RubyStringLibrary stringsFileName, + @Cached ToJavaStringNode toJavaStringNode, @Cached ReadCallerFrameNode callerFrameNode, @Cached IndirectCallNode callNode) { final MaterializedFrame callerFrame = callerFrameNode.execute(frame); @@ -347,16 +353,17 @@ protected Object instanceEval( return instanceEvalHelper( callerFrame, receiver, - strings.getRope(string), - stringsFileName.getRope(fileName), + strings.getTString(string), + strings.getEncoding(string), + toJavaStringNode.executeToJavaString(fileName), 1, callNode); } - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected Object instanceEval( VirtualFrame frame, Object receiver, Object string, NotProvided fileName, NotProvided line, Nil block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached ReadCallerFrameNode callerFrameNode, @Cached IndirectCallNode callNode) { final MaterializedFrame callerFrame = 
callerFrameNode.execute(frame); @@ -364,8 +371,9 @@ protected Object instanceEval( return instanceEvalHelper( callerFrame, receiver, - strings.getRope(string), - coreStrings().EVAL_FILENAME_STRING.createInstance(getContext()).rope, + strings.getTString(string), + strings.getEncoding(string), + coreStrings().EVAL_FILENAME_STRING.toString(), 1, callNode); } @@ -396,12 +404,11 @@ protected Object argsAndBlock( } @TruffleBoundary - private Object instanceEvalHelper(MaterializedFrame callerFrame, Object receiver, Rope stringRope, - Rope fileNameRope, int line, IndirectCallNode callNode) { - final String fileNameString = RopeOperations.decodeRope(fileNameRope); - + private Object instanceEvalHelper(MaterializedFrame callerFrame, Object receiver, AbstractTruffleString code, + RubyEncoding encoding, + String fileNameString, int line, IndirectCallNode callNode) { final RubySource source = EvalLoader - .createEvalSource(getContext(), stringRope, "instance_eval", fileNameString, line, this); + .createEvalSource(getContext(), code, encoding, "instance_eval", fileNameString, line, this); final LexicalScope lexicalScope = RubyArguments.getMethod(callerFrame).getLexicalScope(); final RootCallTarget callTarget = getContext().getCodeLoader().parse( diff --git a/src/main/java/org/truffleruby/core/binding/BindingNodes.java b/src/main/java/org/truffleruby/core/binding/BindingNodes.java index 3fe145ec049e..7ddcad668aa3 100644 --- a/src/main/java/org/truffleruby/core/binding/BindingNodes.java +++ b/src/main/java/org/truffleruby/core/binding/BindingNodes.java @@ -20,6 +20,7 @@ import com.oracle.truffle.api.frame.FrameInstance.FrameAccess; import com.oracle.truffle.api.frame.FrameSlotKind; import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.Layouts; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; @@ -33,9 +34,7 @@ import org.truffleruby.core.cast.NameToJavaStringNode; import 
org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.language.CallStackManager; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyBaseNodeWithExecute; @@ -477,16 +476,15 @@ public abstract static class SourceLocationNode extends UnaryCoreMethodNode { @TruffleBoundary @Specialization protected Object sourceLocation(RubyBinding binding, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final SourceSection sourceSection = binding.sourceSection; if (sourceSection == null) { return nil; } else { - final RubyString file = makeStringNode.executeMake( + final RubyString file = createString(fromJavaStringNode, getLanguage().getSourcePath(sourceSection.getSource()), - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + Encodings.UTF_8); return createArray(new Object[]{ file, sourceSection.getStartLine() }); } } diff --git a/src/main/java/org/truffleruby/core/cast/NameToJavaStringNode.java b/src/main/java/org/truffleruby/core/cast/NameToJavaStringNode.java index dffda7a8f8c3..1588f022eb7a 100644 --- a/src/main/java/org/truffleruby/core/cast/NameToJavaStringNode.java +++ b/src/main/java/org/truffleruby/core/cast/NameToJavaStringNode.java @@ -9,9 +9,7 @@ */ package org.truffleruby.core.cast; -import com.oracle.truffle.api.library.CachedLibrary; -import org.truffleruby.core.string.StringCachingGuards; -import org.truffleruby.core.string.StringOperations; +import com.oracle.truffle.api.dsl.Cached.Shared; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.language.RubyBaseNodeWithExecute; @@ -22,14 +20,12 @@ import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.GenerateUncached; -import 
com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.profiles.BranchProfile; /** Converts a method name to a Java String. The exception message below assumes this conversion is done for a method * name. */ -@ImportStatic({ StringCachingGuards.class, StringOperations.class }) @GenerateUncached @NodeChild(value = "value", type = RubyBaseNodeWithExecute.class) public abstract class NameToJavaStringNode extends RubyBaseNodeWithExecute { @@ -48,16 +44,16 @@ public static NameToJavaStringNode getUncached() { public abstract String execute(Object name); - @Specialization(guards = "strings.isRubyString(value)") + @Specialization(guards = "strings.isRubyString(value)", limit = "1") protected String stringNameToJavaString(Object value, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached ToJavaStringNode toJavaStringNode) { + @Cached RubyStringLibrary strings, + @Cached @Shared("toJavaStringNode") ToJavaStringNode toJavaStringNode) { return toJavaStringNode.executeToJavaString(value); } @Specialization protected String symbolNameToJavaString(RubySymbol value, - @Cached ToJavaStringNode toJavaStringNode) { + @Cached @Shared("toJavaStringNode") ToJavaStringNode toJavaStringNode) { return toJavaStringNode.executeToJavaString(value); } @@ -70,7 +66,8 @@ protected String nameToJavaString(String value) { protected String nameToJavaString(Object object, @Cached BranchProfile errorProfile, @Cached DispatchNode toStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString, + @Cached @Shared("toJavaStringNode") ToJavaStringNode toJavaStringNode) { final Object coerced; try { coerced = toStr.call(object, "to_str"); @@ -86,7 +83,7 @@ protected String nameToJavaString(Object object, } if (libString.isRubyString(coerced)) { - return libString.getJavaString(coerced); + return 
toJavaStringNode.executeToJavaString(coerced); } else { errorProfile.enter(); throw new RaiseException(getContext(), coreExceptions().typeErrorBadCoercion( diff --git a/src/main/java/org/truffleruby/core/cast/StringToSymbolNode.java b/src/main/java/org/truffleruby/core/cast/StringToSymbolNode.java index c41abe269515..b060923a08b9 100644 --- a/src/main/java/org/truffleruby/core/cast/StringToSymbolNode.java +++ b/src/main/java/org/truffleruby/core/cast/StringToSymbolNode.java @@ -9,27 +9,23 @@ */ package org.truffleruby.core.cast; -import org.truffleruby.core.string.RubyString; +import com.oracle.truffle.api.dsl.Cached; import org.truffleruby.core.symbol.RubySymbol; -import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.language.RubyContextSourceNode; import org.truffleruby.language.RubyNode; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; +import org.truffleruby.language.library.RubyStringLibrary; /** Creates a symbol from a string. Must be a RubyNode because it's used in the translator. */ @NodeChild(value = "string", type = RubyNode.class) public abstract class StringToSymbolNode extends RubyContextSourceNode { @Specialization - protected RubySymbol doString(RubyString string) { - return getSymbol(string.rope, string.encoding); - } - - @Specialization - protected RubySymbol doString(ImmutableRubyString string) { - return getSymbol(string.rope, string.encoding); + protected RubySymbol doString(Object string, + @Cached RubyStringLibrary libString) { + return getSymbol(libString.getTString(string), libString.getEncoding(string)); } } diff --git a/src/main/java/org/truffleruby/core/cast/ToEncodingNode.java b/src/main/java/org/truffleruby/core/cast/ToEncodingNode.java deleted file mode 100644 index 4fc45f3bbc7a..000000000000 --- a/src/main/java/org/truffleruby/core/cast/ToEncodingNode.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. 
All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.cast; - -import org.jcodings.Encoding; -import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.regexp.RubyRegexp; -import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.symbol.RubySymbol; -import org.truffleruby.core.string.ImmutableRubyString; -import org.truffleruby.language.RubyBaseNode; - -import com.oracle.truffle.api.dsl.Fallback; -import com.oracle.truffle.api.dsl.Specialization; - -/** Take a Ruby object that has an encoding and extracts the Java-level encoding object. */ -public abstract class ToEncodingNode extends RubyBaseNode { - - public static ToEncodingNode create() { - return ToEncodingNodeGen.create(); - } - - public abstract Encoding executeToEncoding(Object value); - - @Specialization - protected Encoding stringToEncoding(RubyString value) { - return value.getJCoding(); - } - - @Specialization - protected Encoding immutableStringToEncoding(ImmutableRubyString value) { - return value.getEncoding().jcoding; - } - - @Specialization - protected Encoding symbolToEncoding(RubySymbol value) { - return value.getRope().getEncoding(); - } - - @Specialization - protected Encoding regexpToEncoding(RubyRegexp value) { - return value.regex.getEncoding(); - } - - @Specialization - protected Encoding rubyEncodingToEncoding(RubyEncoding value) { - return value.jcoding; - } - - @Fallback - protected Encoding failure(Object value) { - return null; - } -} diff --git a/src/main/java/org/truffleruby/core/cast/ToRopeNode.java b/src/main/java/org/truffleruby/core/cast/ToRopeNode.java deleted file mode 100644 index 40fc0f0ff015..000000000000 --- 
a/src/main/java/org/truffleruby/core/cast/ToRopeNode.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Copyright (c) 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ - -package org.truffleruby.core.cast; - -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.dsl.Fallback; -import com.oracle.truffle.api.dsl.NodeChild; -import com.oracle.truffle.api.dsl.Specialization; -import org.jcodings.Encoding; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.string.ImmutableRubyString; -import org.truffleruby.core.string.RubyString; -import org.truffleruby.language.RubyBaseNodeWithExecute; - -@NodeChild(value = "child", type = RubyBaseNodeWithExecute.class) -public abstract class ToRopeNode extends RubyBaseNodeWithExecute { - - @Specialization - protected Rope coerceRubyString(RubyString string) { - return string.rope; - } - - @Specialization - protected Rope coerceImmutableRubyString(ImmutableRubyString string) { - return string.rope; - } - - @Fallback - protected Encoding failure(Object value) { - throw CompilerDirectives.shouldNotReachHere(); - } -} diff --git a/src/main/java/org/truffleruby/core/cast/ToRubyEncodingNode.java b/src/main/java/org/truffleruby/core/cast/ToRubyEncodingNode.java index cc58514a1e52..ef16138e49a9 100644 --- a/src/main/java/org/truffleruby/core/cast/ToRubyEncodingNode.java +++ b/src/main/java/org/truffleruby/core/cast/ToRubyEncodingNode.java @@ -9,6 +9,7 @@ */ package org.truffleruby.core.cast; +import com.oracle.truffle.api.dsl.Cached; import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.regexp.RubyRegexp; import org.truffleruby.core.string.ImmutableRubyString; @@ -18,6 +19,7 @@ import 
com.oracle.truffle.api.dsl.Fallback; import com.oracle.truffle.api.dsl.Specialization; +import org.truffleruby.language.library.RubyStringLibrary; /** Take a Ruby object that has an encoding and extracts the Java-level encoding object. */ public abstract class ToRubyEncodingNode extends RubyBaseNode { @@ -29,13 +31,15 @@ public static ToRubyEncodingNode create() { public abstract RubyEncoding executeToEncoding(Object value); @Specialization - protected RubyEncoding stringToEncoding(RubyString value) { - return value.encoding; + protected RubyEncoding stringToEncoding(RubyString value, + @Cached RubyStringLibrary libString) { + return libString.getEncoding(value); } @Specialization - protected RubyEncoding immutableStringToEncoding(ImmutableRubyString value) { - return value.getEncoding(); + protected RubyEncoding immutableStringToEncoding(ImmutableRubyString value, + @Cached RubyStringLibrary libString) { + return libString.getEncoding(value); } @Specialization diff --git a/src/main/java/org/truffleruby/core/cast/ToSNode.java b/src/main/java/org/truffleruby/core/cast/ToSNode.java index 12fe486b9a7c..e8629c7f0d59 100644 --- a/src/main/java/org/truffleruby/core/cast/ToSNode.java +++ b/src/main/java/org/truffleruby/core/cast/ToSNode.java @@ -9,7 +9,6 @@ */ package org.truffleruby.core.cast; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.core.kernel.KernelNodes; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.string.ImmutableRubyString; @@ -41,7 +40,7 @@ protected ImmutableRubyString toS(ImmutableRubyString string) { @Specialization(guards = "isNotRubyString(object)") protected Object toSFallback(VirtualFrame frame, Object object, @Cached DispatchNode callToSNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { final Object value = callToSNode.callWithFrame(frame, object, "to_s"); if (libString.isRubyString(value)) { diff --git 
a/src/main/java/org/truffleruby/core/cast/ToStrNode.java b/src/main/java/org/truffleruby/core/cast/ToStrNode.java index 5ac49f953499..ed5fee04735f 100644 --- a/src/main/java/org/truffleruby/core/cast/ToStrNode.java +++ b/src/main/java/org/truffleruby/core/cast/ToStrNode.java @@ -11,7 +11,6 @@ package org.truffleruby.core.cast; import com.oracle.truffle.api.dsl.GenerateUncached; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.language.RubyBaseNodeWithExecute; @@ -48,7 +47,7 @@ protected ImmutableRubyString coerceImmutableRubyString(ImmutableRubyString stri protected Object coerceObject(Object object, @Cached BranchProfile errorProfile, @Cached DispatchNode toStrNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { final Object coerced; try { coerced = toStrNode.call(object, "to_str"); diff --git a/src/main/java/org/truffleruby/core/cast/ToStringOrSymbolNode.java b/src/main/java/org/truffleruby/core/cast/ToStringOrSymbolNode.java index 69c96335dc86..1506e54100a6 100644 --- a/src/main/java/org/truffleruby/core/cast/ToStringOrSymbolNode.java +++ b/src/main/java/org/truffleruby/core/cast/ToStringOrSymbolNode.java @@ -10,7 +10,6 @@ package org.truffleruby.core.cast; import com.oracle.truffle.api.dsl.GenerateUncached; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.core.string.ImmutableRubyString; @@ -54,7 +53,7 @@ protected ImmutableRubyString coerceRubyString(ImmutableRubyString string) { protected Object coerceObject(Object object, @Cached DispatchNode toStr, @Cached BranchProfile errorProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { final Object coerced; try { coerced = 
toStr.call(object, "to_str"); diff --git a/src/main/java/org/truffleruby/core/cast/ToSymbolNode.java b/src/main/java/org/truffleruby/core/cast/ToSymbolNode.java index 761aacb5a54b..8205bf9644a6 100644 --- a/src/main/java/org/truffleruby/core/cast/ToSymbolNode.java +++ b/src/main/java/org/truffleruby/core/cast/ToSymbolNode.java @@ -11,11 +11,10 @@ import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.profiles.BranchProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.symbol.RubySymbol; import com.oracle.truffle.api.dsl.GenerateUncached; @@ -62,31 +61,28 @@ protected RubySymbol javaStringUncached(String str) { } @Specialization( - guards = { - "strings.isRubyString(str)", - "equals.execute(strings.getRope(str), cachedRope)", - "strings.getEncoding(str) == cachedEncoding" }, + guards = { "strings.isRubyString(str)", "equalNode.execute(strings, str, cachedTString, cachedEncoding)" }, limit = "getCacheLimit()") protected RubySymbol rubyString(Object str, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached(value = "strings.getRope(str)") Rope cachedRope, + @Cached RubyStringLibrary strings, + @Cached(value = "asTruffleStringUncached(str)") TruffleString cachedTString, @Cached(value = "strings.getEncoding(str)") RubyEncoding cachedEncoding, - @Cached RopeNodes.EqualNode equals, - @Cached(value = "getSymbol(cachedRope, cachedEncoding)") RubySymbol rubySymbol) { + @Cached StringHelperNodes.EqualSameEncodingNode equalNode, + @Cached(value = "getSymbol(cachedTString, cachedEncoding)") RubySymbol rubySymbol) { return rubySymbol; } - @Specialization(guards = "strings.isRubyString(str)", replaces = 
"rubyString") + @Specialization(guards = "strings.isRubyString(str)", replaces = "rubyString", limit = "1") protected RubySymbol rubyStringUncached(Object str, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return getSymbol(strings.getRope(str), strings.getEncoding(str)); + @Cached RubyStringLibrary strings) { + return getSymbol(strings.getTString(str), strings.getEncoding(str)); } @Specialization(guards = { "!isRubySymbol(object)", "!isString(object)", "isNotRubyString(object)" }) protected RubySymbol toStr(Object object, @Cached BranchProfile errorProfile, @Cached DispatchNode toStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, + @Cached RubyStringLibrary libString, @Cached ToSymbolNode toSymbolNode) { final Object coerced; try { diff --git a/src/main/java/org/truffleruby/core/encoding/EncodingConverterNodes.java b/src/main/java/org/truffleruby/core/encoding/EncodingConverterNodes.java index 1530440247b2..2877aa9ffc70 100644 --- a/src/main/java/org/truffleruby/core/encoding/EncodingConverterNodes.java +++ b/src/main/java/org/truffleruby/core/encoding/EncodingConverterNodes.java @@ -11,13 +11,12 @@ */ package org.truffleruby.core.encoding; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; - import java.nio.charset.StandardCharsets; import java.util.Set; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.object.Shape; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; import org.jcodings.Ptr; import org.jcodings.transcode.EConv; @@ -36,20 +35,12 @@ import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.cast.ToStrNode; import org.truffleruby.core.cast.ToStrNodeGen; -import org.truffleruby.core.hash.RubyHash; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import 
org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.TStringBuilder; import org.truffleruby.core.string.EncodingUtils; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.symbol.RubySymbol; -import org.truffleruby.language.Nil; import org.truffleruby.language.NotProvided; import org.truffleruby.language.RubyBaseNodeWithExecute; import org.truffleruby.language.RubyNode; @@ -116,7 +107,8 @@ protected Object initialize( } final byte[] segmentSource = transcoder.getSource(); - ret[retIndex++] = getSymbol(StringUtils.toUpperCase(RopeOperations.decodeAscii(segmentSource))); + ret[retIndex++] = getSymbol( + StringUtils.toUpperCase(new String(segmentSource, StandardCharsets.US_ASCII))); } final int retSize = retIndex + 1; @@ -127,7 +119,7 @@ protected Object initialize( } final byte[] destinationName = destinationEncoding.getName(); - ret[retIndex] = getSymbol(StringUtils.toUpperCase(RopeOperations.decodeAscii(destinationName))); + ret[retIndex] = getSymbol(StringUtils.toUpperCase(new String(destinationName, StandardCharsets.US_ASCII))); return createArray(ret); } @@ -187,73 +179,25 @@ protected Object search(RubySymbol source) { @Primitive(name = "encoding_converter_primitive_convert", lowerFixnum = { 3, 4, 5 }) public abstract static class PrimitiveConvertNode extends PrimitiveArrayArgumentsNode { - @Child private RopeNodes.SubstringNode substringNode = RopeNodes.SubstringNode.create(); - @TruffleBoundary - @Specialization(guards = "stringsSource.isRubyString(source)") - protected Object encodingConverterPrimitiveConvert( - RubyEncodingConverter encodingConverter, - Object source, - RubyString target, - int offset, - int size, - RubyHash options, - @CachedLibrary(limit = "LIBSTRING_CACHE") 
RubyStringLibrary stringsSource) { - throw new UnsupportedOperationException("not implemented"); - } - @Specialization - protected Object primitiveConvertNilSource( - RubyEncodingConverter encodingConverter, - Nil source, - RubyString target, - int offset, - int size, - int options, - @Cached DispatchNode destinationEncodingNode) { - return primitiveConvertHelper( - encodingConverter, - source, - RopeConstants.EMPTY_UTF8_ROPE, - target, - offset, - size, - options, - destinationEncodingNode); - } - - @Specialization(guards = "stringsSource.isRubyString(source)") protected Object encodingConverterPrimitiveConvert( RubyEncodingConverter encodingConverter, - Object source, + RubyString source, RubyString target, int offset, int size, int options, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsSource, - @Cached DispatchNode destinationEncodingNode) { - + @Cached RubyStringLibrary libString, + @Cached DispatchNode destinationEncodingNode, + @Cached TruffleString.SubstringByteIndexNode substringNode, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) { // Taken from org.jruby.RubyConverter#primitive_convert. - return primitiveConvertHelper( - encodingConverter, - source, - stringsSource.getRope(source), - target, - offset, - size, - options, - destinationEncodingNode); - } - - @TruffleBoundary - private Object primitiveConvertHelper(RubyEncodingConverter encodingConverter, Object source, Rope sourceRope, - RubyString target, int offset, int size, int options, DispatchNode destinationEncodingNode) { - // Taken from org.jruby.RubyConverter#primitive_convert. 
+ var tencoding = libString.getTEncoding(source); + var tstring = source.tstring; - Rope targetRope = target.rope; - final boolean nonNullSource = source != nil; - final RopeBuilder outBytes = RopeOperations.toRopeBuilderCopy(targetRope); + final TStringBuilder outBytes = TStringBuilder.create(target); final Ptr inPtr = new Ptr(); final Ptr outPtr = new Ptr(); @@ -264,17 +208,11 @@ private Object primitiveConvertHelper(RubyEncodingConverter encodingConverter, O final boolean growOutputBuffer = (size == -1); if (size == -1) { - size = 16; // in MRI, this is RSTRING_EMBED_LEN_MAX - - if (nonNullSource) { - if (size < sourceRope.byteLength()) { - size = sourceRope.byteLength(); - } - } + int minSize = 16; // in MRI, this is RSTRING_EMBED_LEN_MAX + size = Math.max(minSize, source.byteLengthUncached()); } while (true) { - if (changeOffset) { offset = outBytes.getLength(); } @@ -285,7 +223,7 @@ private Object primitiveConvertHelper(RubyEncodingConverter encodingConverter, O coreExceptions().argumentError("output offset too big", this)); } - long outputByteEnd = offset + size; + long outputByteEnd = (long) offset + size; if (outputByteEnd > Integer.MAX_VALUE) { // overflow check @@ -296,25 +234,21 @@ private Object primitiveConvertHelper(RubyEncodingConverter encodingConverter, O outBytes.unsafeEnsureSpace((int) outputByteEnd); - inPtr.p = 0; + var sourceBytes = getInternalByteArrayNode.execute(tstring, tencoding); + + inPtr.p = sourceBytes.getOffset(); outPtr.p = offset; - int os = outPtr.p + size; - EConvResult res = convert( - ec, - sourceRope.getBytes(), - inPtr, - sourceRope.byteLength() + inPtr.p, - outBytes.getUnsafeBytes(), - outPtr, - os, + EConvResult res = ec.convert( + sourceBytes.getArray(), inPtr, sourceBytes.getEnd(), + outBytes.getUnsafeBytes(), outPtr, outPtr.p + size, options); outBytes.setLength(outPtr.p); - if (nonNullSource) { - sourceRope = substringNode.executeSubstring(sourceRope, inPtr.p, sourceRope.byteLength() - inPtr.p); - ((RubyString) 
source).setRope(sourceRope); - } + int inputOffset = inPtr.p - sourceBytes.getOffset(); + tstring = substringNode.execute(source.tstring, inputOffset, source.byteLengthUncached() - inputOffset, + tencoding, true); + source.setTString(tstring); if (growOutputBuffer && res == EConvResult.DestinationBufferFull) { if (Integer.MAX_VALUE / 2 < size) { @@ -327,29 +261,22 @@ private Object primitiveConvertHelper(RubyEncodingConverter encodingConverter, O } if (ec.destinationEncoding != null) { - outBytes.setEncoding(ec.destinationEncoding); + outBytes.setEncoding(Encodings.getBuiltInEncoding(ec.destinationEncoding)); } - target.setRope( - RopeOperations.ropeFromRopeBuilder(outBytes), - (RubyEncoding) destinationEncodingNode.call(encodingConverter, "destination_encoding")); + var destinationEncoding = (RubyEncoding) destinationEncodingNode.call(encodingConverter, + "destination_encoding"); + target.setTString(outBytes.toTString(), destinationEncoding); return getSymbol(res.symbolicName()); } } - - @TruffleBoundary - private EConvResult convert(EConv ec, byte[] in, Ptr inPtr, int inStop, byte[] out, Ptr outPtr, int outStop, - int flags) { - return ec.convert(in, inPtr, inStop, out, outPtr, outStop, flags); - } - } @CoreMethod(names = "putback", optional = 1, lowerFixnum = 1) public abstract static class EncodingConverterPutbackNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @Specialization protected RubyString encodingConverterPutback(RubyEncodingConverter encodingConverter, int maxBytes, @@ -383,7 +310,7 @@ private RubyString putback(RubyEncodingConverter encodingConverter, int n, Dispa final Object sourceEncoding = (RubyEncoding) sourceEncodingNode.call(encodingConverter, "source_encoding"); final RubyEncoding rubyEncoding = sourceEncoding == nil ? 
Encodings.BINARY : (RubyEncoding) sourceEncoding; - return makeStringNode.executeMake(bytes, rubyEncoding, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, bytes, rubyEncoding); } } @@ -393,7 +320,7 @@ public abstract static class EncodingConverterLastErrorNode extends PrimitiveArr @TruffleBoundary @Specialization protected Object encodingConverterLastError(RubyEncodingConverter encodingConverter, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final EConv ec = encodingConverter.econv; final EConv.LastError lastError = ec.lastError; @@ -408,18 +335,20 @@ protected Object encodingConverterLastError(RubyEncodingConverter encodingConver final Object[] store = new Object[size]; store[0] = eConvResultToSymbol(lastError.getResult()); - store[1] = makeStringNode.executeMake(lastError.getSource(), Encodings.BINARY, CR_UNKNOWN); - store[2] = makeStringNode.executeMake(lastError.getDestination(), Encodings.BINARY, CR_UNKNOWN); - store[3] = makeStringNode.fromBuilderUnsafe(RopeBuilder.createRopeBuilder( + store[1] = createString(fromByteArrayNode, lastError.getSource(), Encodings.BINARY); + store[2] = createString(fromByteArrayNode, lastError.getDestination(), Encodings.BINARY); + var errorTString = TStringBuilder.create( lastError.getErrorBytes(), lastError.getErrorBytesP(), - lastError.getErrorBytesP() + lastError.getErrorBytesLength()), Encodings.BINARY, CR_UNKNOWN); + lastError.getErrorBytesLength()).toTStringUnsafe(fromByteArrayNode); + store[3] = createString(errorTString, Encodings.BINARY); if (readAgain) { - store[4] = makeStringNode.fromBuilderUnsafe(RopeBuilder.createRopeBuilder( + var readAgainTString = TStringBuilder.create( lastError.getErrorBytes(), - lastError.getErrorBytesLength() + lastError.getErrorBytesP(), - lastError.getReadAgainLength()), Encodings.BINARY, CR_UNKNOWN); + lastError.getErrorBytesP() + lastError.getErrorBytesLength(), + 
lastError.getReadAgainLength()).toTStringUnsafe(fromByteArrayNode); + store[4] = createString(readAgainTString, Encodings.BINARY); } return createArray(store); @@ -454,35 +383,32 @@ public abstract static class EncodingConverterErrinfoNode extends CoreMethodArra @TruffleBoundary @Specialization protected RubyArray encodingConverterLastError(RubyEncodingConverter encodingConverter, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final EConv ec = encodingConverter.econv; + final EConv.LastError lastError = ec.lastError; - final Object[] ret = { getSymbol(ec.lastError.getResult().symbolicName()), nil, nil, nil, nil }; + final Object[] ret = { getSymbol(lastError.getResult().symbolicName()), nil, nil, nil, nil }; - if (ec.lastError.getSource() != null) { - ret[1] = makeStringNode.executeMake(ec.lastError.getSource(), Encodings.BINARY, CR_UNKNOWN); + if (lastError.getSource() != null) { + ret[1] = createString(fromByteArrayNode, lastError.getSource(), Encodings.BINARY); } - if (ec.lastError.getDestination() != null) { - ret[2] = makeStringNode.executeMake(ec.lastError.getDestination(), Encodings.BINARY, CR_UNKNOWN); + if (lastError.getDestination() != null) { + ret[2] = createString(fromByteArrayNode, lastError.getDestination(), Encodings.BINARY); } - if (ec.lastError.getErrorBytes() != null) { - ret[3] = makeStringNode - .fromBuilderUnsafe( - RopeBuilder.createRopeBuilder( - ec.lastError.getErrorBytes(), - ec.lastError.getErrorBytesP(), - ec.lastError.getErrorBytesLength()), - Encodings.BINARY, - CR_UNKNOWN); - ret[4] = makeStringNode.fromBuilderUnsafe( - RopeBuilder.createRopeBuilder( - ec.lastError.getErrorBytes(), - ec.lastError.getErrorBytesP() + ec.lastError.getErrorBytesLength(), - ec.lastError.getReadAgainLength()), - Encodings.BINARY, - CR_UNKNOWN); + if (lastError.getErrorBytes() != null) { + var errorTString = TStringBuilder.create( + lastError.getErrorBytes(), + lastError.getErrorBytesP(), 
+ lastError.getErrorBytesLength()).toTStringUnsafe(fromByteArrayNode); + ret[3] = createString(errorTString, Encodings.BINARY); + + var readAgainTString = TStringBuilder.create( + lastError.getErrorBytes(), + lastError.getErrorBytesP() + lastError.getErrorBytesLength(), + lastError.getReadAgainLength()).toTStringUnsafe(fromByteArrayNode); + ret[4] = createString(readAgainTString, Encodings.BINARY); } return createArray(ret); @@ -493,7 +419,7 @@ protected RubyArray encodingConverterLastError(RubyEncodingConverter encodingCon @CoreMethod(names = "replacement") public abstract static class EncodingConverterReplacementNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @TruffleBoundary @Specialization @@ -511,7 +437,7 @@ protected RubyString getReplacement(RubyEncodingConverter encodingConverter) { final String encodingName = new String(ec.replacementEncoding, StandardCharsets.US_ASCII); final RubyEncoding encoding = getContext().getEncodingManager().getRubyEncoding(encodingName); - return makeStringNode.executeMake(bytes, encoding, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, bytes, encoding); } } @@ -526,16 +452,17 @@ protected ToStrNode coerceReplacementToString(RubyBaseNodeWithExecute replacemen return ToStrNodeGen.create(replacement); } - @Specialization(guards = "libReplacement.isRubyString(replacement)") + @Specialization(guards = "libReplacement.isRubyString(replacement)", limit = "1") protected Object setReplacement(RubyEncodingConverter encodingConverter, Object replacement, @Cached BranchProfile errorProfile, - @Cached RopeNodes.BytesNode bytesNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libReplacement) { - final EConv ec = encodingConverter.econv; - final Rope rope = libReplacement.getRope(replacement); - final 
Encoding encoding = rope.getEncoding(); + @Cached TruffleString.GetInternalByteArrayNode bytesNode, + @Cached RubyStringLibrary libReplacement) { + var tstring = libReplacement.getTString(replacement); + var encoding = libReplacement.getEncoding(replacement); - final int ret = setReplacement(ec, bytesNode.execute(rope), rope.byteLength(), encoding.getName()); + final InternalByteArray byteArray = bytesNode.execute(tstring, encoding.tencoding); + int ret = setReplacement(encodingConverter.econv, byteArray.getArray(), byteArray.getOffset(), + byteArray.getLength(), encoding.jcoding.getName()); if (ret == -1) { errorProfile.enter(); @@ -548,8 +475,8 @@ protected Object setReplacement(RubyEncodingConverter encodingConverter, Object } @TruffleBoundary - private int setReplacement(EConv ec, byte[] string, int len, byte[] encodingName) { - return ec.setReplacement(string, 0, len, encodingName); + private int setReplacement(EConv ec, byte[] bytes, int offset, int len, byte[] encodingName) { + return ec.setReplacement(bytes, offset, len, encodingName); } } diff --git a/src/main/java/org/truffleruby/core/encoding/EncodingManager.java b/src/main/java/org/truffleruby/core/encoding/EncodingManager.java index 74c3a78c30bd..8705df3a04a8 100644 --- a/src/main/java/org/truffleruby/core/encoding/EncodingManager.java +++ b/src/main/java/org/truffleruby/core/encoding/EncodingManager.java @@ -13,6 +13,7 @@ package org.truffleruby.core.encoding; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Locale; import java.util.Map; @@ -26,14 +27,12 @@ import org.jcodings.Encoding; import org.jcodings.EncodingDB; import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.array.ArrayUtils; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.Rope; -import 
org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.EncodingUtils; +import org.truffleruby.core.string.StringOperations; import org.truffleruby.extra.ffi.Pointer; import org.truffleruby.platform.NativeConfiguration; import org.truffleruby.platform.TruffleNFIPlatform; @@ -90,7 +89,7 @@ private void initializeEncodingAliases(RubyClass encodingClass) { // The alias name should be exactly the one in the encodings DB. final Encoding encoding = encodingEntry.getEncoding(); final RubyEncoding rubyEncoding = defineAlias(encoding, - RopeOperations.decodeAscii(entry.bytes, entry.p, entry.end)); + new String(entry.bytes, entry.p, entry.end - entry.p, StandardCharsets.US_ASCII)); // The constant names must be treated by the the encodingNames helper. for (String constName : EncodingUtils.encodingNames(entry.bytes, entry.p, entry.end)) { @@ -158,7 +157,7 @@ private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguratio context, InteropLibrary.getUncached(), 0); - localeEncodingName = RopeOperations.decodeAscii(bytes); + localeEncodingName = new String(bytes, StandardCharsets.US_ASCII); } else { localeEncodingName = Charset.defaultCharset().name(); } @@ -168,7 +167,7 @@ private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguratio rubyEncoding = Encodings.US_ASCII; } - if (context.getOptions().WARN_LOCALE && rubyEncoding.jcoding == USASCIIEncoding.INSTANCE) { + if (context.getOptions().WARN_LOCALE && rubyEncoding == Encodings.US_ASCII) { if ("C".equals(System.getenv("LANG")) && "C".equals(System.getenv("LC_ALL"))) { // The parent process seems to explicitly want a C locale (e.g. EnvUtil#invoke_ruby in the MRI test harness), so only warn at config level in this case. 
RubyLanguage.LOGGER.config( @@ -186,8 +185,8 @@ private void initializeLocaleEncoding(TruffleNFIPlatform nfi, NativeConfiguratio } @TruffleBoundary - public static Encoding getEncoding(Rope name) { - EncodingDB.Entry entry = EncodingDB.getEncodings().get(name.getBytes()); + public static Encoding getEncoding(String name) { + EncodingDB.Entry entry = EncodingDB.getEncodings().get(StringOperations.encodeAsciiBytes(name)); if (entry == null) { entry = EncodingDB.getAliases().get(name.getBytes()); @@ -225,19 +224,20 @@ public RubyEncoding getRubyEncoding(String name) { } } - public RubyEncoding getRubyEncoding(int encodingIndex) { + // Should only be used by Primitive.encoding_get_encoding_by_index + RubyEncoding getRubyEncoding(int encodingIndex) { return ENCODING_LIST_BY_ENCODING_INDEX[encodingIndex]; } @TruffleBoundary public synchronized RubyEncoding defineBuiltInEncoding(EncodingDB.Entry encodingEntry) { final int encodingIndex = encodingEntry.getEncoding().getIndex(); - final RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encodingIndex); + final RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encodingEntry.getEncoding()); assert ENCODING_LIST_BY_ENCODING_INDEX[encodingIndex] == null; ENCODING_LIST_BY_ENCODING_INDEX[encodingIndex] = rubyEncoding; - addToLookup(rubyEncoding.jcoding.toString(), rubyEncoding); + addToLookup(rubyEncoding.toString(), rubyEncoding); return rubyEncoding; } @@ -251,14 +251,14 @@ public synchronized RubyEncoding defineDynamicEncoding(Encoding encoding, byte[] ENCODING_LIST_BY_ENCODING_INDEX = Arrays.copyOf(ENCODING_LIST_BY_ENCODING_INDEX, encodingIndex + 1); ENCODING_LIST_BY_ENCODING_INDEX[encodingIndex] = rubyEncoding; - addToLookup(RopeOperations.decodeRope(rubyEncoding.name.rope), rubyEncoding); + addToLookup(rubyEncoding.name.getJavaString(), rubyEncoding); return rubyEncoding; } @TruffleBoundary public RubyEncoding defineAlias(Encoding encoding, String name) { - final RubyEncoding rubyEncoding = 
Encodings.getBuiltInEncoding(encoding.getIndex()); + final RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encoding); addToLookup(name, rubyEncoding); return rubyEncoding; } @@ -274,7 +274,7 @@ public synchronized RubyEncoding createDummyEncoding(String name) { return null; } - final byte[] nameBytes = RopeOperations.encodeAsciiBytes(name); + final byte[] nameBytes = StringOperations.encodeAsciiBytes(name); return defineDynamicEncoding(Encodings.DUMMY_ENCODING_BASE, nameBytes); } @@ -284,7 +284,7 @@ public synchronized RubyEncoding replicateEncoding(RubyEncoding encoding, String return null; } - final byte[] nameBytes = RopeOperations.encodeAsciiBytes(name); + final byte[] nameBytes = StringOperations.encodeAsciiBytes(name); return defineDynamicEncoding(encoding.jcoding, nameBytes); } diff --git a/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java b/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java index e53ab969f8b7..7dd33035b1e7 100644 --- a/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java +++ b/src/main/java/org/truffleruby/core/encoding/EncodingNodes.java @@ -12,11 +12,9 @@ package org.truffleruby.core.encoding; import com.oracle.truffle.api.dsl.ImportStatic; -import com.oracle.truffle.api.library.CachedLibrary; -import org.jcodings.Encoding; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.EncodingDB; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.unicode.UnicodeEncoding; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -25,24 +23,18 @@ import org.truffleruby.builtins.UnaryCoreMethodNode; import org.truffleruby.core.array.ArrayUtils; import org.truffleruby.core.array.RubyArray; -import org.truffleruby.core.cast.ToEncodingNode; import org.truffleruby.core.cast.ToRubyEncodingNode; import 
org.truffleruby.core.encoding.EncodingNodesFactory.NegotiateCompatibleEncodingNodeGen; -import org.truffleruby.core.encoding.EncodingNodesFactory.NegotiateCompatibleRopeEncodingNodeGen; +import org.truffleruby.core.encoding.EncodingNodesFactory.NegotiateCompatibleStringEncodingNodeGen; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.regexp.RubyRegexp; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeGuards; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeWithEncoding; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; -import org.truffleruby.core.symbol.RubySymbol; +import org.truffleruby.core.string.StringGuards; import org.truffleruby.core.string.ImmutableRubyString; +import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.language.Nil; import org.truffleruby.language.RubyBaseNode; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.Visibility; import org.truffleruby.language.control.RaiseException; import org.truffleruby.language.library.RubyStringLibrary; @@ -51,11 +43,12 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.dsl.Fallback; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.profiles.BranchProfile; import com.oracle.truffle.api.profiles.ConditionProfile; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.ASCII; + @CoreModule(value = "Encoding", isClass = true) public abstract class EncodingNodes { @@ -63,25 +56,21 @@ public abstract class EncodingNodes { public abstract static class AsciiCompatibleNode extends CoreMethodArrayArgumentsNode { @Specialization protected boolean 
isAsciiCompatible(RubyEncoding encoding) { - return encoding.jcoding.isAsciiCompatible(); + return encoding.isAsciiCompatible; } } // MRI: enc_compatible_str and enc_compatible_latter - @ImportStatic({ CodeRange.class, RopeGuards.class }) - public abstract static class NegotiateCompatibleRopeEncodingNode extends RubyBaseNode { - - @Child RopeNodes.CodeRangeNode codeRangeNode; + @ImportStatic(TruffleString.CodeRange.class) + public abstract static class NegotiateCompatibleStringEncodingNode extends RubyBaseNode { - public abstract RubyEncoding executeNegotiate(Rope first, RubyEncoding firstEncoding, Rope second, - RubyEncoding secondEncoding); + @Child TruffleString.GetByteCodeRangeNode codeRangeNode; - public RubyEncoding negotiate(RopeWithEncoding first, RopeWithEncoding second) { - return executeNegotiate(first.getRope(), first.getEncoding(), second.getRope(), second.getEncoding()); - } + public abstract RubyEncoding execute(AbstractTruffleString first, RubyEncoding firstEncoding, + AbstractTruffleString second, RubyEncoding secondEncoding); - public static NegotiateCompatibleRopeEncodingNode create() { - return NegotiateCompatibleRopeEncodingNodeGen.create(); + public static NegotiateCompatibleStringEncodingNode create() { + return NegotiateCompatibleStringEncodingNodeGen.create(); } @Specialization(guards = { @@ -89,17 +78,23 @@ public static NegotiateCompatibleRopeEncodingNode create() { "secondEncoding == cachedEncoding", }, limit = "getCacheLimit()") protected RubyEncoding negotiateSameEncodingCached( - Rope first, RubyEncoding firstEncoding, Rope second, RubyEncoding secondEncoding, + AbstractTruffleString first, + RubyEncoding firstEncoding, + AbstractTruffleString second, + RubyEncoding secondEncoding, @Cached("firstEncoding") RubyEncoding cachedEncoding) { - assert first.encoding == firstEncoding.jcoding && second.encoding == secondEncoding.jcoding; + assert first.isCompatibleTo(firstEncoding.tencoding) && second.isCompatibleTo(secondEncoding.tencoding); 
return cachedEncoding; } @Specialization(guards = "firstEncoding == secondEncoding", replaces = "negotiateSameEncodingCached") protected RubyEncoding negotiateSameEncodingUncached( - Rope first, RubyEncoding firstEncoding, Rope second, RubyEncoding secondEncoding) { - assert first.encoding == firstEncoding.jcoding && second.encoding == secondEncoding.jcoding; + AbstractTruffleString first, + RubyEncoding firstEncoding, + AbstractTruffleString second, + RubyEncoding secondEncoding) { + assert first.isCompatibleTo(firstEncoding.tencoding) && second.isCompatibleTo(secondEncoding.tencoding); return firstEncoding; } @@ -107,10 +102,13 @@ protected RubyEncoding negotiateSameEncodingUncached( "firstEncoding != secondEncoding", "firstEncoding == cachedEncoding", "isStandardEncoding(cachedEncoding)", - "getCodeRange(second) == CR_7BIT" + "getCodeRange(second, secondEncoding) == ASCII" }) protected RubyEncoding negotiateStandardEncodingAndCr7Bit( - Rope first, RubyEncoding firstEncoding, Rope second, RubyEncoding secondEncoding, + AbstractTruffleString first, + RubyEncoding firstEncoding, + AbstractTruffleString second, + RubyEncoding secondEncoding, @Cached("firstEncoding") RubyEncoding cachedEncoding) { // Encoding negotiation of two strings is most often between strings with the same encoding. 
The next most // frequent case is two strings with different encodings, but each being one of the standard/default runtime @@ -127,56 +125,66 @@ protected RubyEncoding negotiateStandardEncodingAndCr7Bit( "second.isEmpty() == isSecondEmpty", "cachedFirstEncoding == firstEncoding", "cachedSecondEncoding == secondEncoding", - "codeRangeNode.execute(first) == firstCodeRange", - "codeRangeNode.execute(second) == secondCodeRange" }, + "getCodeRange(first, firstEncoding) == firstCodeRange", + "getCodeRange(second, secondEncoding) == secondCodeRange" }, limit = "getCacheLimit()") protected RubyEncoding negotiateRopeRopeCached( - Rope first, RubyEncoding firstEncoding, Rope second, RubyEncoding secondEncoding, + AbstractTruffleString first, + RubyEncoding firstEncoding, + AbstractTruffleString second, + RubyEncoding secondEncoding, @Cached("first.isEmpty()") boolean isFirstEmpty, @Cached("second.isEmpty()") boolean isSecondEmpty, - @Cached("first.getCodeRange()") CodeRange firstCodeRange, - @Cached("second.getCodeRange()") CodeRange secondCodeRange, + @Cached("getCodeRange(first, firstEncoding)") TruffleString.CodeRange firstCodeRange, + @Cached("getCodeRange(second, secondEncoding)") TruffleString.CodeRange secondCodeRange, @Cached("firstEncoding") RubyEncoding cachedFirstEncoding, @Cached("secondEncoding") RubyEncoding cachedSecondEncoding, - @Cached("compatibleEncodingForRopes(first, firstEncoding, second, secondEncoding)") RubyEncoding negotiatedEncoding, - @Cached RopeNodes.CodeRangeNode codeRangeNode) { - assert first.encoding == firstEncoding.jcoding && second.encoding == secondEncoding.jcoding; + @Cached("compatibleEncodingForRopes(first, firstEncoding, second, secondEncoding)") RubyEncoding negotiatedEncoding) { + assert first.isCompatibleTo(firstEncoding.tencoding) && second.isCompatibleTo(secondEncoding.tencoding); return negotiatedEncoding; } @Specialization(guards = "firstEncoding != secondEncoding", replaces = "negotiateRopeRopeCached") protected RubyEncoding 
negotiateRopeRopeUncached( - Rope first, RubyEncoding firstEncoding, Rope second, RubyEncoding secondEncoding) { - assert first.encoding == firstEncoding.jcoding && second.encoding == secondEncoding.jcoding; + AbstractTruffleString first, + RubyEncoding firstEncoding, + AbstractTruffleString second, + RubyEncoding secondEncoding) { + assert first.isCompatibleTo(firstEncoding.tencoding) && second.isCompatibleTo(secondEncoding.tencoding); return compatibleEncodingForRopes(first, firstEncoding, second, secondEncoding); } + /** This method returns non-null if either: + *
<ul> + * <li>one side is empty</li> + * <li>one side is 7-bit and both encodings are ascii-compatible</li> + * </ul>
+ */ @TruffleBoundary - protected static RubyEncoding compatibleEncodingForRopes(Rope firstRope, RubyEncoding firstRubyEncoding, - Rope secondRope, RubyEncoding secondRubyEncoding) { + protected RubyEncoding compatibleEncodingForRopes(AbstractTruffleString firstRope, RubyEncoding firstEncoding, + AbstractTruffleString secondRope, RubyEncoding secondEncoding) { // MRI: enc_compatible_latter - - final Encoding firstEncoding = firstRope.getEncoding(); - final Encoding secondEncoding = secondRope.getEncoding(); + assert firstEncoding != secondEncoding : "this method assumes the encodings are different"; if (secondRope.isEmpty()) { - return firstRubyEncoding; + return firstEncoding; } if (firstRope.isEmpty()) { - return (firstEncoding.isAsciiCompatible() && secondRope.getCodeRange() == CodeRange.CR_7BIT) - ? firstRubyEncoding - : secondRubyEncoding; + return (firstEncoding.isAsciiCompatible && + StringGuards.is7Bit(secondRope, secondEncoding, getCodeRangeNode())) + ? firstEncoding + : secondEncoding; } - if (!firstEncoding.isAsciiCompatible() || !secondEncoding.isAsciiCompatible()) { + if (!firstEncoding.isAsciiCompatible || !secondEncoding.isAsciiCompatible) { return null; } - if (secondRope.getCodeRange() == CodeRange.CR_7BIT) { - return firstRubyEncoding; + if (StringGuards.is7Bit(secondRope, secondEncoding, getCodeRangeNode())) { + return firstEncoding; } - if (firstRope.getCodeRange() == CodeRange.CR_7BIT) { - return secondRubyEncoding; + if (StringGuards.is7Bit(firstRope, firstEncoding, getCodeRangeNode())) { + return secondEncoding; } return null; @@ -186,13 +194,17 @@ protected int getCacheLimit() { return getLanguage().options.ENCODING_COMPATIBLE_QUERY_CACHE; } - protected CodeRange getCodeRange(Rope rope) { + protected TruffleString.CodeRange getCodeRange(AbstractTruffleString string, RubyEncoding encoding) { + return getCodeRangeNode().execute(string, encoding.tencoding); + } + + private TruffleString.GetByteCodeRangeNode getCodeRangeNode() { if 
(codeRangeNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - codeRangeNode = insert(RopeNodes.CodeRangeNode.create()); + codeRangeNode = insert(TruffleString.GetByteCodeRangeNode.create()); } - return codeRangeNode.execute(rope); + return codeRangeNode; } /** Indicates whether the encoding is one of the runtime-default encodings. Many (most?) applications do not @@ -208,7 +220,7 @@ protected boolean isStandardEncoding(RubyEncoding encoding) { // MRI: enc_compatible_latter public abstract static class NegotiateCompatibleEncodingNode extends RubyBaseNode { - @Child private RopeNodes.CodeRangeNode codeRangeNode; + @Child private TruffleString.GetByteCodeRangeNode codeRangeNode; @Child private ToRubyEncodingNode getEncodingNode = ToRubyEncodingNode.create(); public static NegotiateCompatibleEncodingNode create() { @@ -235,17 +247,17 @@ protected RubyEncoding negotiateSameEncodingUncached(Object first, Object second return getEncoding(first); } - @Specialization(guards = { "libFirst.isRubyString(first)", "libSecond.isRubyString(second)" }) + @Specialization(guards = { "libFirst.isRubyString(first)", "libSecond.isRubyString(second)" }, limit = "1") protected RubyEncoding negotiateStringStringEncoding(Object first, Object second, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFirst, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSecond, - @Cached NegotiateCompatibleRopeEncodingNode ropeNode) { + @Cached RubyStringLibrary libFirst, + @Cached RubyStringLibrary libSecond, + @Cached NegotiateCompatibleStringEncodingNode ropeNode) { final RubyEncoding firstEncoding = libFirst.getEncoding(first); final RubyEncoding secondEncoding = libSecond.getEncoding(second); - return ropeNode.executeNegotiate( - libFirst.getRope(first), + return ropeNode.execute( + libFirst.getTString(first), firstEncoding, - libSecond.getRope(second), + libSecond.getTString(second), secondEncoding); } @@ -259,11 +271,11 @@ protected RubyEncoding 
negotiateStringStringEncoding(Object first, Object second "firstEncoding != secondEncoding" }, limit = "getCacheLimit()") protected RubyEncoding negotiateStringObjectCached(Object first, Object second, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFirst, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSecond, + @Cached RubyStringLibrary libFirst, + @Cached RubyStringLibrary libSecond, @Cached("getEncoding(first)") RubyEncoding firstEncoding, @Cached("getEncoding(second)") RubyEncoding secondEncoding, - @Cached("getCodeRange(first, libFirst)") CodeRange codeRange, + @Cached("getCodeRange(first, libFirst)") TruffleString.CodeRange codeRange, @Cached("negotiateStringObjectUncached(first, second, libFirst)") RubyEncoding negotiatedEncoding) { return negotiatedEncoding; } @@ -273,9 +285,9 @@ protected RubyEncoding negotiateStringObjectCached(Object first, Object second, "libFirst.isRubyString(first)", "getEncoding(first) != getEncoding(second)", "isNotRubyString(second)" }, - replaces = "negotiateStringObjectCached") + replaces = "negotiateStringObjectCached", limit = "1") protected RubyEncoding negotiateStringObjectUncached(Object first, Object second, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFirst) { + @Cached RubyStringLibrary libFirst) { final RubyEncoding firstEncoding = getEncoding(first); final RubyEncoding secondEncoding = getEncoding(second); @@ -283,15 +295,15 @@ protected RubyEncoding negotiateStringObjectUncached(Object first, Object second return null; } - if (!firstEncoding.jcoding.isAsciiCompatible() || !secondEncoding.jcoding.isAsciiCompatible()) { + if (!firstEncoding.isAsciiCompatible || !secondEncoding.isAsciiCompatible) { return null; } - if (secondEncoding.jcoding == USASCIIEncoding.INSTANCE) { + if (secondEncoding == Encodings.US_ASCII) { return firstEncoding; } - if (getCodeRange(first, libFirst) == CodeRange.CR_7BIT) { + if (getCodeRange(first, libFirst) == ASCII) { return 
secondEncoding; } @@ -302,9 +314,10 @@ protected RubyEncoding negotiateStringObjectUncached(Object first, Object second guards = { "libSecond.isRubyString(second)", "getEncoding(first) != getEncoding(second)", - "isNotRubyString(first)" }) + "isNotRubyString(first)" }, + limit = "1") protected RubyEncoding negotiateObjectString(Object first, Object second, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSecond) { + @Cached RubyStringLibrary libSecond) { return negotiateStringObjectUncached(second, first, libSecond); } @@ -347,27 +360,27 @@ protected static RubyEncoding areCompatible(RubyEncoding enc1, RubyEncoding enc2 return null; } - if (!enc1.jcoding.isAsciiCompatible() || !enc2.jcoding.isAsciiCompatible()) { + if (!enc1.isAsciiCompatible || !enc2.isAsciiCompatible) { return null; } - if (enc2.jcoding == USASCIIEncoding.INSTANCE) { + if (enc2 == Encodings.US_ASCII) { return enc1; } - if (enc1.jcoding == USASCIIEncoding.INSTANCE) { + if (enc1 == Encodings.US_ASCII) { return enc2; } return null; } - protected CodeRange getCodeRange(Object string, RubyStringLibrary libString) { + protected TruffleString.CodeRange getCodeRange(Object string, RubyStringLibrary libString) { if (codeRangeNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - codeRangeNode = insert(RopeNodes.CodeRangeNode.create()); + codeRangeNode = insert(TruffleString.GetByteCodeRangeNode.create()); } - return codeRangeNode.execute(libString.getRope(string)); + return codeRangeNode.execute(libString.getTString(string), libString.getTEncoding(string)); } protected RubyEncoding getEncoding(Object value) { @@ -404,6 +417,31 @@ protected Object isCompatible(Object first, Object second, } } + // encoding_compatible? 
but only accepting Strings for better footprint + @Primitive(name = "strings_compatible?") + public abstract static class AreStringsCompatibleNode extends CoreMethodArrayArgumentsNode { + public static AreStringsCompatibleNode create() { + return EncodingNodesFactory.AreStringsCompatibleNodeFactory.create(null); + } + + @Specialization + protected Object areCompatible(Object first, Object second, + @Cached RubyStringLibrary libFirst, + @Cached RubyStringLibrary libSecond, + @Cached NegotiateCompatibleStringEncodingNode negotiateCompatibleStringEncodingNode, + @Cached ConditionProfile noNegotiatedEncodingProfile) { + final RubyEncoding negotiatedEncoding = negotiateCompatibleStringEncodingNode.execute( + libFirst.getTString(first), libFirst.getEncoding(first), + libSecond.getTString(second), libSecond.getEncoding(second)); + + if (noNegotiatedEncodingProfile.profile(negotiatedEncoding == null)) { + return nil; + } + + return negotiatedEncoding; + } + } + @CoreMethod(names = "list", onSingleton = true) public abstract static class ListNode extends CoreMethodArrayArgumentsNode { @@ -428,7 +466,7 @@ protected ImmutableRubyString localeCharacterMap() { public abstract static class DummyNode extends CoreMethodArrayArgumentsNode { @Specialization protected boolean isDummy(RubyEncoding encoding) { - return encoding.jcoding.isDummy(); + return encoding.isDummy; } } @@ -452,7 +490,7 @@ protected Object allocate(RubyClass rubyClass) { public abstract static class EachAliasNode extends PrimitiveArrayArgumentsNode { @Child private CallBlockNode yieldNode = CallBlockNode.create(); - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @TruffleBoundary @Specialization @@ -460,14 +498,14 @@ protected Object eachAlias(RubyProc block) { var iterator = EncodingDB.getAliases().entryIterator(); while (iterator.hasNext()) { var entry = iterator.next(); - 
final RubyString aliasName = makeStringNode.executeMake( + final RubyString aliasName = createString( + fromByteArrayNode, ArrayUtils.extractRange(entry.bytes, entry.p, entry.end), - Encodings.US_ASCII, - CodeRange.CR_7BIT); + Encodings.US_ASCII); // CR_7BIT yieldNode.yield( block, aliasName, - Encodings.getBuiltInEncoding(entry.value.getEncoding().getIndex())); + Encodings.getBuiltInEncoding(entry.value.getEncoding())); } return nil; } @@ -477,19 +515,18 @@ protected Object eachAlias(RubyProc block) { public abstract static class IsUnicodeNode extends PrimitiveArrayArgumentsNode { @Specialization protected boolean isUnicode(RubyEncoding encoding) { - return encoding.jcoding.isUnicode(); + return encoding.isUnicode; } } @Primitive(name = "get_actual_encoding") public abstract static class GetActualEncodingPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "libString.isRubyString(string)") + @Specialization(guards = "libString.isRubyString(string)", limit = "1") protected RubyEncoding getActualEncoding(Object string, @Cached GetActualEncodingNode getActualEncodingNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope rope = libString.getRope(string); - return getActualEncodingNode.execute(rope, libString.getEncoding(string)); + @Cached RubyStringLibrary libString) { + return getActualEncodingNode.execute(libString.getTString(string), libString.getEncoding(string)); } } @@ -501,21 +538,25 @@ public static GetActualEncodingNode create() { return EncodingNodesFactory.GetActualEncodingNodeGen.create(); } - public abstract RubyEncoding execute(Rope rope, RubyEncoding encoding); + public abstract RubyEncoding execute(AbstractTruffleString tstring, RubyEncoding encoding); - @Specialization(guards = "!encoding.jcoding.isDummy()") - protected RubyEncoding getActualEncoding(Rope rope, RubyEncoding encoding) { + @Specialization(guards = "!encoding.isDummy") + protected RubyEncoding 
getActualEncoding(AbstractTruffleString tstring, RubyEncoding encoding) { return encoding; } @TruffleBoundary - @Specialization(guards = "encoding.jcoding.isDummy()") - protected RubyEncoding getActualEncodingDummy(Rope rope, RubyEncoding encoding) { - if (encoding.jcoding instanceof UnicodeEncoding) { + @Specialization(guards = "encoding.isDummy") + protected RubyEncoding getActualEncodingDummy(AbstractTruffleString tstring, RubyEncoding encoding, + @Cached TruffleString.ReadByteNode readByteNode) { + if (encoding.isUnicode) { + var enc = encoding.tencoding; + var byteLength = tstring.byteLength(enc); + // handle dummy UTF-16 and UTF-32 by scanning for BOM, as in MRI - if (encoding == Encodings.UTF16_DUMMY && rope.byteLength() >= 2) { - int c0 = rope.get(0) & 0xff; - int c1 = rope.get(1) & 0xff; + if (encoding == Encodings.UTF16_DUMMY && byteLength >= 2) { + int c0 = readByteNode.execute(tstring, 0, enc); + int c1 = readByteNode.execute(tstring, 1, enc); if (c0 == 0xFE && c1 == 0xFF) { return Encodings.UTF16BE; @@ -523,11 +564,11 @@ protected RubyEncoding getActualEncodingDummy(Rope rope, RubyEncoding encoding) return Encodings.UTF16LE; } return Encodings.BINARY; - } else if (encoding == Encodings.UTF32_DUMMY && rope.byteLength() >= 4) { - int c0 = rope.get(0) & 0xff; - int c1 = rope.get(1) & 0xff; - int c2 = rope.get(2) & 0xff; - int c3 = rope.get(3) & 0xff; + } else if (encoding == Encodings.UTF32_DUMMY && byteLength >= 4) { + int c0 = readByteNode.execute(tstring, 0, enc); + int c1 = readByteNode.execute(tstring, 1, enc); + int c2 = readByteNode.execute(tstring, 2, enc); + int c3 = readByteNode.execute(tstring, 3, enc); if (c0 == 0 && c1 == 0 && c2 == 0xFE && c3 == 0xFF) { return Encodings.UTF32BE; @@ -547,9 +588,8 @@ public abstract static class GetDefaultEncodingNode extends PrimitiveArrayArgume @TruffleBoundary @Specialization - protected Object getDefaultEncoding(Object name, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringLibrary) { - 
final RubyEncoding encoding = getEncoding(stringLibrary.getJavaString(name)); + protected Object getDefaultEncoding(Object name) { + final RubyEncoding encoding = getEncoding(RubyGuards.getJavaString(name)); if (encoding == null) { return nil; } else { @@ -613,36 +653,16 @@ protected Object noDefaultInternal(Nil encoding) { public abstract static class EncodingGetObjectEncodingNode extends PrimitiveArrayArgumentsNode { @Specialization - protected RubyEncoding encodingGetObjectEncodingString(RubyString object) { - return object.encoding; - } - - @Specialization - protected RubyEncoding encodingGetObjectEncodingImmutableString(ImmutableRubyString object) { - return object.encoding; - } - - @Specialization - protected RubyEncoding encodingGetObjectEncodingSymbol(RubySymbol object) { - return object.encoding; - } - - @Specialization - protected RubyEncoding encodingGetObjectEncoding(RubyEncoding object) { - return object; - } - - @Specialization - protected RubyEncoding encodingGetObjectEncodingRegexp(RubyRegexp object) { - return object.encoding; - } - - @Fallback - protected Object encodingGetObjectEncodingNil(Object object) { - // TODO(CS, 26 Jan 15) something to do with __encoding__ here? 
- return nil; + protected Object getObjectEncoding(Object object, + @Cached ToRubyEncodingNode toRubyEncodingNode, + @Cached ConditionProfile nullProfile) { + var rubyEncoding = toRubyEncodingNode.executeToEncoding(object); + if (nullProfile.profile(rubyEncoding == null)) { + return nil; + } else { + return rubyEncoding; + } } - } public abstract static class EncodingCreationNode extends PrimitiveArrayArgumentsNode { @@ -663,10 +683,11 @@ public RubyArray setIndexOrRaiseError(String name, RubyEncoding newEncoding) { @Primitive(name = "encoding_replicate") public abstract static class EncodingReplicateNode extends EncodingCreationNode { - @Specialization(guards = "strings.isRubyString(nameObject)") + @Specialization(guards = "strings.isRubyString(nameObject)", limit = "1") protected RubyArray encodingReplicate(RubyEncoding object, Object nameObject, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final String name = strings.getJavaString(nameObject); + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode) { + final String name = toJavaStringNode.executeToJavaString(nameObject); final RubyEncoding newEncoding = replicate(name, object); return setIndexOrRaiseError(name, newEncoding); @@ -682,10 +703,11 @@ private RubyEncoding replicate(String name, RubyEncoding encoding) { @Primitive(name = "encoding_create_dummy") public abstract static class DummyEncodingNode extends EncodingCreationNode { - @Specialization(guards = "strings.isRubyString(nameObject)") + @Specialization(guards = "strings.isRubyString(nameObject)", limit = "1") protected RubyArray createDummyEncoding(Object nameObject, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final String name = strings.getJavaString(nameObject); + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode) { + final String name = toJavaStringNode.executeToJavaString(nameObject); final RubyEncoding newEncoding = 
createDummy(name); return setIndexOrRaiseError(name, newEncoding); @@ -729,37 +751,30 @@ protected int getIndex(RubyEncoding encoding) { // MRI: rb_enc_check_str / rb_encoding_check (with Ruby String arguments) @Primitive(name = "encoding_ensure_compatible_str") public abstract static class CheckStringEncodingPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = { - "libFirst.isRubyString(first)", - "libSecond.isRubyString(second)", - }) - protected RubyEncoding checkEncodingStringStringUncached(Object first, Object second, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFirst, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSecond, + @Specialization(guards = { "libFirst.isRubyString(first)", "libSecond.isRubyString(second)", }, limit = "1") + protected RubyEncoding checkEncodingStringString(Object first, Object second, + @Cached RubyStringLibrary libFirst, + @Cached RubyStringLibrary libSecond, @Cached BranchProfile errorProfile, - @Cached NegotiateCompatibleRopeEncodingNode negotiateCompatibleRopeEncodingNode) { + @Cached NegotiateCompatibleStringEncodingNode negotiateCompatibleStringEncodingNode) { final RubyEncoding firstEncoding = libFirst.getEncoding(first); final RubyEncoding secondEncoding = libSecond.getEncoding(second); - final RubyEncoding negotiatedEncoding = negotiateCompatibleRopeEncodingNode - .executeNegotiate( - libFirst.getRope(first), + final RubyEncoding negotiatedEncoding = negotiateCompatibleStringEncodingNode + .execute( + libFirst.getTString(first), firstEncoding, - libSecond.getRope(second), + libSecond.getTString(second), secondEncoding); if (negotiatedEncoding == null) { errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().encodingCompatibilityErrorIncompatible( - firstEncoding.jcoding, - secondEncoding.jcoding, - this)); + throw new RaiseException(getContext(), + coreExceptions().encodingCompatibilityErrorIncompatible(firstEncoding, secondEncoding, 
this)); } return negotiatedEncoding; - } - } // MRI: rb_enc_check_str / rb_encoding_check (with RopeWithEncoding arguments) @@ -769,24 +784,24 @@ public static CheckStringEncodingNode create() { return EncodingNodesFactory.CheckStringEncodingNodeGen.create(); } - public abstract RubyEncoding executeCheckEncoding(RopeWithEncoding first, RopeWithEncoding second); + public abstract RubyEncoding executeCheckEncoding(AbstractTruffleString first, RubyEncoding firstEncoding, + AbstractTruffleString second, RubyEncoding secondEncoding); @Specialization - protected RubyEncoding checkEncoding(RopeWithEncoding first, RopeWithEncoding second, + protected RubyEncoding checkEncoding( + AbstractTruffleString first, + RubyEncoding firstEncoding, + AbstractTruffleString second, + RubyEncoding secondEncoding, @Cached BranchProfile errorProfile, - @Cached NegotiateCompatibleRopeEncodingNode negotiateCompatibleEncodingNode) { - final RubyEncoding negotiatedEncoding = negotiateCompatibleEncodingNode.executeNegotiate( - first.getRope(), - first.getEncoding(), - second.getRope(), - second.getEncoding()); + @Cached NegotiateCompatibleStringEncodingNode negotiateCompatibleEncodingNode) { + var negotiatedEncoding = negotiateCompatibleEncodingNode.execute(first, firstEncoding, second, + secondEncoding); if (negotiatedEncoding == null) { errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().encodingCompatibilityErrorIncompatible( - first.getEncoding().jcoding, - second.getEncoding().jcoding, - this)); + throw new RaiseException(getContext(), + coreExceptions().encodingCompatibilityErrorIncompatible(firstEncoding, secondEncoding, this)); } return negotiatedEncoding; @@ -799,7 +814,7 @@ protected RubyEncoding checkEncoding(RopeWithEncoding first, RopeWithEncoding se public abstract static class CheckEncodingNode extends PrimitiveArrayArgumentsNode { @Child private NegotiateCompatibleEncodingNode negotiateCompatibleEncodingNode; - @Child private ToEncodingNode 
toEncodingNode; + @Child private ToRubyEncodingNode toRubyEncodingNode; public static CheckEncodingNode create() { return EncodingNodesFactory.CheckEncodingNodeFactory.create(null); @@ -829,14 +844,14 @@ private RubyEncoding executeNegotiate(Object first, Object second) { } private void raiseException(Object first, Object second) { - if (toEncodingNode == null) { + if (toRubyEncodingNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - toEncodingNode = insert(ToEncodingNode.create()); + toRubyEncodingNode = insert(ToRubyEncodingNode.create()); } throw new RaiseException(getContext(), coreExceptions().encodingCompatibilityErrorIncompatible( - toEncodingNode.executeToEncoding(first), - toEncodingNode.executeToEncoding(second), + toRubyEncodingNode.executeToEncoding(first), + toRubyEncodingNode.executeToEncoding(second), this)); } diff --git a/src/main/java/org/truffleruby/core/encoding/Encodings.java b/src/main/java/org/truffleruby/core/encoding/Encodings.java index d06d7ac5bb87..db3fbb8203cc 100644 --- a/src/main/java/org/truffleruby/core/encoding/Encodings.java +++ b/src/main/java/org/truffleruby/core/encoding/Encodings.java @@ -24,12 +24,10 @@ import org.jcodings.specific.UTF32LEEncoding; import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.FrozenStringLiterals; import org.truffleruby.core.string.ImmutableRubyString; +import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.TStringConstants; public class Encodings { @@ -46,12 +44,12 @@ public class Encodings { public static final RubyEncoding ISO_8859_1 = BUILT_IN_ENCODINGS[ISO8859_1Encoding.INSTANCE.getIndex()]; public static final RubyEncoding UTF16_DUMMY = BUILT_IN_ENCODINGS[EncodingDB .getEncodings() - 
.get(RopeOperations.encodeAsciiBytes("UTF-16")) + .get(StringOperations.encodeAsciiBytes("UTF-16")) .getEncoding() .getIndex()]; public static final RubyEncoding UTF32_DUMMY = BUILT_IN_ENCODINGS[EncodingDB .getEncodings() - .get(RopeOperations.encodeAsciiBytes("UTF-32")) + .get(StringOperations.encodeAsciiBytes("UTF-32")) .getEncoding() .getIndex()]; @@ -75,8 +73,7 @@ private static RubyEncoding[] initializeRubyEncodings() { rubyEncoding = US_ASCII; } else { final ImmutableRubyString name = FrozenStringLiterals.createStringAndCacheLater( - RopeConstants.ROPE_CONSTANTS.get(encoding.toString()), - US_ASCII); + TStringConstants.TSTRING_CONSTANTS.get(encoding.toString()), US_ASCII); rubyEncoding = new RubyEncoding(encoding, name, encoding.getIndex()); } encodings[encoding.getIndex()] = rubyEncoding; @@ -91,14 +88,17 @@ private static Encoding createDummyEncoding() { @TruffleBoundary public static RubyEncoding newRubyEncoding(RubyLanguage language, Encoding encoding, int index, byte[] name) { - final Rope rope = RopeOperations.create(name, USASCIIEncoding.INSTANCE, CodeRange.CR_7BIT); - final ImmutableRubyString string = language.getFrozenStringLiteral(rope); + var tstring = TStringUtils.fromByteArray(name, Encodings.US_ASCII); + final ImmutableRubyString string = language.getFrozenStringLiteral(tstring, Encodings.US_ASCII); return new RubyEncoding(encoding, string, index); } - public static RubyEncoding getBuiltInEncoding(int index) { - return BUILT_IN_ENCODINGS[index]; + /** Should only be used when there is no other way, because this will ignore replicated and dummy encodings */ + public static RubyEncoding getBuiltInEncoding(Encoding jcoding) { + var rubyEncoding = BUILT_IN_ENCODINGS[jcoding.getIndex()]; + assert rubyEncoding.jcoding == jcoding; + return rubyEncoding; } } diff --git a/src/main/java/org/truffleruby/core/encoding/IsCharacterHeadNode.java b/src/main/java/org/truffleruby/core/encoding/IsCharacterHeadNode.java index d3cfacfccb00..fd1efee2ac91 100644 --- 
a/src/main/java/org/truffleruby/core/encoding/IsCharacterHeadNode.java +++ b/src/main/java/org/truffleruby/core/encoding/IsCharacterHeadNode.java @@ -10,7 +10,10 @@ package org.truffleruby.core.encoding; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Specialization; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.RubyBaseNode; /** Whether the position at byteOffset is the start of a character and not in the middle of a character */ @@ -20,25 +23,30 @@ public static IsCharacterHeadNode create() { return IsCharacterHeadNodeGen.create(); } - public abstract boolean execute(RubyEncoding enc, byte[] bytes, int byteOffset, int end); + public abstract boolean execute(RubyEncoding enc, AbstractTruffleString string, int byteOffset); - @Specialization(guards = "enc.jcoding.isSingleByte()") - protected boolean singleByte(RubyEncoding enc, byte[] bytes, int byteOffset, int end) { + @Specialization(guards = "enc.isSingleByte") + protected boolean singleByte(RubyEncoding enc, AbstractTruffleString string, int byteOffset) { // return offset directly (org.jcodings.SingleByteEncoding#leftAdjustCharHead) return true; } - @Specialization(guards = { "!enc.jcoding.isSingleByte()", "enc.jcoding.isUTF8()" }) - protected boolean utf8(RubyEncoding enc, byte[] bytes, int byteOffset, int end) { + @Specialization(guards = { "!enc.isSingleByte", "enc.jcoding.isUTF8()" }) + protected boolean utf8(RubyEncoding enc, AbstractTruffleString string, int byteOffset, + @Cached TruffleString.ReadByteNode readByteNode) { // based on org.jcodings.specific.BaseUTF8Encoding#leftAdjustCharHead - return utf8IsLead(bytes[byteOffset] & 0xff); + return utf8IsLead(readByteNode.execute(string, byteOffset, enc.tencoding)); } @TruffleBoundary - @Specialization(guards = { "!enc.jcoding.isSingleByte()", 
"!enc.jcoding.isUTF8()" }) - protected boolean other(RubyEncoding enc, byte[] bytes, int byteOffset, int end) { - return enc.jcoding.leftAdjustCharHead(bytes, 0, byteOffset, end) == byteOffset; + @Specialization(guards = { "!enc.isSingleByte", "!enc.jcoding.isUTF8()" }) + protected boolean other(RubyEncoding enc, AbstractTruffleString string, int byteOffset, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) { + var byteArray = getInternalByteArrayNode.execute(string, enc.tencoding); + int addedOffsets = byteArray.getOffset() + byteOffset; + return enc.jcoding.leftAdjustCharHead(byteArray.getArray(), byteArray.getOffset(), addedOffsets, + byteArray.getEnd()) == addedOffsets; } /** Copied from org.jcodings.specific.BaseUTF8Encoding */ diff --git a/src/main/java/org/truffleruby/core/encoding/RubyEncoding.java b/src/main/java/org/truffleruby/core/encoding/RubyEncoding.java index 737a69885344..3870fb6d1767 100644 --- a/src/main/java/org/truffleruby/core/encoding/RubyEncoding.java +++ b/src/main/java/org/truffleruby/core/encoding/RubyEncoding.java @@ -14,17 +14,16 @@ import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.library.ExportLibrary; import com.oracle.truffle.api.library.ExportMessage; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; import org.jcodings.specific.USASCIIEncoding; import org.truffleruby.RubyContext; import org.truffleruby.core.kernel.KernelNodes; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.RopeConstants; import org.truffleruby.core.string.FrozenStringLiterals; import org.truffleruby.core.string.ImmutableRubyString; - import org.truffleruby.language.ImmutableRubyObjectNotCopyable; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.dispatch.DispatchNode; import org.truffleruby.language.objects.ObjectGraph; import 
org.truffleruby.language.objects.ObjectGraphNode; @@ -33,24 +32,49 @@ import java.util.Set; @ExportLibrary(InteropLibrary.class) -public class RubyEncoding extends ImmutableRubyObjectNotCopyable implements ObjectGraphNode, Comparable { +public final class RubyEncoding extends ImmutableRubyObjectNotCopyable + implements ObjectGraphNode, Comparable { public final Encoding jcoding; + public final TruffleString.Encoding tencoding; public final ImmutableRubyString name; public final int index; + // Copy these properties here for faster access and to make the fields final (most of these fields are not final in JCodings) + public final boolean isDummy; + public final boolean isAsciiCompatible; + public final boolean isFixedWidth; + public final boolean isSingleByte; + public final boolean isUnicode; + public RubyEncoding(Encoding jcoding, ImmutableRubyString name, int index) { + assert name.getEncodingUncached() == Encodings.US_ASCII; this.jcoding = Objects.requireNonNull(jcoding); + this.tencoding = Objects.requireNonNull(TStringUtils.jcodingToTEncoding(jcoding)); this.name = Objects.requireNonNull(name); this.index = index; + + this.isDummy = jcoding.isDummy(); + this.isAsciiCompatible = jcoding.isAsciiCompatible(); + this.isFixedWidth = jcoding.isFixedWidth(); + this.isSingleByte = jcoding.isSingleByte(); + this.isUnicode = jcoding.isUnicode(); } // Special constructor to define US-ASCII encoding which is used for RubyEncoding names public RubyEncoding(int index) { this.jcoding = Objects.requireNonNull(USASCIIEncoding.INSTANCE); + this.tencoding = Objects.requireNonNull(TruffleString.Encoding.US_ASCII); this.name = Objects.requireNonNull( - FrozenStringLiterals.createStringAndCacheLater((LeafRope) RopeConstants.US_ASCII, this)); + FrozenStringLiterals.createStringAndCacheLater(TStringConstants.US_ASCII, this)); this.index = index; + + var jcoding = this.jcoding; + this.isDummy = jcoding.isDummy(); + this.isAsciiCompatible = jcoding.isAsciiCompatible(); + 
this.isFixedWidth = jcoding.isFixedWidth(); + this.isSingleByte = jcoding.isSingleByte(); + this.isUnicode = jcoding.isUnicode(); } @Override @@ -92,7 +116,7 @@ public int compareTo(RubyEncoding o) { if (index != o.index) { return index - o.index; } else { - return name.rope.compareTo(o.name.rope); + return name.tstring.compareBytesUncached(o.name.tstring, Encodings.US_ASCII.tencoding); } } } diff --git a/src/main/java/org/truffleruby/core/encoding/TStringUtils.java b/src/main/java/org/truffleruby/core/encoding/TStringUtils.java new file mode 100644 index 000000000000..4f6a0a4dfc39 --- /dev/null +++ b/src/main/java/org/truffleruby/core/encoding/TStringUtils.java @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2014, 2021 Oracle and/or its affiliates. All rights reserved. This + * code is released under a tri EPL/GPL/LGPL license. You can use it, + * redistribute it and/or modify it under the terms of the: + * + * Eclipse Public License version 2.0, or + * GNU General Public License version 2, or + * GNU Lesser General Public License version 2.1. + * + * Some of the code in this class is modified from org.jruby.runtime.encoding.EncodingService, + * licensed under the same EPL 2.0/GPL 2.0/LGPL 2.1 used throughout. 
+ */ +package org.truffleruby.core.encoding; + +import com.oracle.truffle.api.CompilerAsserts; +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import org.jcodings.Encoding; + +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.array.ArrayUtils; +import org.truffleruby.core.string.CannotConvertBinaryRubyStringToJavaString; +import org.truffleruby.core.string.StringGuards; + +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.ASCII; + +public class TStringUtils { + + public static TruffleString.Encoding jcodingToTEncoding(Encoding jcoding) { + var jcodingName = jcoding.toString(); + if (jcodingName.equals("UTF-16")) { + // We use UTF_16BE because JCodings resolves UTF-16 to UTF16BEEncoding(dummy=true) + // See org.jcodings.EncodingDB.dummy_unicode + return TruffleString.Encoding.UTF_16BE; + } else if (jcodingName.equals("UTF-32")) { + // We use UTF_32BE because JCodings resolves UTF-32 to UTF32BEEncoding(dummy=true) + // See org.jcodings.EncodingDB.dummy_unicode + return TruffleString.Encoding.UTF_32BE; + } else { + return TruffleString.Encoding.fromJCodingName(jcodingName); + } + } + + public static TruffleString fromByteArray(byte[] bytes, TruffleString.Encoding tencoding) { + CompilerAsserts.neverPartOfCompilation( + "Use createString(TruffleString.FromByteArrayNode, byte[], RubyEncoding) instead"); + return TruffleString.fromByteArrayUncached(bytes, 0, bytes.length, tencoding, false); + } + + public static TruffleString fromByteArray(byte[] bytes, RubyEncoding rubyEncoding) { + return fromByteArray(bytes, rubyEncoding.tencoding); + } + + public static TruffleString utf8TString(String javaString) { + return fromJavaString(javaString, TruffleString.Encoding.UTF_8); + } + + public static TruffleString usAsciiString(String javaString) { + return fromJavaString(javaString, 
TruffleString.Encoding.US_ASCII); + } + + public static TruffleString fromJavaString(String javaString, TruffleString.Encoding encoding) { + CompilerAsserts.neverPartOfCompilation( + "Use createString(TruffleString.FromJavaStringNode, String, RubyEncoding) instead"); + return TruffleString.fromJavaStringUncached(javaString, encoding); + } + + public static TruffleString fromJavaString(String javaString, RubyEncoding encoding) { + return fromJavaString(javaString, encoding.tencoding); + } + + // Should be avoided as much as feasible + public static byte[] getBytesOrCopy(AbstractTruffleString tstring, RubyEncoding encoding) { + CompilerAsserts.neverPartOfCompilation("uncached"); + var bytes = tstring.getInternalByteArrayUncached(encoding.tencoding); + if (tstring instanceof TruffleString && bytes.getOffset() == 0 && + bytes.getLength() == bytes.getArray().length) { + return bytes.getArray(); + } else { + return ArrayUtils.extractRange(bytes.getArray(), bytes.getOffset(), bytes.getEnd()); + } + } + + // Should be avoided as much as feasible + public static byte[] getBytesOrCopy(AbstractTruffleString tstring, TruffleString.Encoding encoding, + TruffleString.GetInternalByteArrayNode getInternalByteArrayNode, + ConditionProfile noCopyProfile) { + var bytes = getInternalByteArrayNode.execute(tstring, encoding); + if (noCopyProfile.profile(tstring instanceof TruffleString && bytes.getOffset() == 0 && + bytes.getLength() == bytes.getArray().length)) { + return bytes.getArray(); + } else { + return ArrayUtils.extractRange(bytes.getArray(), bytes.getOffset(), bytes.getEnd()); + } + } + + private static final boolean DEBUG_NON_ZERO_OFFSET = Boolean + .getBoolean("truffle.strings.debug-non-zero-offset-arrays"); + + public static byte[] getBytesOrFail(AbstractTruffleString tstring, RubyEncoding encoding) { + CompilerAsserts.neverPartOfCompilation("uncached"); + if (DEBUG_NON_ZERO_OFFSET) { + return getBytesOrCopy(tstring, encoding); + } else { + var byteArray = 
tstring.getInternalByteArrayUncached(encoding.tencoding); + if (byteArray.getOffset() != 0 || byteArray.getLength() != byteArray.getArray().length) { + throw CompilerDirectives.shouldNotReachHere(); + } + return byteArray.getArray(); + } + } + + public static byte[] getBytesOrFail(AbstractTruffleString tstring, RubyEncoding encoding, + TruffleString.GetInternalByteArrayNode byteArrayNode) { + if (DEBUG_NON_ZERO_OFFSET) { + return getBytesOrCopy(tstring, encoding); + } else { + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + if (byteArray.getOffset() != 0 || byteArray.getLength() != byteArray.getArray().length) { + throw CompilerDirectives.shouldNotReachHere(); + } + return byteArray.getArray(); + } + } + + public static boolean isSingleByteOptimizable(AbstractTruffleString truffleString, RubyEncoding encoding) { + CompilerAsserts.neverPartOfCompilation("Use SingleByteOptimizableNode instead"); + return truffleString.getByteCodeRangeUncached(encoding.tencoding) == ASCII || encoding.isSingleByte; + } + + public static String toJavaStringOrThrow(AbstractTruffleString tstring, RubyEncoding encoding) { + CompilerAsserts.neverPartOfCompilation("uncached"); + if (encoding == Encodings.BINARY && !StringGuards.is7BitUncached(tstring, encoding)) { + int length = tstring.byteLength(encoding.tencoding); + for (int i = 0; i < length; i++) { + final int b = tstring.readByteUncached(i, encoding.tencoding); + if (!Encoding.isAscii(b)) { + throw new CannotConvertBinaryRubyStringToJavaString(b); + } + } + throw CompilerDirectives.shouldNotReachHere(); + } else { + return tstring.toJavaStringUncached(); + } + } +} diff --git a/src/main/java/org/truffleruby/core/encoding/TranscodingManager.java b/src/main/java/org/truffleruby/core/encoding/TranscodingManager.java index 126d563c42f6..41d857cef1e5 100644 --- a/src/main/java/org/truffleruby/core/encoding/TranscodingManager.java +++ b/src/main/java/org/truffleruby/core/encoding/TranscodingManager.java @@ -32,6 +32,7 
@@ */ package org.truffleruby.core.encoding; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -44,7 +45,6 @@ import org.jcodings.util.Hash; import com.oracle.truffle.api.TruffleOptions; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.StringUtils; /** This class computes all direct transcoder paths for both JVM and Native Image as a convenient-to-access Map. On @@ -59,8 +59,9 @@ public class TranscodingManager { for (Hash.HashEntry destinationEntry : sourceEntry.entryIterator()) { final TranscoderDB.Entry e = destinationEntry.value; - final String sourceName = StringUtils.toUpperCase(RopeOperations.decodeAscii(e.getSource())); - final String destinationName = StringUtils.toUpperCase(RopeOperations.decodeAscii(e.getDestination())); + final String sourceName = StringUtils.toUpperCase(new String(e.getSource(), StandardCharsets.US_ASCII)); + final String destinationName = StringUtils + .toUpperCase(new String(e.getDestination(), StandardCharsets.US_ASCII)); if (TruffleOptions.AOT) { // Load the classes eagerly diff --git a/src/main/java/org/truffleruby/core/exception/CoreExceptions.java b/src/main/java/org/truffleruby/core/exception/CoreExceptions.java index 37c0106d3ff7..1cb976fc4dbe 100644 --- a/src/main/java/org/truffleruby/core/exception/CoreExceptions.java +++ b/src/main/java/org/truffleruby/core/exception/CoreExceptions.java @@ -17,13 +17,10 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.interop.InvalidBufferOffsetException; import com.oracle.truffle.api.interop.UnknownKeyException; -import org.jcodings.Encoding; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.binding.RubyBinding; -import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; 
import org.truffleruby.core.exception.ExceptionOperations.ExceptionFormatter; import org.truffleruby.core.klass.RubyClass; @@ -31,14 +28,14 @@ import org.truffleruby.core.module.RubyModule; import org.truffleruby.core.proc.RubyProc; import org.truffleruby.core.range.RubyIntRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.CoreStrings; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.thread.ThreadNodes.ThreadGetExceptionNode; import org.truffleruby.language.Nil; +import org.truffleruby.language.RubyBaseNode; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.backtrace.Backtrace; import org.truffleruby.language.backtrace.BacktraceFormatter; import org.truffleruby.language.backtrace.BacktraceFormatter.FormattingFlags; @@ -95,16 +92,13 @@ public void showExceptionIfDebug(RubyClass rubyClass, Object message, Backtrace from = " at " + debugBacktraceFormatter.formatLine(backtrace.getStackTrace(), 0, null); } if (RubyStringLibrary.getUncached().isRubyString(message)) { - message = RubyStringLibrary.getUncached().getJavaString(message); + message = RubyGuards.getJavaString(message); } final String output = "Exception `" + exceptionClass + "'" + from + " - " + message + "\n"; if (context.getCoreLibrary().isLoaded()) { - RubyString outputString = StringOperations - .createUTF8String( - context, - language, - StringOperations.encodeRope(output, UTF8Encoding.INSTANCE)); + RubyString outputString = StringOperations.createUTF8String(context, language, output); Object stderr = context.getCoreLibrary().getStderr(); + DispatchNode.getUncached().call(stderr, "write", outputString); } else { context.getEnvErrStream().println(output); @@ -116,56 +110,60 @@ public void showExceptionIfDebug(RubyClass rubyClass, Object message, Backtrace public String 
inspect(Object value) { Object rubyString = DispatchNode.getUncached().call( context.getCoreLibrary().truffleTypeModule, "rb_inspect", value); - return RubyStringLibrary.getUncached().getJavaString(rubyString); + return RubyGuards.getJavaString(rubyString); } @TruffleBoundary public String inspectReceiver(Object receiver) { Object rubyString = DispatchNode.getUncached().call( context.getCoreLibrary().truffleExceptionOperationsModule, "receiver_string", receiver); - return RubyStringLibrary.getUncached().getJavaString(rubyString); + return RubyGuards.getJavaString(rubyString); } // ArgumentError - public RubyException argumentErrorOneHashRequired(Node currentNode) { - return argumentError(coreStrings().ONE_HASH_REQUIRED.getRope(), Encodings.BINARY, currentNode, null); - } - - public RubyException argumentError(Rope message, RubyEncoding encoding, Node currentNode) { - return argumentError(message, encoding, currentNode, null); + public RubyException argumentErrorOneHashRequired(RubyBaseNode currentNode) { + return argumentError(coreStrings().ONE_HASH_REQUIRED.createInstance(currentNode.getContext()), currentNode, + null); } public RubyException argumentError(String message, Node currentNode) { return argumentError(message, currentNode, null); } - public RubyException argumentErrorProcWithoutBlock(Node currentNode) { - return argumentError(coreStrings().PROC_WITHOUT_BLOCK.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorProcWithoutBlock(RubyBaseNode currentNode) { + return argumentError(coreStrings().PROC_WITHOUT_BLOCK.createInstance(currentNode.getContext()), currentNode, + null); } - public RubyException argumentErrorTooFewArguments(Node currentNode) { - return argumentError(coreStrings().TOO_FEW_ARGUMENTS.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorTooFewArguments(RubyBaseNode currentNode) { + return argumentError(coreStrings().TOO_FEW_ARGUMENTS.createInstance(currentNode.getContext()), 
currentNode, + null); } - public RubyException argumentErrorTimeIntervalPositive(Node currentNode) { - return argumentError(coreStrings().TIME_INTERVAL_MUST_BE_POS.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorTimeIntervalPositive(RubyBaseNode currentNode) { + return argumentError(coreStrings().TIME_INTERVAL_MUST_BE_POS.createInstance(currentNode.getContext()), + currentNode, null); } - public RubyException argumentErrorXOutsideOfString(Node currentNode) { - return argumentError(coreStrings().X_OUTSIDE_OF_STRING.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorXOutsideOfString(RubyBaseNode currentNode) { + return argumentError(coreStrings().X_OUTSIDE_OF_STRING.createInstance(currentNode.getContext()), currentNode, + null); } - public RubyException argumentErrorCantCompressNegativeNumbers(Node currentNode) { - return argumentError(coreStrings().CANT_COMPRESS_NEGATIVE.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorCantCompressNegativeNumbers(RubyBaseNode currentNode) { + return argumentError(coreStrings().CANT_COMPRESS_NEGATIVE.createInstance(currentNode.getContext()), currentNode, + null); } - public RubyException argumentErrorOutOfRange(Node currentNode) { - return argumentError(coreStrings().ARGUMENT_OUT_OF_RANGE.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorOutOfRange(RubyBaseNode currentNode) { + return argumentError(coreStrings().ARGUMENT_OUT_OF_RANGE.createInstance(currentNode.getContext()), currentNode, + null); } - public RubyException argumentErrorNegativeArraySize(Node currentNode) { - return argumentError(coreStrings().NEGATIVE_ARRAY_SIZE.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorNegativeArraySize(RubyBaseNode currentNode) { + return argumentError(coreStrings().NEGATIVE_ARRAY_SIZE.createInstance(currentNode.getContext()), currentNode, + null); } public 
RubyException argumentErrorTooLargeString(Node currentNode) { @@ -237,8 +235,9 @@ public RubyException argumentErrorMinMaxArity(int passed, int minArity, int maxA } } - public RubyException argumentErrorEmptyVarargs(Node currentNode) { - return argumentError(coreStrings().WRONG_ARGS_ZERO_PLUS_ONE.getRope(), Encodings.BINARY, currentNode, null); + public RubyException argumentErrorEmptyVarargs(RubyBaseNode currentNode) { + return argumentError(coreStrings().WRONG_ARGS_ZERO_PLUS_ONE.createInstance(currentNode.getContext()), + currentNode, null); } @TruffleBoundary @@ -250,9 +249,8 @@ public RubyException argumentErrorWrongArgumentType(Object object, String expect } @TruffleBoundary - public RubyException argumentErrorInvalidStringToInteger(Rope rope, Node currentNode) { - final String formattedObject = RopeOperations.decodeRope(rope); - return argumentError(StringUtils.format("invalid value for Integer(): %s", formattedObject), currentNode); + public RubyException argumentErrorInvalidStringToInteger(String string, Node currentNode) { + return argumentError("invalid value for Integer(): " + string, currentNode); } @TruffleBoundary @@ -267,21 +265,13 @@ public RubyException argumentErrorEncodingAlreadyRegistered(String nameString, N @TruffleBoundary public RubyException argumentError(String message, Node currentNode, Throwable javaThrowable) { - return argumentError( - StringOperations.encodeRope(message, UTF8Encoding.INSTANCE), - Encodings.UTF_8, - currentNode, - javaThrowable); + return argumentError(StringOperations.createUTF8String(context, language, + message), currentNode, javaThrowable); } - public RubyException argumentError(Rope message, RubyEncoding encoding, Node currentNode, Throwable javaThrowable) { + public RubyException argumentError(RubyString message, Node currentNode, Throwable javaThrowable) { RubyClass exceptionClass = context.getCoreLibrary().argumentErrorClass; - return ExceptionOperations.createRubyException( - context, - exceptionClass, - 
StringOperations.createString(currentNode, message, encoding), - currentNode, - javaThrowable); + return ExceptionOperations.createRubyException(context, exceptionClass, message, currentNode, javaThrowable); } @TruffleBoundary @@ -302,7 +292,7 @@ public RubyException frozenError(Object object, Node currentNode) { public RubyException frozenError(String message, Node currentNode, Object receiver) { RubyClass exceptionClass = context.getCoreLibrary().frozenErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); final Object cause = ThreadGetExceptionNode.getLastException(language); showExceptionIfDebug(exceptionClass, errorMessage, backtrace); @@ -329,7 +319,7 @@ public RubyException runtimeErrorCoverageNotEnabled(Node currentNode) { public RubyException runtimeError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().runtimeErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -337,7 +327,7 @@ public RubyException runtimeError(String message, Node currentNode) { public RubyException runtimeError(String message, Node currentNode, Throwable javaThrowable) { RubyClass exceptionClass = context.getCoreLibrary().runtimeErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations .createRubyException(context, exceptionClass, errorMessage, currentNode, javaThrowable); } @@ -346,7 +336,7 @@ 
public RubyException runtimeError(String message, Node currentNode, Throwable ja public RubyException runtimeError(String message, Backtrace backtrace) { RubyClass exceptionClass = context.getCoreLibrary().runtimeErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, backtrace); } @@ -362,7 +352,7 @@ public RubyException systemStackErrorStackLevelTooDeep(Node currentNode, StackOv final String message = coreStrings().STACK_LEVEL_TOO_DEEP + "\n\tfrom " + topOfTheStack; final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode, 0, javaThrowable); final RubyString messageString = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createSystemStackError(context, messageString, backtrace, showExceptionIfDebug); } @@ -424,11 +414,10 @@ public RubyException mathDomainErrorLog(Node currentNode) { @TruffleBoundary public RubyException mathDomainError(String method, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().mathDomainErrorClass; - Rope rope = StringOperations.encodeRope( - StringUtils.format("Numerical argument is out of domain - \"%s\"", method), - UTF8Encoding.INSTANCE); - RubyString errorMessage = StringOperations.createUTF8String(context, language, rope); + RubyString errorMessage = StringOperations.createUTF8String(context, language, + StringUtils.format("Numerical argument is out of domain - \"%s\"", method)); final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); + return ExceptionOperations .createSystemCallError( context, @@ -444,7 +433,7 @@ public RubyException mathDomainError(String method, Node currentNode) { public 
RubyException indexError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().indexErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -489,7 +478,7 @@ public RubyException indexErrorInvalidBufferOffsetException(InvalidBufferOffsetE public RubyException keyError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().keyErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -504,7 +493,7 @@ public RubyException keyError(UnknownKeyException exception, Node currentNode) { public RubyException stopIteration(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().stopIterationClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -514,7 +503,7 @@ public RubyException stopIteration(String message, Node currentNode) { public RubyException localJumpError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().localJumpErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, 
exceptionClass, errorMessage, currentNode, null); } @@ -678,7 +667,7 @@ public RubyException typeErrorRescueInvalidClause(Node currentNode) { public RubyException typeError(String message, Node currentNode, Throwable javaThrowable) { RubyClass exceptionClass = context.getCoreLibrary().typeErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations .createRubyException(context, exceptionClass, errorMessage, currentNode, javaThrowable); } @@ -686,8 +675,7 @@ public RubyException typeError(String message, Node currentNode, Throwable javaT @TruffleBoundary public RubyException typeErrorUnsupportedTypeException(UnsupportedTypeException exception, Node currentNode) { RubyArray rubyArray = createArray(context, language, exception.getSuppliedValues()); - String formattedValues = RubyStringLibrary.getUncached() - .getJavaString(DispatchNode.getUncached().call(rubyArray, "inspect")); + String formattedValues = RubyGuards.getJavaString(DispatchNode.getUncached().call(rubyArray, "inspect")); return typeError("unsupported type " + formattedValues, currentNode); } @@ -846,7 +834,7 @@ public RubyNameError nameErrorUnknownIdentifierException( @TruffleBoundary public RubyNameError nameError(String message, Object receiver, String name, Node currentNode) { final RubyString messageString = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); final RubyClass exceptionClass = context.getCoreLibrary().nameErrorClass; final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); final Object cause = ThreadGetExceptionNode.getLastException(language); @@ -912,15 +900,17 @@ public RubyNoMethodError noMethodErrorFromMethodMissing(ExceptionFormatter forma return exception; } + @TruffleBoundary 
public RubyNoMethodError noMethodError(String message, Object receiver, String name, Object[] args, Node currentNode) { - final RubyString messageString = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + final RubyString messageString = StringOperations.createUTF8String(context, language, message); final RubyArray argsArray = createArray(context, language, args); final RubyClass exceptionClass = context.getCoreLibrary().noMethodErrorClass; final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); final Object cause = ThreadGetExceptionNode.getLastException(language); + showExceptionIfDebug(exceptionClass, messageString, backtrace); + return new RubyNoMethodError( context.getCoreLibrary().noMethodErrorClass, language.noMethodErrorShape, @@ -934,10 +924,8 @@ public RubyNoMethodError noMethodError(String message, Object receiver, String n @TruffleBoundary public RubyNoMethodError noSuperMethodOutsideMethodError(Node currentNode) { - final RubyString messageString = StringOperations.createUTF8String( - context, - language, - StringOperations.encodeRope("super called outside of method", UTF8Encoding.INSTANCE)); + final RubyString messageString = StringOperations.createUTF8String(context, language, + "super called outside of method"); final RubyClass exceptionClass = context.getCoreLibrary().nameErrorClass; final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); final Object cause = ThreadGetExceptionNode.getLastException(language); @@ -964,20 +952,19 @@ public RubyNoMethodError noMethodErrorUnknownIdentifier(Object receiver, String @TruffleBoundary public RubyException loadError(String message, String path, Node currentNode) { - RubyString messageString = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString messageString = StringOperations.createUTF8String(context, language, 
message); RubyClass exceptionClass = context.getCoreLibrary().loadErrorClass; RubyException loadError = ExceptionOperations .createRubyException(context, exceptionClass, messageString, currentNode, null); + if ("openssl.so".equals(path)) { // This is a workaround for the rubygems/security.rb file expecting the error path to be openssl path = "openssl"; } - DynamicObjectLibrary.getUncached().put( - loadError, - "@path", - StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(path, UTF8Encoding.INSTANCE))); + + DynamicObjectLibrary.getUncached().put(loadError, "@path", + StringOperations.createUTF8String(context, language, path)); + return loadError; } @@ -1000,11 +987,8 @@ public RubyException zeroDivisionError(Node currentNode) { @TruffleBoundary public RubyException zeroDivisionError(Node currentNode, ArithmeticException exception) { RubyClass exceptionClass = context.getCoreLibrary().zeroDivisionErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String( - context, - language, - StringOperations.encodeRope("divided by 0", UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, "divided by 0"); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, exception); } @@ -1030,7 +1014,7 @@ public RubySyntaxError syntaxError(String message, Node currentNode, SourceSecti public RubySyntaxError syntaxErrorAlreadyWithFileLine(String message, Node currentNode, SourceSection sourceLocation) { final RubyString messageString = StringOperations.createUTF8String(context, language, - StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + message); RubyClass exceptionClass = context.getCoreLibrary().syntaxErrorClass; final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); final Object cause = ThreadGetExceptionNode.getLastException(language); @@ -1049,8 +1033,8 @@ public RubySyntaxError 
syntaxErrorAlreadyWithFileLine(String message, Node curre @TruffleBoundary public RubyException floatDomainError(String value, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().floatDomainErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(value, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, value); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1075,8 +1059,8 @@ public RubyException floatDomainErrorSqrtNegative(Node currentNode) { @TruffleBoundary public RubyException ioError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().ioErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1090,7 +1074,7 @@ public RubyException ioError(IOException exception, Node currentNode) { @TruffleBoundary public RubyException rangeError(long code, RubyEncoding encoding, Node currentNode) { return rangeError( - StringUtils.format("invalid codepoint %x in %s", code, encoding.jcoding), + StringUtils.format("invalid codepoint %x in %s", code, encoding), currentNode); } @@ -1121,8 +1105,8 @@ public RubyException rangeErrorConvertToInt(long value, Node currentNode) { @TruffleBoundary public RubyException rangeError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().rangeErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, 
language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1139,8 +1123,8 @@ public RubyException graalErrorAssertNotCompiledCompiled(Node currentNode) { @TruffleBoundary private RubyException graalError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().graalErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1149,8 +1133,8 @@ private RubyException graalError(String message, Node currentNode) { @TruffleBoundary public RubyException regexpError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().regexpErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1159,20 +1143,21 @@ public RubyException regexpError(String message, Node currentNode) { @TruffleBoundary public RubyException encodingError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().encodingErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @TruffleBoundary - public RubyException 
encodingCompatibilityErrorIncompatible(Encoding a, Encoding b, Node currentNode) { + public RubyException encodingCompatibilityErrorIncompatible(RubyEncoding a, RubyEncoding b, Node currentNode) { return encodingCompatibilityError( StringUtils.format("incompatible character encodings: %s and %s", a, b), currentNode); } @TruffleBoundary - public RubyException encodingCompatibilityErrorRegexpIncompatible(Encoding a, Encoding b, Node currentNode) { + public RubyException encodingCompatibilityErrorRegexpIncompatible(RubyEncoding a, RubyEncoding b, + Node currentNode) { return encodingCompatibilityError( StringUtils.format("incompatible encoding regexp match (%s regexp with %s string)", a, b), currentNode); @@ -1180,7 +1165,7 @@ public RubyException encodingCompatibilityErrorRegexpIncompatible(Encoding a, En @TruffleBoundary - public RubyException encodingCompatibilityErrorIncompatibleWithOperation(Encoding encoding, Node currentNode) { + public RubyException encodingCompatibilityErrorIncompatibleWithOperation(RubyEncoding encoding, Node currentNode) { return encodingCompatibilityError( StringUtils.format("incompatible encoding with this operation: %s", encoding), currentNode); @@ -1189,8 +1174,8 @@ public RubyException encodingCompatibilityErrorIncompatibleWithOperation(Encodin @TruffleBoundary public RubyException encodingCompatibilityError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().encodingCompatibilityErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1210,8 +1195,8 @@ public RubyException encodingUndefinedConversionError(Node currentNode) { @TruffleBoundary public RubyException fiberError(String message, Node 
currentNode) { RubyClass exceptionClass = context.getCoreLibrary().fiberErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1228,8 +1213,8 @@ public RubyException yieldFromRootFiberError(Node currentNode) { @TruffleBoundary public RubyException threadError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().threadErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1258,8 +1243,8 @@ public RubyException threadErrorQueueFull(Node currentNode) { @TruffleBoundary public RubyException securityError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().securityErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1272,8 +1257,7 @@ public RubySystemCallError systemCallError(String message, int errno, Backtrace if (message == null) { errorMessage = Nil.INSTANCE; } else { - errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + errorMessage = StringOperations.createUTF8String(context, language, message); } return 
ExceptionOperations .createSystemCallError(context, exceptionClass, errorMessage, errno, backtrace); @@ -1285,7 +1269,7 @@ public RubySystemCallError systemCallError(String message, int errno, Backtrace public RubyException ffiNullPointerError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().truffleFFINullPointerErrorClass; RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + .createUTF8String(context, language, message); return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1293,8 +1277,7 @@ public RubyException ffiNullPointerError(String message, Node currentNode) { @TruffleBoundary public RubySystemExit systemExit(int exitStatus, Node currentNode) { - final RubyString message = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope("exit", UTF8Encoding.INSTANCE)); + final RubyString message = StringOperations.createUTF8String(context, language, "exit"); final RubyClass exceptionClass = context.getCoreLibrary().systemExitClass; final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); final Object cause = ThreadGetExceptionNode.getLastException(language); @@ -1313,8 +1296,8 @@ public RubySystemExit systemExit(int exitStatus, Node currentNode) { @TruffleBoundary public RubyException closedQueueError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().closedQueueErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } @@ -1327,8 +1310,8 @@ public RubyException closedQueueError(Node currentNode) { 
@TruffleBoundary public RubyException unsupportedMessageError(String message, Node currentNode) { RubyClass exceptionClass = context.getCoreLibrary().unsupportedMessageErrorClass; - RubyString errorMessage = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(message, UTF8Encoding.INSTANCE)); + RubyString errorMessage = StringOperations.createUTF8String(context, language, message); + return ExceptionOperations.createRubyException(context, exceptionClass, errorMessage, currentNode, null); } diff --git a/src/main/java/org/truffleruby/core/exception/ErrnoErrorNode.java b/src/main/java/org/truffleruby/core/exception/ErrnoErrorNode.java index 96ddcd4b4dc5..6cacc4278300 100644 --- a/src/main/java/org/truffleruby/core/exception/ErrnoErrorNode.java +++ b/src/main/java/org/truffleruby/core/exception/ErrnoErrorNode.java @@ -10,11 +10,12 @@ package org.truffleruby.core.exception; import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Specialization; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.backtrace.Backtrace; @@ -33,7 +34,8 @@ public abstract RubySystemCallError execute(RubyClass rubyClass, int errno, Obje Backtrace backtrace); @Specialization - protected RubySystemCallError errnoError(RubyClass rubyClass, int errno, Object extraMessage, Backtrace backtrace) { + protected RubySystemCallError errnoError(RubyClass rubyClass, int errno, Object extraMessage, Backtrace backtrace, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final String errnoName = 
getContext().getCoreLibrary().getErrnoName(errno); final Object errnoDescription; @@ -47,10 +49,10 @@ protected RubySystemCallError errnoError(RubyClass rubyClass, int errno, Object } else { errnoClass = getContext().getCoreLibrary().getErrnoClass(errnoName); } - errnoDescription = StringOperations.createUTF8String( - getContext(), - getLanguage(), - StringOperations.encodeRope(ErrnoDescriptions.getDescription(errnoName), UTF8Encoding.INSTANCE)); + + + errnoDescription = createString(fromJavaStringNode, ErrnoDescriptions.getDescription(errnoName), + Encodings.UTF_8); } diff --git a/src/main/java/org/truffleruby/core/exception/ExceptionOperations.java b/src/main/java/org/truffleruby/core/exception/ExceptionOperations.java index 6ac655507b9b..7bcfa6b25908 100644 --- a/src/main/java/org/truffleruby/core/exception/ExceptionOperations.java +++ b/src/main/java/org/truffleruby/core/exception/ExceptionOperations.java @@ -22,6 +22,7 @@ import org.truffleruby.core.thread.ThreadNodes.ThreadGetExceptionNode; import org.truffleruby.language.Nil; import org.truffleruby.language.RubyConstant; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.backtrace.Backtrace; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -103,7 +104,7 @@ private static String messageFieldToString(RubyException exception) { final ModuleFields exceptionClass = exception.getLogicalClass().fields; return exceptionClass.getName(); // What Exception#message would return if no message is set } else if (strings.isRubyString(message)) { - return strings.getJavaString(message); + return RubyGuards.getJavaString(message); } else { return message.toString(); } @@ -117,9 +118,8 @@ public static String messageToString(RubyException exception) { } catch (Throwable e) { // Fall back to the internal message field } - final RubyStringLibrary libString = RubyStringLibrary.getUncached(); - if (messageObject != null && libString.isRubyString(messageObject)) { - return 
libString.getJavaString(messageObject); + if (messageObject != null && RubyStringLibrary.getUncached().isRubyString(messageObject)) { + return RubyGuards.getJavaString(messageObject); } else { return messageFieldToString(exception); } diff --git a/src/main/java/org/truffleruby/core/fiber/FiberNodes.java b/src/main/java/org/truffleruby/core/fiber/FiberNodes.java index b4878be8bbb0..341657f42f72 100644 --- a/src/main/java/org/truffleruby/core/fiber/FiberNodes.java +++ b/src/main/java/org/truffleruby/core/fiber/FiberNodes.java @@ -10,6 +10,7 @@ package org.truffleruby.core.fiber; import com.oracle.truffle.api.frame.VirtualFrame; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreMethodNode; @@ -26,9 +27,7 @@ import org.truffleruby.core.fiber.RubyFiber.FiberStatus; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.core.thread.RubyThread; import org.truffleruby.language.Nil; import org.truffleruby.language.Visibility; @@ -347,8 +346,8 @@ protected RubyFiber current() { public abstract static class FiberSourceLocationNode extends PrimitiveArrayArgumentsNode { @Specialization protected RubyString sourceLocation(RubyFiber fiber, - @Cached MakeStringNode makeStringNode) { - return makeStringNode.executeMake(fiber.sourceLocation, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + return createString(fromJavaStringNode, fiber.sourceLocation, Encodings.UTF_8); } } @@ -356,8 +355,8 @@ protected RubyString sourceLocation(RubyFiber fiber, public abstract static class FiberStatusNode extends PrimitiveArrayArgumentsNode { @Specialization protected RubyString 
status(RubyFiber fiber, - @Cached MakeStringNode makeStringNode) { - return makeStringNode.executeMake(fiber.status.label, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + return createString(fromJavaStringNode, fiber.status.label, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/core/format/BytesResult.java b/src/main/java/org/truffleruby/core/format/BytesResult.java index 7b1b5795ad91..11a0416736ba 100644 --- a/src/main/java/org/truffleruby/core/format/BytesResult.java +++ b/src/main/java/org/truffleruby/core/format/BytesResult.java @@ -9,15 +9,12 @@ */ package org.truffleruby.core.format; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.extra.ffi.Pointer; public class BytesResult { private final byte[] output; private final int outputLength; - private final int stringLength; - private final CodeRange stringCodeRange; private final FormatEncoding encoding; private final Pointer[] associated; @@ -25,14 +22,10 @@ public class BytesResult { public BytesResult( byte[] output, int outputLength, - int stringLength, - CodeRange stringCodeRange, FormatEncoding encoding, Pointer[] associated) { this.output = output; this.outputLength = outputLength; - this.stringLength = stringLength; - this.stringCodeRange = stringCodeRange; this.encoding = encoding; this.associated = associated; } @@ -45,14 +38,6 @@ public int getOutputLength() { return outputLength; } - public int getStringLength() { - return stringLength; - } - - public CodeRange getStringCodeRange() { - return stringCodeRange; - } - public FormatEncoding getEncoding() { return encoding; } diff --git a/src/main/java/org/truffleruby/core/format/FormatEncoding.java b/src/main/java/org/truffleruby/core/format/FormatEncoding.java index 41c75fddd003..740dc728f525 100644 --- a/src/main/java/org/truffleruby/core/format/FormatEncoding.java +++ b/src/main/java/org/truffleruby/core/format/FormatEncoding.java @@ -10,10 +10,6 @@ package 
org.truffleruby.core.format; import com.oracle.truffle.api.nodes.Node; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; import com.oracle.truffle.api.CompilerDirectives; import org.truffleruby.RubyContext; @@ -46,16 +42,16 @@ public RubyEncoding getEncodingForLength(int length) { } } - public static FormatEncoding find(Encoding encoding, Node currentNode) { - if (encoding == ASCIIEncoding.INSTANCE) { + public static FormatEncoding find(RubyEncoding encoding, Node currentNode) { + if (encoding == Encodings.BINARY) { return ASCII_8BIT; } - if (encoding == USASCIIEncoding.INSTANCE) { + if (encoding == Encodings.US_ASCII) { return US_ASCII; } - if (encoding == UTF8Encoding.INSTANCE) { + if (encoding == Encodings.UTF_8) { return UTF_8; } diff --git a/src/main/java/org/truffleruby/core/format/FormatExceptionTranslator.java b/src/main/java/org/truffleruby/core/format/FormatExceptionTranslator.java index 97eb51cfb2c9..78d89604dc97 100644 --- a/src/main/java/org/truffleruby/core/format/FormatExceptionTranslator.java +++ b/src/main/java/org/truffleruby/core/format/FormatExceptionTranslator.java @@ -11,7 +11,6 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.nodes.Node; import org.truffleruby.RubyContext; import org.truffleruby.core.exception.CoreExceptions; import org.truffleruby.core.format.exceptions.CantCompressNegativeException; @@ -22,12 +21,13 @@ import org.truffleruby.core.format.exceptions.OutsideOfStringException; import org.truffleruby.core.format.exceptions.RangeException; import org.truffleruby.core.format.exceptions.TooFewArgumentsException; +import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.control.RaiseException; public abstract class FormatExceptionTranslator { @TruffleBoundary - public static RuntimeException 
translate(RubyContext context, Node currentNode, FormatException exception) { + public static RuntimeException translate(RubyContext context, RubyBaseNode currentNode, FormatException exception) { final CoreExceptions coreExceptions = context.getCoreExceptions(); if (exception instanceof TooFewArgumentsException) { diff --git a/src/main/java/org/truffleruby/core/format/FormatFrameDescriptor.java b/src/main/java/org/truffleruby/core/format/FormatFrameDescriptor.java index b177fc4b1cec..09c06fa91677 100644 --- a/src/main/java/org/truffleruby/core/format/FormatFrameDescriptor.java +++ b/src/main/java/org/truffleruby/core/format/FormatFrameDescriptor.java @@ -16,25 +16,23 @@ public class FormatFrameDescriptor { public static final int SOURCE_SLOT; - public static final int SOURCE_LENGTH_SLOT; + public static final int SOURCE_END_POSITION_SLOT; + public static final int SOURCE_START_POSITION_SLOT; public static final int SOURCE_POSITION_SLOT; public static final int SOURCE_ASSOCIATED_SLOT; public static final int OUTPUT_SLOT; public static final int OUTPUT_POSITION_SLOT; - public static final int STRING_LENGTH_SLOT; - public static final int STRING_CODE_RANGE_SLOT; public static final int ASSOCIATED_SLOT; public static final FrameDescriptor FRAME_DESCRIPTOR; static { var builder = FrameDescriptor.newBuilder().defaultValue(Nil.INSTANCE); SOURCE_SLOT = builder.addSlot(FrameSlotKind.Object, "source", null); - SOURCE_LENGTH_SLOT = builder.addSlot(FrameSlotKind.Int, "source-length", null); + SOURCE_END_POSITION_SLOT = builder.addSlot(FrameSlotKind.Int, "source-end", null); + SOURCE_START_POSITION_SLOT = builder.addSlot(FrameSlotKind.Int, "source-start-position", null); SOURCE_POSITION_SLOT = builder.addSlot(FrameSlotKind.Int, "source-position", null); SOURCE_ASSOCIATED_SLOT = builder.addSlot(FrameSlotKind.Object, "source-associated", null); OUTPUT_SLOT = builder.addSlot(FrameSlotKind.Object, "output", null); OUTPUT_POSITION_SLOT = builder.addSlot(FrameSlotKind.Int, 
"output-position", null); - STRING_LENGTH_SLOT = builder.addSlot(FrameSlotKind.Int, "string-length", null); - STRING_CODE_RANGE_SLOT = builder.addSlot(FrameSlotKind.Int, "string-code-range", null); ASSOCIATED_SLOT = builder.addSlot(FrameSlotKind.Object, "associated", null); FRAME_DESCRIPTOR = builder.build(); diff --git a/src/main/java/org/truffleruby/core/format/FormatNode.java b/src/main/java/org/truffleruby/core/format/FormatNode.java index 8116ea4ca6c9..88cb9b2c0f5c 100644 --- a/src/main/java/org/truffleruby/core/format/FormatNode.java +++ b/src/main/java/org/truffleruby/core/format/FormatNode.java @@ -14,8 +14,7 @@ import org.truffleruby.core.array.ArrayUtils; import org.truffleruby.core.format.exceptions.TooFewArgumentsException; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.RopeConstants; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.RubyBaseNode; import com.oracle.truffle.api.CompilerDirectives; @@ -32,12 +31,15 @@ public abstract class FormatNode extends RubyBaseNode { private final ConditionProfile writeMoreThanZeroBytes = ConditionProfile.create(); private final ConditionProfile tooFewArgumentsProfile = ConditionProfile.create(); private final ConditionProfile sourceRangeProfile = ConditionProfile.create(); - private final ConditionProfile codeRangeIncreasedProfile = ConditionProfile.create(); public abstract Object execute(VirtualFrame frame); - public int getSourceLength(VirtualFrame frame) { - return frame.getInt(FormatFrameDescriptor.SOURCE_LENGTH_SLOT); + public int getSourceEnd(VirtualFrame frame) { + return frame.getInt(FormatFrameDescriptor.SOURCE_END_POSITION_SLOT); + } + + public int getSourceStart(VirtualFrame frame) { + return frame.getInt(FormatFrameDescriptor.SOURCE_START_POSITION_SLOT); } protected int getSourcePosition(VirtualFrame frame) { @@ -55,7 +57,7 @@ protected int advanceSourcePosition(VirtualFrame frame) { protected int advanceSourcePosition(VirtualFrame 
frame, int count) { final int sourcePosition = getSourcePosition(frame); - if (tooFewArgumentsProfile.profile(sourcePosition + count > getSourceLength(frame))) { + if (tooFewArgumentsProfile.profile(sourcePosition + count > getSourceEnd(frame))) { throw new TooFewArgumentsException(); } @@ -71,11 +73,11 @@ protected int advanceSourcePositionNoThrow(VirtualFrame frame) { protected int advanceSourcePositionNoThrow(VirtualFrame frame, int count, boolean consumePartial) { final int sourcePosition = getSourcePosition(frame); - final int sourceLength = getSourceLength(frame); + final int end = getSourceEnd(frame); - if (sourceRangeProfile.profile(sourcePosition + count > sourceLength)) { + if (sourceRangeProfile.profile(sourcePosition + count > end)) { if (consumePartial) { - setSourcePosition(frame, sourceLength); + setSourcePosition(frame, end); } return -1; @@ -102,44 +104,22 @@ protected void setOutputPosition(VirtualFrame frame, int position) { frame.setInt(FormatFrameDescriptor.OUTPUT_POSITION_SLOT, position); } - protected int getStringLength(VirtualFrame frame) { - return frame.getInt(FormatFrameDescriptor.STRING_LENGTH_SLOT); - } - - protected void setStringLength(VirtualFrame frame, int length) { - frame.setInt(FormatFrameDescriptor.STRING_LENGTH_SLOT, length); - } - - protected void increaseStringLength(VirtualFrame frame, int additionalLength) { - setStringLength(frame, getStringLength(frame) + additionalLength); - } - - protected void setStringCodeRange(VirtualFrame frame, CodeRange codeRange) { - final int existingCodeRange = frame.getInt(FormatFrameDescriptor.STRING_CODE_RANGE_SLOT); - - if (codeRangeIncreasedProfile.profile(codeRange.toInt() > existingCodeRange)) { - frame.setInt(FormatFrameDescriptor.STRING_CODE_RANGE_SLOT, codeRange.toInt()); - } - } - protected void writeByte(VirtualFrame frame, byte value) { final byte[] output = ensureCapacity(frame, 1); final int outputPosition = getOutputPosition(frame); output[outputPosition] = value; 
setOutputPosition(frame, outputPosition + 1); - increaseStringLength(frame, 1); } - protected void writeBytes(VirtualFrame frame, byte... values) { - writeBytes(frame, values, values.length); + protected void writeBytes(VirtualFrame frame, byte[] values) { + writeBytes(frame, values, 0, values.length); } - protected void writeBytes(VirtualFrame frame, byte[] values, int valuesLength) { + protected void writeBytes(VirtualFrame frame, byte[] values, int valuesOffset, int valuesLength) { byte[] output = ensureCapacity(frame, valuesLength); final int outputPosition = getOutputPosition(frame); - System.arraycopy(values, 0, output, outputPosition, valuesLength); + System.arraycopy(values, valuesOffset, output, outputPosition, valuesLength); setOutputPosition(frame, outputPosition + valuesLength); - increaseStringLength(frame, valuesLength); } protected void writeNullBytes(VirtualFrame frame, int length) { @@ -147,7 +127,6 @@ protected void writeNullBytes(VirtualFrame frame, int length) { ensureCapacity(frame, length); final int outputPosition = getOutputPosition(frame); setOutputPosition(frame, outputPosition + length); - increaseStringLength(frame, length); } } @@ -167,14 +146,14 @@ private byte[] ensureCapacity(VirtualFrame frame, int length) { } private static final Class HEAP_BYTE_BUFFER_CLASS = ByteBuffer - .wrap(RopeConstants.EMPTY_BYTES) + .wrap(TStringConstants.EMPTY_BYTES) .getClass(); public ByteBuffer wrapByteBuffer(VirtualFrame frame, byte[] source) { final int position = getSourcePosition(frame); - final int length = getSourceLength(frame); + final int end = getSourceEnd(frame); return CompilerDirectives - .castExact(wrapByteBuffer(source, position, length - position), HEAP_BYTE_BUFFER_CLASS); + .castExact(wrapByteBuffer(source, position, end - position), HEAP_BYTE_BUFFER_CLASS); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/format/FormatRootNode.java b/src/main/java/org/truffleruby/core/format/FormatRootNode.java index 
dec17236baca..0a9a6256cc46 100644 --- a/src/main/java/org/truffleruby/core/format/FormatRootNode.java +++ b/src/main/java/org/truffleruby/core/format/FormatRootNode.java @@ -12,7 +12,6 @@ import java.util.List; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.extra.ffi.Pointer; import org.truffleruby.language.RubyBaseRootNode; import org.truffleruby.language.backtrace.InternalRootNode; @@ -48,12 +47,11 @@ public FormatRootNode( @Override public Object execute(VirtualFrame frame) { frame.setObject(FormatFrameDescriptor.SOURCE_SLOT, frame.getArguments()[0]); - frame.setInt(FormatFrameDescriptor.SOURCE_LENGTH_SLOT, (int) frame.getArguments()[1]); + frame.setInt(FormatFrameDescriptor.SOURCE_END_POSITION_SLOT, (int) frame.getArguments()[1]); + frame.setInt(FormatFrameDescriptor.SOURCE_START_POSITION_SLOT, 0); frame.setInt(FormatFrameDescriptor.SOURCE_POSITION_SLOT, 0); frame.setObject(FormatFrameDescriptor.OUTPUT_SLOT, new byte[expectedLength]); frame.setInt(FormatFrameDescriptor.OUTPUT_POSITION_SLOT, 0); - frame.setInt(FormatFrameDescriptor.STRING_LENGTH_SLOT, 0); - frame.setInt(FormatFrameDescriptor.STRING_CODE_RANGE_SLOT, CodeRange.CR_UNKNOWN.toInt()); frame.setObject(FormatFrameDescriptor.ASSOCIATED_SLOT, null); child.execute(frame); @@ -71,28 +69,17 @@ public Object execute(VirtualFrame frame) { } final byte[] output = (byte[]) frame.getObject(FormatFrameDescriptor.OUTPUT_SLOT); - final int stringLength; - if (encoding == FormatEncoding.UTF_8) { - stringLength = frame.getInt(FormatFrameDescriptor.STRING_LENGTH_SLOT); - } else { - stringLength = outputLength; - } - - final CodeRange stringCodeRange = CodeRange.fromInt(frame.getInt(FormatFrameDescriptor.STRING_CODE_RANGE_SLOT)); - final List associated; - - associated = (List) frame.getObject(FormatFrameDescriptor.ASSOCIATED_SLOT); + final List associated = (List) frame.getObject(FormatFrameDescriptor.ASSOCIATED_SLOT); final Pointer[] associatedArray; - if 
(associated != null) { associatedArray = associatedToArray(associated); } else { associatedArray = null; } - return new BytesResult(output, outputLength, stringLength, stringCodeRange, encoding, associatedArray); + return new BytesResult(output, outputLength, encoding, associatedArray); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/format/control/AdvanceSourcePositionNode.java b/src/main/java/org/truffleruby/core/format/control/AdvanceSourcePositionNode.java index 5b923534f3c8..3d304fac019d 100644 --- a/src/main/java/org/truffleruby/core/format/control/AdvanceSourcePositionNode.java +++ b/src/main/java/org/truffleruby/core/format/control/AdvanceSourcePositionNode.java @@ -28,11 +28,11 @@ public AdvanceSourcePositionNode(boolean toEnd) { @Override public Object execute(VirtualFrame frame) { if (toEnd) { - setSourcePosition(frame, getSourceLength(frame)); + setSourcePosition(frame, getSourceEnd(frame)); } else { final int position = getSourcePosition(frame); - if (rangeProfile.profile(position + 1 > getSourceLength(frame))) { + if (rangeProfile.profile(position + 1 > getSourceEnd(frame))) { throw new OutsideOfStringException(); } diff --git a/src/main/java/org/truffleruby/core/format/control/ReverseSourcePositionNode.java b/src/main/java/org/truffleruby/core/format/control/ReverseSourcePositionNode.java index fbfb2ba4b034..198677becd64 100644 --- a/src/main/java/org/truffleruby/core/format/control/ReverseSourcePositionNode.java +++ b/src/main/java/org/truffleruby/core/format/control/ReverseSourcePositionNode.java @@ -30,17 +30,17 @@ public Object execute(VirtualFrame frame) { final int position = getSourcePosition(frame); if (star) { - final int remaining = getSourceLength(frame) - position; + final int remaining = getSourceEnd(frame) - position; final int target = position - remaining; - if (rangeProfile.profile(target < 0)) { + if (rangeProfile.profile(target < getSourceStart(frame))) { throw new OutsideOfStringException(); } 
setSourcePosition(frame, target); } else { - if (rangeProfile.profile(position == 0)) { + if (rangeProfile.profile(position == getSourceStart(frame))) { throw new OutsideOfStringException(); } diff --git a/src/main/java/org/truffleruby/core/format/control/SetSourcePositionNode.java b/src/main/java/org/truffleruby/core/format/control/SetSourcePositionNode.java index 2595bcc72885..785d57972dc4 100644 --- a/src/main/java/org/truffleruby/core/format/control/SetSourcePositionNode.java +++ b/src/main/java/org/truffleruby/core/format/control/SetSourcePositionNode.java @@ -28,15 +28,17 @@ public SetSourcePositionNode(int position) { @Override public Object execute(VirtualFrame frame) { - if (rangeProfile.profile(position > getSourceLength(frame))) { + int positionWithStartOffset = getSourceStart(frame) + position; + + if (rangeProfile.profile(positionWithStartOffset > getSourceEnd(frame))) { throw new OutsideOfStringException(); } - if (rangeProfile.profile(position < 0)) { + if (position < 0) { throw new RangeException("pack length too big"); } - setSourcePosition(frame, position); + setSourcePosition(frame, positionWithStartOffset); return null; } diff --git a/src/main/java/org/truffleruby/core/format/control/StarNode.java b/src/main/java/org/truffleruby/core/format/control/StarNode.java index d89c62ebff7f..805331fac660 100644 --- a/src/main/java/org/truffleruby/core/format/control/StarNode.java +++ b/src/main/java/org/truffleruby/core/format/control/StarNode.java @@ -41,7 +41,7 @@ public StarRepeatingNode(FormatNode child) { @Override public boolean executeRepeating(VirtualFrame frame) { - if (getSourcePosition(frame) >= getSourceLength(frame)) { + if (getSourcePosition(frame) >= getSourceEnd(frame)) { return false; } diff --git a/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java b/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java index 32b513660194..0ee23dd3d033 100644 --- 
a/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java +++ b/src/main/java/org/truffleruby/core/format/convert/StringToPointerNode.java @@ -13,7 +13,6 @@ import java.util.List; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.cext.CExtNodes; import org.truffleruby.core.format.FormatFrameDescriptor; import org.truffleruby.core.format.FormatNode; @@ -35,12 +34,12 @@ protected long toPointer(Nil nil) { } @SuppressWarnings("unchecked") - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected long toPointer(VirtualFrame frame, Object string, @Cached CExtNodes.StringToNativeNode stringToNativeNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { - final Pointer pointer = stringToNativeNode.executeToNative(string).getNativePointer(); + final Pointer pointer = stringToNativeNode.executeToNative(string); List associated = (List) frame.getObject(FormatFrameDescriptor.ASSOCIATED_SLOT); diff --git a/src/main/java/org/truffleruby/core/format/convert/ToStringNode.java b/src/main/java/org/truffleruby/core/format/convert/ToStringNode.java index 2b538c74928c..459633413594 100644 --- a/src/main/java/org/truffleruby/core/format/convert/ToStringNode.java +++ b/src/main/java/org/truffleruby/core/format/convert/ToStringNode.java @@ -11,19 +11,16 @@ import java.nio.charset.StandardCharsets; -import com.oracle.truffle.api.library.CachedLibrary; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.array.RubyArray; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import 
org.truffleruby.core.format.exceptions.NoImplicitConversionException; import org.truffleruby.core.kernel.KernelNodes; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LazyIntRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.RubyString; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.Nil; import org.truffleruby.language.dispatch.DispatchNode; @@ -31,7 +28,6 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.frame.VirtualFrame; import org.truffleruby.language.library.RubyStringLibrary; import static org.truffleruby.language.dispatch.DispatchConfiguration.PRIVATE_RETURN_MISSING; @@ -66,69 +62,66 @@ public ToStringNode( this.convertNumbersToStrings = convertNumbersToStrings; this.conversionMethod = conversionMethod; this.inspectOnConversionFailure = inspectOnConversionFailure; + this.valueOnNil = valueOnNil; this.specialClassBehaviour = specialClassBehaviour; } - public abstract Object executeToString(VirtualFrame frame, Object object); + public abstract Object executeToString(Object object); @Specialization protected Object toStringNil(Nil nil) { return valueOnNil; } - @TruffleBoundary - @Specialization(guards = "convertNumbersToStrings") - protected Rope toString(int value) { - return new LazyIntRope(value); - } - - @TruffleBoundary @Specialization(guards = "convertNumbersToStrings") - protected Rope toString(long value) { - return RopeOperations.encodeAscii(Long.toString(value), USASCIIEncoding.INSTANCE); + protected RubyString toString(long value, + @Cached TruffleString.FromLongNode fromLongNode) { + var tstring = fromLongNode.execute(value, Encodings.US_ASCII.tencoding, true); + 
return createString(tstring, Encodings.US_ASCII); } @TruffleBoundary @Specialization(guards = "convertNumbersToStrings") - protected Rope toString(double value) { - return RopeOperations.encodeAscii(Double.toString(value), USASCIIEncoding.INSTANCE); + protected RubyString toString(double value, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + return createString(fromJavaStringNode, Double.toString(value), Encodings.US_ASCII); } @TruffleBoundary @Specialization(guards = "specialClassBehaviour") - protected Rope toStringSpecialClass(RubyClass rubyClass, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + protected Object toStringSpecialClass(RubyClass rubyClass, + @Cached RubyStringLibrary libString) { if (rubyClass == getContext().getCoreLibrary().trueClass) { - return RopeConstants.TRUE; + return createString(TStringConstants.TRUE, Encodings.US_ASCII); } else if (rubyClass == getContext().getCoreLibrary().falseClass) { - return RopeConstants.FALSE; + return createString(TStringConstants.FALSE, Encodings.US_ASCII); } else if (rubyClass == getContext().getCoreLibrary().nilClass) { - return RopeConstants.NIL; + return createString(TStringConstants.NIL, Encodings.US_ASCII); } else { return toString(rubyClass, libString); } } - @Specialization(guards = "libString.isRubyString(string)") - protected Rope toStringString(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libValue, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object toStringString(Object string, + @Cached RubyStringLibrary libValue, + @Cached RubyStringLibrary libString) { if ("inspect".equals(conversionMethod)) { final Object value = getToStrNode().call(string, conversionMethod); if (libValue.isRubyString(value)) { - return libValue.getRope(value); + return value; } else { throw new NoImplicitConversionException(string, 
"String"); } } - return libString.getRope(string); + return string; } @Specialization - protected Rope toString(RubyArray array, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + protected Object toString(RubyArray array, + @Cached RubyStringLibrary libString) { if (toSNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); toSNode = insert(DispatchNode.create(PRIVATE_RETURN_MISSING)); @@ -137,7 +130,7 @@ protected Rope toString(RubyArray array, final Object value = toSNode.call(array, "to_s"); if (libString.isRubyString(value)) { - return libString.getRope(value); + return value; } else { throw new NoImplicitConversionException(array, "String"); } @@ -145,12 +138,12 @@ protected Rope toString(RubyArray array, @Specialization( guards = { "isNotRubyString(object)", "!isRubyArray(object)", "!isForeignObject(object)" }) - protected Rope toString(Object object, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + protected Object toString(Object object, + @Cached RubyStringLibrary libString) { final Object value = getToStrNode().call(object, conversionMethod); if (libString.isRubyString(value)) { - return libString.getRope(value); + return value; } if (inspectOnConversionFailure) { @@ -159,7 +152,7 @@ protected Rope toString(Object object, inspectNode = insert(KernelNodes.ToSNode.create()); } - return inspectNode.executeToS(object).rope; + return inspectNode.executeToS(object); } else { throw new NoImplicitConversionException(object, "String"); } @@ -167,11 +160,11 @@ protected Rope toString(Object object, @TruffleBoundary @Specialization(guards = "isForeignObject(object)") - protected Rope toStringForeign(Object object) { - return RopeOperations.create( + protected RubyString toStringForeign(Object object, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { + return createString(fromByteArrayNode, object.toString().getBytes(StandardCharsets.UTF_8), - UTF8Encoding.INSTANCE, - 
CodeRange.CR_UNKNOWN); + Encodings.UTF_8); } private DispatchNode getToStrNode() { diff --git a/src/main/java/org/truffleruby/core/format/convert/ToStringObjectNode.java b/src/main/java/org/truffleruby/core/format/convert/ToStringObjectNode.java index 8a9c2fad587e..8dee0d38db4d 100644 --- a/src/main/java/org/truffleruby/core/format/convert/ToStringObjectNode.java +++ b/src/main/java/org/truffleruby/core/format/convert/ToStringObjectNode.java @@ -9,7 +9,6 @@ */ package org.truffleruby.core.format.convert; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.core.cast.ToStrNode; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.exceptions.NoImplicitConversionException; @@ -33,9 +32,9 @@ protected Object toStringString(Nil nil) { return nil; } - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected Object toStringString(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { return string; } diff --git a/src/main/java/org/truffleruby/core/format/format/FormatCharacterNode.java b/src/main/java/org/truffleruby/core/format/format/FormatCharacterNode.java index 87a794f1a3dd..a4cd94d451ec 100644 --- a/src/main/java/org/truffleruby/core/format/format/FormatCharacterNode.java +++ b/src/main/java/org/truffleruby/core/format/format/FormatCharacterNode.java @@ -9,6 +9,7 @@ */ package org.truffleruby.core.format.format; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.cast.ToIntNode; import org.truffleruby.core.cast.ToIntNodeGen; import org.truffleruby.core.format.FormatNode; @@ -18,8 +19,6 @@ import org.truffleruby.core.format.exceptions.NoImplicitConversionException; import org.truffleruby.core.format.printf.PrintfSimpleTreeBuilder; import org.truffleruby.core.format.write.bytes.WriteByteNodeGen; -import org.truffleruby.core.rope.Rope; 
-import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.StringUtils; import org.truffleruby.language.RubyGuards; import org.truffleruby.language.control.RaiseException; @@ -29,7 +28,7 @@ import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.frame.VirtualFrame; +import org.truffleruby.language.library.RubyStringLibrary; @NodeChild("width") @NodeChild("value") @@ -45,20 +44,23 @@ public FormatCharacterNode(boolean hasMinusFlag) { } @Specialization(guards = { "width == cachedWidth" }, limit = "getLimit()") - protected byte[] formatCached(VirtualFrame frame, int width, Object value, + protected byte[] formatCached(int width, Object value, @Cached("width") int cachedWidth, - @Cached("makeFormatString(width)") String cachedFormatString) { - final String charString = getCharString(frame, value); + @Cached("makeFormatString(width)") String cachedFormatString, + @Cached RubyStringLibrary libString) { + final String charString = getCharString(value, libString); return StringUtils.formatASCIIBytes(cachedFormatString, charString); } @Specialization(replaces = "formatCached") - protected byte[] format(VirtualFrame frame, int width, Object value) { - final String charString = getCharString(frame, value); + protected byte[] format(int width, Object value, + @Cached RubyStringLibrary libString) { + final String charString = getCharString(value, libString); return StringUtils.formatASCIIBytes(makeFormatString(width), charString); } - protected String getCharString(VirtualFrame frame, Object value) { + @TruffleBoundary + protected String getCharString(Object value, RubyStringLibrary libString) { if (toStringNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); toStringNode = insert(ToStringNodeGen.create( @@ -70,7 +72,7 @@ protected String getCharString(VirtualFrame frame, Object value) { } Object toStrResult; try { - 
toStrResult = toStringNode.executeToString(frame, value); + toStrResult = toStringNode.executeToString(value); } catch (NoImplicitConversionException e) { toStrResult = null; } @@ -84,9 +86,8 @@ protected String getCharString(VirtualFrame frame, Object value) { final int charValue = toIntegerNode.execute(value); // TODO BJF check char length is > 0 charString = Character.toString((char) charValue); - } else { - Rope rope = (Rope) toStrResult; - final String resultString = RopeOperations.decodeRope(rope); + } else if (libString.isRubyString(toStrResult)) { + final String resultString = RubyGuards.getJavaString(toStrResult); final int size = resultString.length(); if (size > 1) { throw new RaiseException( @@ -94,6 +95,14 @@ protected String getCharString(VirtualFrame frame, Object value) { getContext().getCoreExceptions().argumentErrorCharacterRequired(this)); } charString = resultString; + } else { + var tstring = (TruffleString) toStrResult; + charString = tstring.toJavaStringUncached(); + if (charString.length() > 1) { + throw new RaiseException( + getContext(), + getContext().getCoreExceptions().argumentErrorCharacterRequired(this)); + } } return charString; } diff --git a/src/main/java/org/truffleruby/core/format/format/FormatFloatHumanReadableNode.java b/src/main/java/org/truffleruby/core/format/format/FormatFloatHumanReadableNode.java index b3dbf407bcb9..36599881dd5d 100644 --- a/src/main/java/org/truffleruby/core/format/format/FormatFloatHumanReadableNode.java +++ b/src/main/java/org/truffleruby/core/format/format/FormatFloatHumanReadableNode.java @@ -14,7 +14,7 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; @NodeChild("value") public abstract class FormatFloatHumanReadableNode extends FormatNode { @@ -22,13 +22,13 @@ public abstract class 
FormatFloatHumanReadableNode extends FormatNode { @TruffleBoundary @Specialization(guards = "isIntegerValue(value)") protected byte[] formatInteger(double value) { - return RopeOperations.encodeAsciiBytes(String.valueOf((long) value)); + return StringOperations.encodeAsciiBytes(String.valueOf((long) value)); } @TruffleBoundary @Specialization(guards = "!isIntegerValue(value)") protected byte[] format(double value) { - return RopeOperations.encodeAsciiBytes(String.valueOf(value)); + return StringOperations.encodeAsciiBytes(String.valueOf(value)); } protected boolean isIntegerValue(double value) { diff --git a/src/main/java/org/truffleruby/core/format/format/FormatIntegerBinaryNode.java b/src/main/java/org/truffleruby/core/format/format/FormatIntegerBinaryNode.java index 52a8c17615f3..f981fcd0e4ad 100644 --- a/src/main/java/org/truffleruby/core/format/format/FormatIntegerBinaryNode.java +++ b/src/main/java/org/truffleruby/core/format/format/FormatIntegerBinaryNode.java @@ -15,7 +15,7 @@ import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.printf.PrintfSimpleTreeBuilder; import org.truffleruby.core.numeric.RubyBignum; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; import java.math.BigInteger; @@ -181,7 +181,7 @@ private static byte[] getFormattedString(String formatted, int width, int precis } } - return RopeOperations.encodeAsciiBytes(formatted); + return StringOperations.encodeAsciiBytes(formatted); } } diff --git a/src/main/java/org/truffleruby/core/format/pack/PackCompiler.java b/src/main/java/org/truffleruby/core/format/pack/PackCompiler.java index 32ea3a72eb7e..50c2e2f6a022 100644 --- a/src/main/java/org/truffleruby/core/format/pack/PackCompiler.java +++ b/src/main/java/org/truffleruby/core/format/pack/PackCompiler.java @@ -15,7 +15,7 @@ import org.truffleruby.core.format.LoopRecovery; import com.oracle.truffle.api.RootCallTarget; -import 
org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.control.DeferredRaiseException; public class PackCompiler { @@ -37,7 +37,7 @@ public RootCallTarget compile(String format) throws DeferredRaiseException { builder.enterSequence(); - final SimplePackParser parser = new SimplePackParser(builder, RopeOperations.encodeAsciiBytes(format)); + final SimplePackParser parser = new SimplePackParser(builder, StringOperations.encodeAsciiBytes(format)); parser.parse(); builder.exitSequence(); diff --git a/src/main/java/org/truffleruby/core/format/pack/SimplePackTreeBuilder.java b/src/main/java/org/truffleruby/core/format/pack/SimplePackTreeBuilder.java index b5ef6ae6dc0f..f1acee88047a 100644 --- a/src/main/java/org/truffleruby/core/format/pack/SimplePackTreeBuilder.java +++ b/src/main/java/org/truffleruby/core/format/pack/SimplePackTreeBuilder.java @@ -53,6 +53,8 @@ import com.oracle.truffle.api.nodes.Node; +import static org.truffleruby.language.RubyBaseNode.nil; + public class SimplePackTreeBuilder implements SimplePackListener { private final Node currentNode; @@ -181,7 +183,7 @@ public void mimeString(int count) { true, "to_s", true, - Nil.INSTANCE, + nil, new SourceNode()))); } diff --git a/src/main/java/org/truffleruby/core/format/printf/PrintfCompiler.java b/src/main/java/org/truffleruby/core/format/printf/PrintfCompiler.java index a7d6ba2fdcb1..21947ce68145 100644 --- a/src/main/java/org/truffleruby/core/format/printf/PrintfCompiler.java +++ b/src/main/java/org/truffleruby/core/format/printf/PrintfCompiler.java @@ -11,13 +11,16 @@ import java.util.List; +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.AbstractTruffleString; import org.truffleruby.RubyLanguage; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.format.FormatEncoding; import 
org.truffleruby.core.format.FormatRootNode; -import org.truffleruby.core.rope.Rope; import com.oracle.truffle.api.RootCallTarget; +import org.truffleruby.core.string.StringSupport; public class PrintfCompiler { @@ -29,26 +32,21 @@ public PrintfCompiler(RubyLanguage language, Node currentNode) { this.currentNode = currentNode; } - public RootCallTarget compile(Rope format, Object[] arguments, boolean isDebug) { - final PrintfSimpleParser parser = new PrintfSimpleParser(bytesToChars(format.getBytes()), arguments, isDebug); + @TruffleBoundary + public RootCallTarget compile(AbstractTruffleString tstring, RubyEncoding encoding, Object[] arguments, + boolean isDebug) { + var byteArray = tstring.getInternalByteArrayUncached(encoding.tencoding); + + final PrintfSimpleParser parser = new PrintfSimpleParser(StringSupport.bytesToChars(byteArray), arguments, + isDebug); final List configs = parser.parse(); final PrintfSimpleTreeBuilder builder = new PrintfSimpleTreeBuilder(language, configs); return new FormatRootNode( language, currentNode.getEncapsulatingSourceSection(), - FormatEncoding.find(format.getEncoding(), currentNode), + FormatEncoding.find(encoding, currentNode), builder.getNode()).getCallTarget(); } - private static char[] bytesToChars(byte[] bytes) { - final char[] chars = new char[bytes.length]; - - for (int n = 0; n < bytes.length; n++) { - chars[n] = (char) bytes[n]; - } - - return chars; - } - } diff --git a/src/main/java/org/truffleruby/core/format/printf/PrintfSimpleTreeBuilder.java b/src/main/java/org/truffleruby/core/format/printf/PrintfSimpleTreeBuilder.java index 1dd0bc063c56..11e9f47bcf59 100644 --- a/src/main/java/org/truffleruby/core/format/printf/PrintfSimpleTreeBuilder.java +++ b/src/main/java/org/truffleruby/core/format/printf/PrintfSimpleTreeBuilder.java @@ -13,9 +13,9 @@ import java.util.List; import com.oracle.truffle.api.CompilerDirectives; -import org.jcodings.specific.USASCIIEncoding; import org.truffleruby.RubyLanguage; import 
org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.LiteralFormatNode; import org.truffleruby.core.format.SharedTreeBuilder; @@ -37,10 +37,8 @@ import org.truffleruby.core.format.read.array.ReadValueNodeGen; import org.truffleruby.core.format.write.bytes.WriteBytesNodeGen; import org.truffleruby.core.format.write.bytes.WritePaddedBytesNodeGen; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.FrozenStrings; +import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.core.symbol.RubySymbol; public class PrintfSimpleTreeBuilder { @@ -51,7 +49,7 @@ public class PrintfSimpleTreeBuilder { public static final int DEFAULT = Integer.MIN_VALUE; - private static final LeafRope EMPTY_ROPE = RopeConstants.EMPTY_US_ASCII_ROPE; + private static final ImmutableRubyString EMPTY_STRING = FrozenStrings.EMPTY_US_ASCII; public PrintfSimpleTreeBuilder(RubyLanguage language, List configs) { this.language = language; @@ -67,10 +65,8 @@ private void buildTree() { final FormatNode valueNode; if (config.getNamesBytes() != null) { - final RubySymbol key = language.getSymbol(RopeOperations.create( - config.getNamesBytes(), - USASCIIEncoding.INSTANCE, - CodeRange.CR_7BIT), Encodings.US_ASCII); + final RubySymbol key = language.getSymbol( + TStringUtils.fromByteArray(config.getNamesBytes(), Encodings.US_ASCII), Encodings.US_ASCII); valueNode = ReadHashValueNodeGen.create(key, new SourceNode()); } else if (config.getAbsoluteArgumentIndex() != null) { valueNode = ReadArgumentIndexValueNodeGen @@ -231,11 +227,11 @@ private void buildTree() { true, conversionMethodName, false, - EMPTY_ROPE, + EMPTY_STRING, new SourceNode()); } else { conversionNode = ToStringNodeGen - .create(true, 
conversionMethodName, false, EMPTY_ROPE, valueNode); + .create(true, conversionMethodName, false, EMPTY_STRING, valueNode); } if (config.getWidth() != null || config.isWidthStar() || diff --git a/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfCompiler.java b/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfCompiler.java index a76c4e922097..97e88a37fe91 100644 --- a/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfCompiler.java +++ b/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfCompiler.java @@ -11,17 +11,21 @@ import java.util.List; +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.array.RubyArray; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.format.FormatEncoding; import org.truffleruby.core.format.FormatRootNode; import org.truffleruby.core.format.rbsprintf.RBSprintfConfig.FormatArgumentType; -import org.truffleruby.core.rope.Rope; import com.oracle.truffle.api.RootCallTarget; +import org.truffleruby.core.string.StringSupport; public class RBSprintfCompiler { @@ -33,22 +37,29 @@ public RBSprintfCompiler(RubyLanguage language, Node currentNode) { this.currentNode = currentNode; } - public RootCallTarget compile(Rope format, Object stringReader) { - final RBSprintfSimpleParser parser = new RBSprintfSimpleParser(bytesToChars(format.getBytes()), false); + @TruffleBoundary + public RootCallTarget compile(AbstractTruffleString formatTString, RubyEncoding formatEncoding, + Object stringReader) { + var byteArray = formatTString.getInternalByteArrayUncached(formatEncoding.tencoding); + + final RBSprintfSimpleParser parser = new RBSprintfSimpleParser(StringSupport.bytesToChars(byteArray), false); final List configs 
= parser.parse(); final RBSprintfSimpleTreeBuilder builder = new RBSprintfSimpleTreeBuilder(configs, stringReader); return new FormatRootNode( language, currentNode.getEncapsulatingSourceSection(), - FormatEncoding.find(format.getEncoding(), currentNode), + FormatEncoding.find(formatEncoding, currentNode), builder.getNode()).getCallTarget(); } private static int SIGN = 0x10; - public RubyArray typeList(Rope format, RubyContext context, RubyLanguage language) { - final RBSprintfSimpleParser parser = new RBSprintfSimpleParser(bytesToChars(format.getBytes()), false); + public RubyArray typeList(AbstractTruffleString formatTString, RubyEncoding formatEncoding, + TruffleString.GetInternalByteArrayNode byteArrayNode, RubyContext context, RubyLanguage language) { + var byteArray = byteArrayNode.execute(formatTString, formatEncoding.tencoding); + + final RBSprintfSimpleParser parser = new RBSprintfSimpleParser(StringSupport.bytesToChars(byteArray), false); final List configs = parser.parse(); final int[] types = new int[3 * configs.size()]; // Ensure there is enough space for the argument types that might be in the format string. 
@@ -100,15 +111,4 @@ public RubyArray typeList(Rope format, RubyContext context, RubyLanguage languag return new RubyArray(context.getCoreLibrary().arrayClass, language.arrayShape, types, highWaterMark + 1); } - private static char[] bytesToChars(byte[] bytes) { - final char[] chars = new char[bytes.length]; - - for (int n = 0; n < bytes.length; n++) { - chars[n] = (char) bytes[n]; - } - - return chars; - } - - } diff --git a/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfSimpleTreeBuilder.java b/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfSimpleTreeBuilder.java index 5e6cc04e91d1..ca30f5b77df7 100644 --- a/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfSimpleTreeBuilder.java +++ b/src/main/java/org/truffleruby/core/format/rbsprintf/RBSprintfSimpleTreeBuilder.java @@ -37,8 +37,8 @@ import org.truffleruby.core.format.read.array.ReadValueNodeGen; import org.truffleruby.core.format.write.bytes.WriteBytesNodeGen; import org.truffleruby.core.format.write.bytes.WritePaddedBytesNodeGen; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.RopeConstants; +import org.truffleruby.core.string.FrozenStrings; +import org.truffleruby.core.string.ImmutableRubyString; public class RBSprintfSimpleTreeBuilder { @@ -48,7 +48,7 @@ public class RBSprintfSimpleTreeBuilder { public static final int DEFAULT = PrintfSimpleTreeBuilder.DEFAULT; - private static final LeafRope EMPTY_ROPE = RopeConstants.EMPTY_US_ASCII_ROPE; + private static final ImmutableRubyString EMPTY_STRING = FrozenStrings.EMPTY_US_ASCII; public RBSprintfSimpleTreeBuilder(List configs, Object stringReader) { this.configs = configs; @@ -275,13 +275,12 @@ private void buildTree() { true, conversionMethodName, false, - EMPTY_ROPE, + EMPTY_STRING, config.isPlus(), new SourceNode()); } else { - conversionNode = ToStringNodeGen - .create(true, conversionMethodName, false, EMPTY_ROPE, config.isPlus(), - valueNode); + conversionNode = 
ToStringNodeGen.create(true, conversionMethodName, false, EMPTY_STRING, + config.isPlus(), valueNode); } } else { conversionNode = ToStringNodeGen @@ -289,7 +288,7 @@ private void buildTree() { true, conversionMethodName, false, - EMPTY_ROPE, + EMPTY_STRING, config.isPlus(), (config.getAbsoluteArgumentIndex() == null) ? (ReadCValueNodeGen diff --git a/src/main/java/org/truffleruby/core/format/read/array/ReadCStringNode.java b/src/main/java/org/truffleruby/core/format/read/array/ReadCStringNode.java index 4ca3ddc57840..daa03c2e7b8e 100644 --- a/src/main/java/org/truffleruby/core/format/read/array/ReadCStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/array/ReadCStringNode.java @@ -14,13 +14,11 @@ import org.truffleruby.core.format.FormatNode; import org.truffleruby.interop.InteropNodes; import org.truffleruby.interop.TranslateInteropExceptionNode; -import org.truffleruby.language.library.RubyStringLibrary; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.library.CachedLibrary; @@ -35,17 +33,16 @@ public ReadCStringNode(Object stringReader) { } @Specialization - protected Object read(VirtualFrame frame, Object pointer, + protected Object read(Object pointer, @Cached UnwrapNode unwrapNode, @Cached TranslateInteropExceptionNode translateInteropExceptionNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, @CachedLibrary("stringReader") InteropLibrary stringReaders) { Object string = unwrapNode.execute(InteropNodes.execute( stringReader, new Object[]{ pointer }, stringReaders, translateInteropExceptionNode)); - return libString.getRope(string); + return string; } } diff --git a/src/main/java/org/truffleruby/core/format/read/array/ReadStringNode.java 
b/src/main/java/org/truffleruby/core/format/read/array/ReadStringNode.java index 8b738ccd7d5a..ad57c63e8483 100644 --- a/src/main/java/org/truffleruby/core/format/read/array/ReadStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/array/ReadStringNode.java @@ -61,10 +61,10 @@ public ReadStringNode( @Specialization(limit = "storageStrategyLimit()") protected Object read(VirtualFrame frame, Object source, @CachedLibrary("source") ArrayStoreLibrary sources) { - return readAndConvert(frame, sources.read(source, advanceSourcePosition(frame))); + return readAndConvert(sources.read(source, advanceSourcePosition(frame))); } - private Object readAndConvert(VirtualFrame frame, Object value) { + private Object readAndConvert(Object value) { if (toStringNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); toStringNode = insert(ToStringNodeGen.create( @@ -76,7 +76,7 @@ private Object readAndConvert(VirtualFrame frame, Object value) { WriteByteNodeGen.create(new LiteralFormatNode((byte) 0)))); } - return toStringNode.executeToString(frame, value); + return toStringNode.executeToString(value); } } diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadBase64StringNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadBase64StringNode.java index c1576fddccb2..4653757349bd 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadBase64StringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadBase64StringNode.java @@ -48,13 +48,12 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.exceptions.InvalidFormatException; import org.truffleruby.core.format.read.SourceNode; import org.truffleruby.core.format.write.bytes.EncodeUM; -import org.truffleruby.core.rope.CodeRange; -import 
org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.NodeChild; @@ -64,7 +63,7 @@ @NodeChild(value = "source", type = SourceNode.class) public abstract class ReadBase64StringNode extends FormatNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); private final int count; @@ -80,7 +79,7 @@ protected Object read(VirtualFrame frame, byte[] source) { setSourcePosition(frame, encode.position()); - return makeStringNode.executeMake(result, Encodings.BINARY, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, result, Encodings.BINARY); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadBinaryStringNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadBinaryStringNode.java index 505bbeea2ab2..8b9bfe794aa6 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadBinaryStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadBinaryStringNode.java @@ -11,12 +11,11 @@ import java.util.Arrays; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.read.SourceNode; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; @@ -59,33 +58,34 @@ protected void read(VirtualFrame frame, Object source) { @Specialization protected RubyString read(VirtualFrame frame, byte[] source, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final int start = getSourcePosition(frame); + final 
int end = getSourceEnd(frame); int length; if (readToEnd) { length = 0; - while (start + length < getSourceLength(frame) && - (!readToNull || (start + length < getSourceLength(frame) && source[start + length] != 0))) { + while (start + length < end && + (!readToNull || (start + length < end && source[start + length] != 0))) { length++; } - if (start + length < getSourceLength(frame) && source[start + length] == 0) { + if (start + length < end && source[start + length] == 0) { length++; } } else if (readToNull) { length = 0; - while (start + length < getSourceLength(frame) && length < count && source[start + length] != 0) { + while (start + length < end && length < count && source[start + length] != 0) { length++; } } else { length = count; - if (start + length >= getSourceLength(frame)) { - length = getSourceLength(frame) - start; + if (start + length >= end) { + length = end - start; } } @@ -106,10 +106,10 @@ protected RubyString read(VirtualFrame frame, byte[] source, setSourcePosition(frame, start + length); - return makeStringNode.executeMake( + return createString( + fromByteArrayNode, Arrays.copyOfRange(source, start, start + usedLength), - Encodings.BINARY, - CodeRange.CR_UNKNOWN); + Encodings.BINARY); } private int indexOfFirstNull(byte[] bytes, int start, int length) { diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadBitStringNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadBitStringNode.java index 0513e27ed84c..048caa7c271c 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadBitStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadBitStringNode.java @@ -48,11 +48,10 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.read.SourceNode; -import org.truffleruby.core.rope.CodeRange; -import 
org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; @@ -61,7 +60,7 @@ @NodeChild(value = "source", type = SourceNode.class) public abstract class ReadBitStringNode extends FormatNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); private final ByteOrder byteOrder; private final boolean star; @@ -113,7 +112,7 @@ protected Object read(VirtualFrame frame, byte[] source) { setSourcePosition(frame, encode.position()); - return makeStringNode.executeMake(lElem, Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromByteArrayNode, lElem, Encodings.US_ASCII); // CR_7BIT } } diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadHexStringNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadHexStringNode.java index 958b8967a0df..c2226c03a58d 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadHexStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadHexStringNode.java @@ -48,12 +48,11 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.read.SourceNode; import org.truffleruby.core.format.write.bytes.EncodeUM; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; @@ -62,7 +61,7 @@ @NodeChild(value = "source", type = SourceNode.class) public abstract class ReadHexStringNode extends FormatNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private 
TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); private final ByteOrder byteOrder; private final boolean star; @@ -112,7 +111,7 @@ protected Object read(VirtualFrame frame, byte[] source) { setSourcePosition(frame, encode.position()); - return makeStringNode.executeMake(lElem, Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromByteArrayNode, lElem, Encodings.US_ASCII); // CR_7BIT } } diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadMIMEStringNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadMIMEStringNode.java index 5bcf8340b338..c27ce0e49529 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadMIMEStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadMIMEStringNode.java @@ -35,14 +35,12 @@ */ package org.truffleruby.core.format.read.bytes; -import java.util.Arrays; - import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.read.SourceNode; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; @@ -51,43 +49,41 @@ @NodeChild(value = "source", type = SourceNode.class) public abstract class ReadMIMEStringNode extends FormatNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - @Specialization - protected Object read(VirtualFrame frame, byte[] source) { + protected Object read(VirtualFrame frame, byte[] source, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final int position = getSourcePosition(frame); - final int sourceLength = getSourceLength(frame); + final int end = getSourceEnd(frame); - 
final byte[] store = new byte[sourceLength - position]; + final byte[] store = new byte[end - position]; - final int storeIndex = parseSource(source, position, sourceLength, store); + final int storeIndex = parseSource(source, position, end, store); - setSourcePosition(frame, sourceLength); + setSourcePosition(frame, end); - return makeStringNode - .executeMake(Arrays.copyOfRange(store, 0, storeIndex), Encodings.BINARY, CodeRange.CR_UNKNOWN); + var tstring = fromByteArrayNode.execute(store, 0, storeIndex, Encodings.BINARY.tencoding, true); + return createString(tstring, Encodings.BINARY); } // Logic from MRI pack.c pack_unpack_internal // https://github.com/ruby/ruby/blob/37c2cd3fa47c709570e22ec4dac723ca211f423a/pack.c#L1639 @TruffleBoundary - private int parseSource(byte[] source, int position, int sourceLength, byte[] store) { - System.arraycopy(source, position, store, 0, sourceLength - position); - + private int parseSource(byte[] source, int position, int end, byte[] store) { + int sourceLength = end - position; int storeIndex = 0; int loopIndex = 0; - if (source.length > 0) { - int c = source[0] & 0xff; + if (sourceLength > 0) { int i = position; - while (i < sourceLength) { + int c = source[i] & 0xff; + while (i < end) { if (c == '=') { - if (++i == sourceLength) { + if (++i == end) { break; } c = source[i] & 0xff; - if (i + 1 < sourceLength && c == '\r' && (source[i + 1] & 0xff) == '\n') { + if (i + 1 < end && c == '\r' && (source[i + 1] & 0xff) == '\n') { i++; c = source[i] & 0xff; } @@ -98,7 +94,7 @@ private int parseSource(byte[] source, int position, int sourceLength, byte[] st break; } - if (++i == sourceLength) { + if (++i == end) { break; } c = source[i] & 0xff; @@ -118,17 +114,18 @@ private int parseSource(byte[] source, int position, int sourceLength, byte[] st storeIndex++; } i++; - if (i < sourceLength) { + if (i < end) { c = source[i] & 0xff; } - loopIndex = i; + loopIndex = i - position; } } final int storeLength = store.length; if (loopIndex < 
storeLength) { - System.arraycopy(source, loopIndex, store, storeIndex, storeLength - loopIndex); - storeIndex += storeLength - loopIndex; + final int left = storeLength - loopIndex; + System.arraycopy(source, position + loopIndex, store, storeIndex, left); + storeIndex += left; } return storeIndex; } diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadStringPointerNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadStringPointerNode.java index 375394c6e300..1ac4e6f622ea 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadStringPointerNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadStringPointerNode.java @@ -10,13 +10,12 @@ package org.truffleruby.core.format.read.bytes; import com.oracle.truffle.api.interop.InteropLibrary; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatFrameDescriptor; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.MissingValue; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.extra.ffi.Pointer; import org.truffleruby.language.Nil; import org.truffleruby.language.control.RaiseException; @@ -31,7 +30,7 @@ @NodeChild("value") public abstract class ReadStringPointerNode extends FormatNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); private final BranchProfile errorProfile = BranchProfile.create(); private final int limit; @@ -59,7 +58,7 @@ protected RubyString read(VirtualFrame frame, long address, interop, 0, limit); - return makeStringNode.executeMake(bytes, Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromByteArrayNode, bytes, Encodings.US_ASCII); } 
private void checkAssociated(Pointer[] associated, Pointer reading) { diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadUTF8CharacterNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadUTF8CharacterNode.java index 9ac4f13265d2..e1d626ffcd8b 100644 --- a/src/main/java/org/truffleruby/core/format/read/bytes/ReadUTF8CharacterNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadUTF8CharacterNode.java @@ -37,11 +37,11 @@ protected Object read(VirtualFrame frame, byte[] source, @Cached BranchProfile errorProfile, @Cached ConditionProfile rangeProfile) { final int index = getSourcePosition(frame); - final int sourceLength = getSourceLength(frame); + final int end = getSourceEnd(frame); assert index != -1; - if (rangeProfile.profile(index >= sourceLength)) { + if (rangeProfile.profile(index >= end)) { return MissingValue.INSTANCE; } @@ -71,9 +71,9 @@ protected Object read(VirtualFrame frame, byte[] source, length = 1; } - if (index + length > sourceLength) { + if (index + length > end) { errorProfile.enter(); - throw new InvalidFormatException(formatError(index, sourceLength, length)); + throw new InvalidFormatException(formatError(index, end, length)); } for (int n = 1; n < length; n++) { @@ -87,9 +87,8 @@ protected Object read(VirtualFrame frame, byte[] source, } @TruffleBoundary - private String formatError(final int index, final int sourceLength, final int length) { - return StringUtils - .format("malformed UTF-8 character (expected %d bytes, given %d bytes)", length, sourceLength - index); + private String formatError(int index, int end, int length) { + return StringUtils.format("malformed UTF-8 character (expected %d bytes, given %d bytes)", length, end - index); } } diff --git a/src/main/java/org/truffleruby/core/format/read/bytes/ReadUUStringNode.java b/src/main/java/org/truffleruby/core/format/read/bytes/ReadUUStringNode.java index e0e2ac7241e6..cd8571fdfc35 100644 --- 
a/src/main/java/org/truffleruby/core/format/read/bytes/ReadUUStringNode.java +++ b/src/main/java/org/truffleruby/core/format/read/bytes/ReadUUStringNode.java @@ -48,11 +48,10 @@ import java.nio.ByteBuffer; import java.util.Arrays; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.read.SourceNode; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.NodeChild; @@ -62,7 +61,7 @@ @NodeChild(value = "source", type = SourceNode.class) public abstract class ReadUUStringNode extends FormatNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @Specialization protected Object encode(VirtualFrame frame, byte[] source) { @@ -72,7 +71,7 @@ protected Object encode(VirtualFrame frame, byte[] source) { setSourcePosition(frame, encode.position()); - return makeStringNode.executeMake(bytes, Encodings.BINARY, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/format/unpack/UnpackCompiler.java b/src/main/java/org/truffleruby/core/format/unpack/UnpackCompiler.java index 3825cbb210be..0f2eb23cc2f7 100644 --- a/src/main/java/org/truffleruby/core/format/unpack/UnpackCompiler.java +++ b/src/main/java/org/truffleruby/core/format/unpack/UnpackCompiler.java @@ -15,7 +15,7 @@ import org.truffleruby.core.format.pack.SimplePackParser; import com.oracle.truffle.api.RootCallTarget; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.control.DeferredRaiseException; public class 
UnpackCompiler { @@ -37,7 +37,7 @@ public RootCallTarget compile(String format) throws DeferredRaiseException { builder.enterSequence(); - final SimplePackParser parser = new SimplePackParser(builder, RopeOperations.encodeAsciiBytes(format)); + final SimplePackParser parser = new SimplePackParser(builder, StringOperations.encodeAsciiBytes(format)); parser.parse(); builder.exitSequence(); diff --git a/src/main/java/org/truffleruby/core/format/unpack/UnpackRootNode.java b/src/main/java/org/truffleruby/core/format/unpack/UnpackRootNode.java index b7205cdd1016..85c1bab30fc9 100644 --- a/src/main/java/org/truffleruby/core/format/unpack/UnpackRootNode.java +++ b/src/main/java/org/truffleruby/core/format/unpack/UnpackRootNode.java @@ -41,9 +41,10 @@ public Object execute(VirtualFrame frame) { final Object[] arguments = frame.getArguments(); frame.setObject(FormatFrameDescriptor.SOURCE_SLOT, arguments[0]); - frame.setInt(FormatFrameDescriptor.SOURCE_LENGTH_SLOT, (int) arguments[1]); - frame.setInt(FormatFrameDescriptor.SOURCE_POSITION_SLOT, 0); - frame.setObject(FormatFrameDescriptor.SOURCE_ASSOCIATED_SLOT, arguments[2]); + frame.setInt(FormatFrameDescriptor.SOURCE_END_POSITION_SLOT, (int) arguments[1]); + frame.setInt(FormatFrameDescriptor.SOURCE_START_POSITION_SLOT, (int) arguments[2]); + frame.setInt(FormatFrameDescriptor.SOURCE_POSITION_SLOT, (int) arguments[2]); + frame.setObject(FormatFrameDescriptor.SOURCE_ASSOCIATED_SLOT, arguments[3]); frame.setObject(FormatFrameDescriptor.OUTPUT_SLOT, new Object[expectedLength]); frame.setInt(FormatFrameDescriptor.OUTPUT_POSITION_SLOT, 0); diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/EncodeUM.java b/src/main/java/org/truffleruby/core/format/write/bytes/EncodeUM.java index 651701c2d799..e98a558a1b2c 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/EncodeUM.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/EncodeUM.java @@ -36,16 +36,17 @@ ***** END LICENSE BLOCK *****/ package 
org.truffleruby.core.format.write.bytes; +import com.oracle.truffle.api.strings.InternalByteArray; import org.truffleruby.collections.ByteArrayBuilder; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; public class EncodeUM { - private static final byte[] uu_table = RopeOperations + private static final byte[] uu_table = StringOperations .encodeAsciiBytes("`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"); - private static final byte[] b64_table = RopeOperations + private static final byte[] b64_table = StringOperations .encodeAsciiBytes("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"); - public static final byte[] sHexDigits = RopeOperations.encodeAsciiBytes("0123456789abcdef0123456789ABCDEFx"); + public static final byte[] sHexDigits = StringOperations.encodeAsciiBytes("0123456789abcdef0123456789ABCDEFx"); public static final int[] b64_xtable = new int[256]; static { @@ -58,41 +59,39 @@ public class EncodeUM { } } - public static void encodeUM(Object runtime, byte[] lCurElemString, int occurrences, boolean ignoreStar, char type, + public static void encodeUM(InternalByteArray lCurElemString, int occurrences, boolean ignoreStar, char type, ByteArrayBuilder result) { if (occurrences == 0 && type == 'm' && !ignoreStar) { encodes( - runtime, result, - lCurElemString, - 0, - lCurElemString.length, - lCurElemString.length, + lCurElemString.getArray(), + lCurElemString.getOffset(), + lCurElemString.getLength(), + lCurElemString.getLength(), (byte) type, false); return; } occurrences = occurrences <= 2 ? 
45 : occurrences / 3 * 3; - if (lCurElemString.length == 0) { + if (lCurElemString.getLength() == 0) { return; } - byte[] charsToEncode = lCurElemString; - for (int i = 0; i < lCurElemString.length; i += occurrences) { + byte[] charsToEncode = lCurElemString.getArray(); + for (int i = 0; i < lCurElemString.getLength(); i += occurrences) { encodes( - runtime, result, charsToEncode, - i, - lCurElemString.length - i, + i + lCurElemString.getOffset(), + lCurElemString.getLength() - i, occurrences, (byte) type, true); } } - private static ByteArrayBuilder encodes(Object runtime, ByteArrayBuilder io2Append, byte[] charsToEncode, + private static ByteArrayBuilder encodes(ByteArrayBuilder io2Append, byte[] charsToEncode, int startIndex, int length, int charCount, byte encodingType, boolean tailLf) { charCount = charCount < length ? charCount : length; diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBase64StringNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBase64StringNode.java index c91ea0b116a6..21e0edd9817f 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBase64StringNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBase64StringNode.java @@ -9,17 +9,17 @@ */ package org.truffleruby.core.format.write.bytes; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.format.FormatNode; -import org.truffleruby.core.format.exceptions.NoImplicitConversionException; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import 
org.truffleruby.language.library.RubyStringLibrary; @NodeChild("value") public abstract class WriteBase64StringNode extends FormatNode { @@ -32,29 +32,24 @@ public WriteBase64StringNode(int length, boolean ignoreStar) { this.ignoreStar = ignoreStar; } - @Specialization - protected Object write(long bytes) { - throw new NoImplicitConversionException(bytes, "String"); - } + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); - @Specialization - protected Object write(VirtualFrame frame, byte[] bytes) { - writeBytes(frame, encode(bytes)); - return null; - } + writeBytes(frame, encode(byteArrayNode.execute(tstring, encoding))); - @Specialization - protected Object write(VirtualFrame frame, Rope rope, - @Cached RopeNodes.BytesNode bytesNode) { - return write(frame, bytesNode.execute(rope)); + return null; } @TruffleBoundary - private byte[] encode(byte[] bytes) { + private byte[] encode(InternalByteArray byteArray) { // TODO CS 30-Mar-15 should write our own optimisable version of Base64 final ByteArrayBuilder output = new ByteArrayBuilder(); - EncodeUM.encodeUM(null, bytes, length, ignoreStar, 'm', output); + EncodeUM.encodeUM(byteArray, length, ignoreStar, 'm', output); return output.getBytes(); } diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBinaryStringNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBinaryStringNode.java index ade588d6fe4e..ea82ff4600ce 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBinaryStringNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBinaryStringNode.java @@ -9,15 +9,15 @@ */ package org.truffleruby.core.format.write.bytes; +import 
com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.format.FormatNode; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; import org.truffleruby.language.Nil; +import org.truffleruby.language.library.RubyStringLibrary; @NodeChild("value") public abstract class WriteBinaryStringNode extends FormatNode { @@ -57,39 +57,39 @@ protected Object write(VirtualFrame frame, Nil nil) { return null; } - @Specialization - protected Object write(VirtualFrame frame, byte[] bytes) { - final int lengthFromBytes; + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) { + var tstring = libString.getTString(string); + var byteArray = getInternalByteArrayNode.execute(tstring, libString.getTEncoding(string)); + write(frame, byteArray.getArray(), byteArray.getOffset(), byteArray.getLength()); + return null; + } + private void write(VirtualFrame frame, byte[] bytes, int offset, int length) { + final int lengthFromBytes; if (takeAll) { - lengthFromBytes = bytes.length; + lengthFromBytes = length; } else { - lengthFromBytes = Math.min(width, bytes.length); + lengthFromBytes = Math.min(width, length); } if (pad) { final int lengthFromPadding = width - lengthFromBytes; - writeBytes(frame, bytes, lengthFromBytes); + writeBytes(frame, bytes, offset, lengthFromBytes); for (int n = 0; n < lengthFromPadding; n++) { writeByte(frame, padding); } } else { - writeBytes(frame, bytes, lengthFromBytes); + writeBytes(frame, bytes, offset, lengthFromBytes); } if (appendNull) { writeByte(frame, (byte) 0); } - - return null; - } - - @Specialization - protected Object 
write(VirtualFrame frame, Rope rope, - @Cached RopeNodes.BytesNode bytesNode) { - return write(frame, bytesNode.execute(rope)); } } diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBitStringNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBitStringNode.java index 8f7834dbf8f5..5541fc683de1 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBitStringNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBitStringNode.java @@ -47,14 +47,15 @@ import java.nio.ByteOrder; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.format.FormatNode; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import org.truffleruby.language.library.RubyStringLibrary; @NodeChild("value") public abstract class WriteBitStringNode extends FormatNode { @@ -69,18 +70,23 @@ public WriteBitStringNode(ByteOrder byteOrder, boolean star, int length) { this.length = length; } - @Specialization - protected Object write(VirtualFrame frame, Rope rope, - @Cached RopeNodes.BytesNode bytesNode) { - return write(frame, bytesNode.execute(rope)); + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); + + return write(frame, byteArrayNode.execute(tstring, encoding)); } - @Specialization - protected Object write(VirtualFrame frame, byte[] bytes) { + protected Object write(VirtualFrame frame, InternalByteArray byteArray) { int occurrences; + int 
byteLength = byteArray.getLength(); + if (star) { - occurrences = bytes.length; + occurrences = byteLength; } else { occurrences = length; } @@ -88,14 +94,14 @@ protected Object write(VirtualFrame frame, byte[] bytes) { int currentByte = 0; int padLength = 0; - if (occurrences > bytes.length) { - padLength = (occurrences - bytes.length) / 2 + (occurrences + bytes.length) % 2; - occurrences = bytes.length; + if (occurrences > byteLength) { + padLength = (occurrences - byteLength) / 2 + (occurrences + byteLength) % 2; + occurrences = byteLength; } if (byteOrder == ByteOrder.LITTLE_ENDIAN) { for (int i = 0; i < occurrences;) { - if ((bytes[i++] & 1) != 0) { // if the low bit is set + if ((byteArray.get(i++) & 1) != 0) { // if the low bit is set currentByte |= 128; //set the high bit of the result } @@ -115,7 +121,7 @@ protected Object write(VirtualFrame frame, byte[] bytes) { } } else { for (int i = 0; i < occurrences;) { - currentByte |= bytes[i++] & 1; + currentByte |= byteArray.get(i++) & 1; // we filled up current byte; append it and create next one if ((i & 7) == 0) { diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBytesNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBytesNode.java index 4bbf9c096bd2..04a0e27284cb 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteBytesNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteBytesNode.java @@ -9,14 +9,14 @@ */ package org.truffleruby.core.format.write.bytes; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.format.FormatNode; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import org.truffleruby.language.library.RubyStringLibrary; @NodeChild("value") public abstract class 
WriteBytesNode extends FormatNode { @@ -27,10 +27,13 @@ protected Object write(VirtualFrame frame, byte[] bytes) { return null; } - @Specialization - protected Object writeRope(VirtualFrame frame, Rope rope, - @Cached RopeNodes.BytesNode bytesNode) { - writeBytes(frame, bytesNode.execute(rope)); + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object writeString(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) { + var tstring = libString.getTString(string); + var byteArray = getInternalByteArrayNode.execute(tstring, libString.getTEncoding(string)); + writeBytes(frame, byteArray.getArray(), byteArray.getOffset(), byteArray.getLength()); return null; } diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteHexStringNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteHexStringNode.java index 9f83497d1d2b..cd97a35a29f7 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteHexStringNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteHexStringNode.java @@ -47,14 +47,15 @@ import java.nio.ByteOrder; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.format.FormatNode; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import org.truffleruby.language.library.RubyStringLibrary; @NodeChild("value") public abstract class WriteHexStringNode extends FormatNode { @@ -67,20 +68,23 @@ public WriteHexStringNode(ByteOrder byteOrder, int length) { this.length = length; } - @Specialization - protected Object write(VirtualFrame frame, Rope rope, - @Cached 
RopeNodes.BytesNode bytesNode) { - return write(frame, bytesNode.execute(rope)); + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); + + return write(frame, byteArrayNode.execute(tstring, encoding)); } - @Specialization - protected Object write(VirtualFrame frame, byte[] bytes) { + protected Object write(VirtualFrame frame, InternalByteArray byteArray) { int currentByte = 0; final int lengthToUse; if (length == -1) { - lengthToUse = bytes.length; + lengthToUse = byteArray.getLength(); } else { lengthToUse = length; } @@ -88,8 +92,8 @@ protected Object write(VirtualFrame frame, byte[] bytes) { for (int n = 0; n < lengthToUse; n++) { byte currentChar; - if (n < bytes.length) { - currentChar = bytes[n]; + if (n < byteArray.getLength()) { + currentChar = byteArray.get(n); } else { currentChar = 0; } diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteMIMEStringNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteMIMEStringNode.java index aef30aa8515d..0cdba20588e9 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteMIMEStringNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteMIMEStringNode.java @@ -41,6 +41,8 @@ */ package org.truffleruby.core.format.write.bytes; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.format.FormatNode; @@ -50,10 +52,9 @@ import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import 
org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.Nil; +import org.truffleruby.language.library.RubyStringLibrary; @NodeChild("value") public abstract class WriteMIMEStringNode extends FormatNode { @@ -69,28 +70,28 @@ protected Object write(Nil nil) { return null; } - @Specialization - protected Object write(VirtualFrame frame, byte[] bytes) { - writeBytes(frame, encode(bytes)); - return null; - } + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); - @Specialization - protected Object write(VirtualFrame frame, Rope rope, - @Cached RopeNodes.BytesNode bytesNode) { - return write(frame, bytesNode.execute(rope)); + writeBytes(frame, encode(byteArrayNode.execute(tstring, encoding))); + + return null; } @TruffleBoundary - private byte[] encode(byte[] bytes) { + private byte[] encode(InternalByteArray byteArray) { // TODO CS 30-Mar-15 should write our own optimizable version of MIME final ByteArrayBuilder output = new ByteArrayBuilder(); - qpencode(output, bytes, length); + qpencode(output, byteArray, length); return output.getBytes(); } - private static final byte[] hex_table = RopeOperations.encodeAsciiBytes("0123456789ABCDEF"); + private static final byte[] hex_table = StringOperations.encodeAsciiBytes("0123456789ABCDEF"); /** encodes a String with the Quoted printable, MIME encoding (see RFC2045). 
appends the result of the encoding in a * StringBuffer @@ -99,15 +100,15 @@ private byte[] encode(byte[] bytes) { * @param i2Encode The String to encode * @param iLength The max number of characters to encode * @return the io2Append buffer **/ - public static ByteArrayBuilder qpencode(ByteArrayBuilder io2Append, byte[] i2Encode, int iLength) { + public static ByteArrayBuilder qpencode(ByteArrayBuilder io2Append, InternalByteArray i2Encode, int iLength) { io2Append.unsafeEnsureSpace(1024); int lCurLineLength = 0; int lPrevChar = -1; - byte[] l2Encode = i2Encode; + try { - int end = i2Encode.length; + int end = i2Encode.getLength(); for (int i = 0; i < end; i++) { - int lCurChar = l2Encode[i] & 0xff; + int lCurChar = i2Encode.get(i) & 0xff; if (lCurChar > 126 || (lCurChar < 32 && lCurChar != '\n' && lCurChar != '\t') || lCurChar == '=') { io2Append.append('='); io2Append.append(hex_table[lCurChar >>> 4]); diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WritePaddedBytesNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WritePaddedBytesNode.java index 01d25c5c5f5b..831d6d39a573 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WritePaddedBytesNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WritePaddedBytesNode.java @@ -9,17 +9,18 @@ */ package org.truffleruby.core.format.write.bytes; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.printf.PrintfSimpleTreeBuilder; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; import 
com.oracle.truffle.api.profiles.ConditionProfile; +import org.truffleruby.language.library.RubyStringLibrary; /** Simply write bytes. */ @NodeChild("width") @@ -36,52 +37,60 @@ public WritePaddedBytesNode(boolean leftJustified) { this.leftJustified = leftJustified; } - @Specialization - protected Object write(VirtualFrame frame, int padding, int precision, Rope rope, - @Cached RopeNodes.BytesNode bytesNode, - @Cached RopeNodes.CharacterLengthNode charLengthNode, - @Cached StringNodes.ByteIndexFromCharIndexNode indexNode) { + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, int padding, int precision, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.CodePointLengthNode codePointLengthNode, + @Cached TruffleString.CodePointIndexToByteIndexNode codePointIndexToByteIndexNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { if (padding == PrintfSimpleTreeBuilder.DEFAULT) { padding = 0; } - final byte[] bytes = bytesNode.execute(rope); + + var tstring = libString.getTString(string); + var encoding = libString.getEncoding(string); if (leftJustifiedProfile.profile(leftJustified || padding < 0)) { - writeStringBytes(frame, precision, rope, bytesNode, indexNode); - writePaddingBytes(frame, Math.abs(padding), precision, rope, charLengthNode); + writeStringBytes(frame, precision, tstring, encoding, codePointIndexToByteIndexNode, byteArrayNode); + writePaddingBytes(frame, Math.abs(padding), precision, tstring, encoding, codePointLengthNode); } else { - writePaddingBytes(frame, padding, precision, rope, charLengthNode); - writeStringBytes(frame, precision, rope, bytesNode, indexNode); + writePaddingBytes(frame, padding, precision, tstring, encoding, codePointLengthNode); + writeStringBytes(frame, precision, tstring, encoding, codePointIndexToByteIndexNode, byteArrayNode); } + return null; } - private void writeStringBytes(VirtualFrame frame, int precision, Rope 
rope, - RopeNodes.BytesNode bytesNode, - StringNodes.ByteIndexFromCharIndexNode indexNode) { - byte[] bytes = bytesNode.execute(rope); + private void writeStringBytes(VirtualFrame frame, int precision, + AbstractTruffleString tstring, RubyEncoding encoding, + TruffleString.CodePointIndexToByteIndexNode codePointIndexToByteIndexNode, + TruffleString.GetInternalByteArrayNode byteArrayNode) { + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); int length; - if (precisionProfile.profile(precision >= 0 && bytes.length > precision)) { - int index = indexNode.execute(rope, 0, precision); + + if (precisionProfile.profile(precision >= 0 && byteArray.getLength() > precision)) { + int index = codePointIndexToByteIndexNode.execute(tstring, 0, precision, encoding.tencoding); if (index >= 0) { length = index; } else { - length = bytes.length; + length = byteArray.getLength(); } } else { - length = bytes.length; + length = byteArray.getLength(); } - writeBytes(frame, bytes, length); + + writeBytes(frame, byteArray.getArray(), byteArray.getOffset(), length); } - private void writePaddingBytes(VirtualFrame frame, int padding, int precision, Rope rope, - RopeNodes.CharacterLengthNode lengthNode) { + private void writePaddingBytes(VirtualFrame frame, int padding, int precision, AbstractTruffleString tstring, + RubyEncoding encoding, + TruffleString.CodePointLengthNode codePointLengthNode) { if (paddingProfile.profile(padding > 0)) { - int ropeLength = lengthNode.execute(rope); + int codePointLength = codePointLengthNode.execute(tstring, encoding.tencoding); int padBytes; - if (precision > 0 && ropeLength > precision) { + if (precision > 0 && codePointLength > precision) { padBytes = padding - precision; - } else if (padding > 0 && padding > ropeLength) { - padBytes = padding - ropeLength; + } else if (padding > 0 && padding > codePointLength) { + padBytes = padding - codePointLength; } else { padBytes = 0; } diff --git 
a/src/main/java/org/truffleruby/core/format/write/bytes/WriteUTF8CharacterNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteUTF8CharacterNode.java index 2f162d194fe9..43ea04429b00 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteUTF8CharacterNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteUTF8CharacterNode.java @@ -47,55 +47,33 @@ import org.truffleruby.core.format.FormatNode; import org.truffleruby.core.format.exceptions.RangeException; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.UTF8Operations; -import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; -import com.oracle.truffle.api.profiles.ConditionProfile; @NodeChild("value") public abstract class WriteUTF8CharacterNode extends FormatNode { @Specialization(guards = { "value >= 0", "value <= 0x7f" }) - protected Object writeSingleByte(VirtualFrame frame, long value, - @Cached ConditionProfile rangeProfile) { + protected Object writeSingleByte(VirtualFrame frame, long value) { writeByte(frame, (byte) value); - - if (rangeProfile.profile(UTF8Operations.isUTF8ValidOneByte((byte) value))) { - setStringCodeRange(frame, CodeRange.CR_7BIT); - } else { - setStringCodeRange(frame, CodeRange.CR_BROKEN); - } - return null; } @Specialization(guards = { "value > 0x7f", "value <= 0x7ff" }) - protected Object writeTwoBytes(VirtualFrame frame, long value, - @Cached ConditionProfile rangeProfile) { + protected Object writeTwoBytes(VirtualFrame frame, long value) { final byte[] bytes = { (byte) (((value >>> 6) & 0xff) | 0xc0), (byte) ((value & 0x3f) | 0x80) }; writeBytes(frame, bytes); - increaseStringLength(frame, -2 + 1); - - if (rangeProfile.profile(UTF8Operations.isUTF8ValidTwoBytes(bytes))) { - setStringCodeRange(frame, CodeRange.CR_VALID); - } else { - setStringCodeRange(frame, 
CodeRange.CR_BROKEN); - } - return null; } @Specialization(guards = { "value > 0x7ff", "value <= 0xffff" }) - protected Object writeThreeBytes(VirtualFrame frame, long value, - @Cached ConditionProfile rangeProfile) { + protected Object writeThreeBytes(VirtualFrame frame, long value) { final byte[] bytes = { (byte) (((value >>> 12) & 0xff) | 0xe0), (byte) (((value >>> 6) & 0x3f) | 0x80), @@ -103,20 +81,11 @@ protected Object writeThreeBytes(VirtualFrame frame, long value, }; writeBytes(frame, bytes); - increaseStringLength(frame, -3 + 1); - - if (rangeProfile.profile(UTF8Operations.isUTF8ValidThreeBytes(bytes))) { - setStringCodeRange(frame, CodeRange.CR_VALID); - } else { - setStringCodeRange(frame, CodeRange.CR_BROKEN); - } - return null; } @Specialization(guards = { "value > 0xffff", "value <= 0x1fffff" }) - protected Object writeFourBytes(VirtualFrame frame, long value, - @Cached ConditionProfile rangeProfile) { + protected Object writeFourBytes(VirtualFrame frame, long value) { final byte[] bytes = { (byte) (((value >>> 18) & 0xff) | 0xf0), (byte) (((value >>> 12) & 0x3f) | 0x80), @@ -125,20 +94,11 @@ protected Object writeFourBytes(VirtualFrame frame, long value, }; writeBytes(frame, bytes); - increaseStringLength(frame, -4 + 1); - - if (rangeProfile.profile(UTF8Operations.isUTF8ValidFourBytes(bytes))) { - setStringCodeRange(frame, CodeRange.CR_VALID); - } else { - setStringCodeRange(frame, CodeRange.CR_BROKEN); - } - return null; } @Specialization(guards = { "value > 0x1fffff", "value <= 0x3ffffff" }) - protected Object writeFiveBytes(VirtualFrame frame, long value, - @Cached ConditionProfile rangeProfile) { + protected Object writeFiveBytes(VirtualFrame frame, long value) { final byte[] bytes = { (byte) (((value >>> 24) & 0xff) | 0xf8), (byte) (((value >>> 18) & 0x3f) | 0x80), @@ -148,20 +108,11 @@ protected Object writeFiveBytes(VirtualFrame frame, long value, }; writeBytes(frame, bytes); - increaseStringLength(frame, -5 + 1); - - if 
(rangeProfile.profile(UTF8Operations.isUTF8ValidFiveBytes(bytes))) { - setStringCodeRange(frame, CodeRange.CR_VALID); - } else { - setStringCodeRange(frame, CodeRange.CR_BROKEN); - } - return null; } @Specialization(guards = { "value > 0x3ffffff", "value <= 0x7fffffff" }) - protected Object writeSixBytes(VirtualFrame frame, long value, - @Cached ConditionProfile rangeProfile) { + protected Object writeSixBytes(VirtualFrame frame, long value) { final byte[] bytes = { (byte) (((value >>> 30) & 0xff) | 0xfc), (byte) (((value >>> 24) & 0x3f) | 0x80), @@ -172,14 +123,6 @@ protected Object writeSixBytes(VirtualFrame frame, long value, }; writeBytes(frame, bytes); - increaseStringLength(frame, -6 + 1); - - if (rangeProfile.profile(UTF8Operations.isUTF8ValidSixBytes(bytes))) { - setStringCodeRange(frame, CodeRange.CR_VALID); - } else { - setStringCodeRange(frame, CodeRange.CR_BROKEN); - } - return null; } diff --git a/src/main/java/org/truffleruby/core/format/write/bytes/WriteUUStringNode.java b/src/main/java/org/truffleruby/core/format/write/bytes/WriteUUStringNode.java index fe6ce456647b..330040125754 100644 --- a/src/main/java/org/truffleruby/core/format/write/bytes/WriteUUStringNode.java +++ b/src/main/java/org/truffleruby/core/format/write/bytes/WriteUUStringNode.java @@ -9,17 +9,17 @@ */ package org.truffleruby.core.format.write.bytes; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.format.FormatNode; -import org.truffleruby.core.format.exceptions.NoImplicitConversionException; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import 
org.truffleruby.language.library.RubyStringLibrary; /** Read a string that contains UU-encoded data and write as actual binary data. */ @NodeChild("value") @@ -33,34 +33,24 @@ public WriteUUStringNode(int length, boolean ignoreStar) { this.ignoreStar = ignoreStar; } - @Specialization - protected Object write(long bytes) { - throw new NoImplicitConversionException(bytes, "String"); - } + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected Object write(VirtualFrame frame, Object string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); - @Specialization(guards = "isEmpty(bytes)") - protected Object writeEmpty(VirtualFrame frame, byte[] bytes) { - return null; - } + writeBytes(frame, encode(byteArrayNode.execute(tstring, encoding))); - @Specialization(guards = "!isEmpty(bytes)") - protected Object write(VirtualFrame frame, byte[] bytes) { - writeBytes(frame, encode(bytes)); return null; } - @Specialization - protected Object write(VirtualFrame frame, Rope rope, - @Cached RopeNodes.BytesNode bytesNode) { - return write(frame, bytesNode.execute(rope)); - } - @TruffleBoundary - private byte[] encode(byte[] bytes) { + private byte[] encode(InternalByteArray byteArray) { // TODO CS 30-Mar-15 should write our own optimizable version of UU final ByteArrayBuilder output = new ByteArrayBuilder(); - EncodeUM.encodeUM(null, bytes, length, ignoreStar, 'u', output); + EncodeUM.encodeUM(byteArray, length, ignoreStar, 'u', output); return output.getBytes(); } diff --git a/src/main/java/org/truffleruby/core/hash/HashingNodes.java b/src/main/java/org/truffleruby/core/hash/HashingNodes.java index f1be97d50a60..06c73246adc8 100644 --- a/src/main/java/org/truffleruby/core/hash/HashingNodes.java +++ b/src/main/java/org/truffleruby/core/hash/HashingNodes.java @@ -19,7 +19,7 @@ import 
org.truffleruby.core.numeric.BigIntegerOps; import org.truffleruby.core.numeric.RubyBignum; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.core.symbol.SymbolNodes; import org.truffleruby.core.string.ImmutableRubyString; @@ -94,13 +94,13 @@ protected int hashBignum(RubyBignum value) { @Specialization protected int hashString(RubyString value, - @Cached StringNodes.HashStringNode stringHashNode) { + @Cached StringHelperNodes.HashStringNode stringHashNode) { return (int) stringHashNode.execute(value); } @Specialization protected int hashImmutableString(ImmutableRubyString value, - @Cached StringNodes.HashStringNode stringHashNode) { + @Cached StringHelperNodes.HashStringNode stringHashNode) { return (int) stringHashNode.execute(value); } diff --git a/src/main/java/org/truffleruby/core/inlined/InlinedByteSizeNode.java b/src/main/java/org/truffleruby/core/inlined/InlinedByteSizeNode.java index 8c994bd4e8a9..a8a440b8f0ca 100644 --- a/src/main/java/org/truffleruby/core/inlined/InlinedByteSizeNode.java +++ b/src/main/java/org/truffleruby/core/inlined/InlinedByteSizeNode.java @@ -17,6 +17,7 @@ import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; +import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.language.methods.LookupMethodOnSelfNode; public abstract class InlinedByteSizeNode extends UnaryInlinedOperationNode { @@ -32,8 +33,9 @@ public InlinedByteSizeNode(RubyLanguage language, RubyCallNodeParameters callNod assumptions = "assumptions", limit = "1") protected int byteSize(VirtualFrame frame, RubyString self, - @Cached LookupMethodOnSelfNode lookupNode) { - return self.rope.byteLength(); + @Cached LookupMethodOnSelfNode lookupNode, + @Cached RubyStringLibrary libString) { + return 
libString.byteLength(self); } @Specialization( @@ -41,8 +43,9 @@ protected int byteSize(VirtualFrame frame, RubyString self, assumptions = "assumptions", limit = "1") protected int byteSizeImmutable(VirtualFrame frame, ImmutableRubyString self, - @Cached LookupMethodOnSelfNode lookupNode) { - return self.rope.byteLength(); + @Cached LookupMethodOnSelfNode lookupNode, + @Cached RubyStringLibrary libString) { + return libString.byteLength(self); } @Specialization diff --git a/src/main/java/org/truffleruby/core/inlined/InlinedEqualNode.java b/src/main/java/org/truffleruby/core/inlined/InlinedEqualNode.java index cc7c1f830f1a..99329946fd94 100644 --- a/src/main/java/org/truffleruby/core/inlined/InlinedEqualNode.java +++ b/src/main/java/org/truffleruby/core/inlined/InlinedEqualNode.java @@ -11,9 +11,9 @@ import com.oracle.truffle.api.Assumption; import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.string.StringNodes; +import org.truffleruby.core.encoding.EncodingNodes; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.language.dispatch.RubyCallNodeParameters; import com.oracle.truffle.api.dsl.Specialization; @@ -61,17 +61,23 @@ protected boolean doubleLong(double a, long b) { @Specialization( guards = { - "stringsSelf.isRubyString(self)", - "stringsB.isRubyString(b)", - "lookupNode.lookupProtected(frame, self, METHOD) == coreMethods().STRING_EQUAL" + "libA.isRubyString(a)", + "libB.isRubyString(b)", + "lookupNode.lookupProtected(frame, a, METHOD) == coreMethods().STRING_EQUAL" }, - assumptions = "assumptions") - protected boolean stringEqual(VirtualFrame frame, Object self, Object b, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsSelf, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsB, + assumptions = "assumptions", limit = "1") + protected boolean stringEqual(VirtualFrame frame, Object a, 
Object b, + @Cached RubyStringLibrary libA, + @Cached RubyStringLibrary libB, @Cached LookupMethodOnSelfNode lookupNode, - @Cached StringNodes.StringEqualNode stringEqualNode) { - return stringEqualNode.executeStringEqual(stringsSelf.getRope(self), stringsB.getRope(b)); + @Cached EncodingNodes.NegotiateCompatibleStringEncodingNode negotiateCompatibleStringEncodingNode, + @Cached StringHelperNodes.StringEqualInternalNode stringEqualInternalNode) { + var tstringA = libA.getTString(a); + var encA = libA.getEncoding(a); + var tstringB = libB.getTString(b); + var encB = libB.getEncoding(b); + var compatibleEncoding = negotiateCompatibleStringEncodingNode.execute(tstringA, encA, tstringB, encB); + return stringEqualInternalNode.executeInternal(tstringA, tstringB, compatibleEncoding); } @Specialization diff --git a/src/main/java/org/truffleruby/core/kernel/KernelNodes.java b/src/main/java/org/truffleruby/core/kernel/KernelNodes.java index bafb3de5dcfb..04e8f5412722 100644 --- a/src/main/java/org/truffleruby/core/kernel/KernelNodes.java +++ b/src/main/java/org/truffleruby/core/kernel/KernelNodes.java @@ -22,6 +22,8 @@ import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.interop.UnsupportedMessageException; import com.oracle.truffle.api.object.PropertyGetter; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import com.oracle.truffle.api.utilities.AssumedValue; import org.truffleruby.RubyContext; import org.truffleruby.builtins.CoreMethod; @@ -49,6 +51,7 @@ import org.truffleruby.core.cast.ToStringOrSymbolNode; import org.truffleruby.core.cast.ToSymbolNode; import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.exception.GetBacktraceException; import org.truffleruby.core.format.BytesResult; import org.truffleruby.core.format.FormatExceptionTranslator; @@ -73,15 +76,9 @@ import 
org.truffleruby.core.proc.RubyProc; import org.truffleruby.core.range.RangeNodes; import org.truffleruby.core.range.RubyIntOrLongRange; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringCachingGuards; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.string.StringNodes; -import org.truffleruby.core.string.StringNodes.MakeStringNode; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.support.TypeNodes; import org.truffleruby.core.support.TypeNodes.CheckFrozenNode; import org.truffleruby.core.support.TypeNodes.ObjectInstanceVariablesNode; @@ -256,26 +253,26 @@ protected boolean refEqualOrEql(Object a, Object b, @Primitive(name = "find_file") public abstract static class FindFileNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "libFeatureString.isRubyString(featureString)") + @Specialization(guards = "libFeatureString.isRubyString(featureString)", limit = "1") protected Object findFile(Object featureString, @Cached BranchProfile notFoundProfile, - @Cached MakeStringNode makeStringNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFeatureString) { - String feature = libFeatureString.getJavaString(featureString); - return findFileString(feature, notFoundProfile, makeStringNode); + @Cached TruffleString.FromJavaStringNode fromJavaStringNode, + @Cached RubyStringLibrary libFeatureString, + @Cached ToJavaStringNode toJavaStringNode) { + String feature = toJavaStringNode.executeToJavaString(featureString); + return findFileString(feature, notFoundProfile, fromJavaStringNode); } @Specialization protected Object findFileString(String featureString, @Cached BranchProfile notFoundProfile, - @Cached MakeStringNode makeStringNode) { + @Cached 
TruffleString.FromJavaStringNode fromJavaStringNode) { final String expandedPath = getContext().getFeatureLoader().findFeature(featureString); if (expandedPath == null) { notFoundProfile.enter(); return nil; } - return makeStringNode - .executeMake(expandedPath, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, expandedPath, Encodings.UTF_8); } } @@ -283,12 +280,12 @@ protected Object findFileString(String featureString, @Primitive(name = "get_caller_path") public abstract static class GetCallerPathNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "libFeature.isRubyString(feature)") + @Specialization(guards = "libFeature.isRubyString(feature)", limit = "1") @TruffleBoundary protected RubyString getCallerPath(Object feature, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFeature, - @Cached MakeStringNode makeStringNode) { - final String featureString = libFeature.getJavaString(feature); + @Cached RubyStringLibrary libFeature, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + final String featureString = RubyGuards.getJavaString(feature); final String featurePath; if (new File(featureString).isAbsolute()) { featurePath = featureString; @@ -314,11 +311,7 @@ protected RubyString getCallerPath(Object feature, // symlinks. MRI does this for #require_relative always, but not for #require, so we // need to do it to be compatible in the case the path does not exist, so the // LoadError's #path is the same as MRI's. 
- return makeStringNode - .executeMake( - Paths.get(featurePath).normalize().toString(), - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, Paths.get(featurePath).normalize().toString(), Encodings.UTF_8); } } @@ -328,11 +321,12 @@ public abstract static class LoadFeatureNode extends PrimitiveArrayArgumentsNode @Child private RequireNode requireNode = RequireNodeGen.create(); - @Specialization(guards = "libFeatureString.isRubyString(featureString)") + @Specialization(guards = "libFeatureString.isRubyString(featureString)", limit = "1") protected boolean loadFeature(Object featureString, Object expandedPathString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFeatureString) { + @Cached RubyStringLibrary libFeatureString, + @Cached ToJavaStringNode toJavaStringNode) { return requireNode.executeRequire( - libFeatureString.getJavaString(featureString), + toJavaStringNode.executeToJavaString(featureString), expandedPathString); } @@ -393,15 +387,15 @@ protected RubySymbol calleeName() { @Primitive(name = "canonicalize_path") public abstract static class CanonicalizePathNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") @TruffleBoundary protected RubyString canonicalPath(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached RubyStringLibrary strings, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final String expandedPath = getContext() .getFeatureLoader() - .canonicalize(strings.getJavaString(string)); - return makeStringNode.executeMake(expandedPath, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + .canonicalize(RubyGuards.getJavaString(string)); + return createString(fromJavaStringNode, expandedPath, Encodings.UTF_8); } } @@ -723,7 +717,6 @@ protected Object eval(Frame callerFrame, Object 
callerSelf, Object[] rubyArgs, R @ReportPolymorphism @GenerateUncached - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) public abstract static class EvalInternalNode extends RubyBaseNode { public abstract Object execute(Object self, Object source, RubyBinding binding, Object file, int line); @@ -732,35 +725,40 @@ public abstract static class EvalInternalNode extends RubyBaseNode { guards = { "libSource.isRubyString(source)", "libFile.isRubyString(file)", - "equalNode.execute(libSource.getRope(source), cachedSource)", - "equalNode.execute(libFile.getRope(file), cachedFile)", + "codeEqualNode.execute(libSource, source, cachedSource, cachedSourceEnc)", + "fileEqualNode.execute(libFile, file, cachedFile, cachedFileEnc)", "line == cachedLine", "bindingDescriptor == getBindingDescriptor(binding)" }, limit = "getCacheLimit()") protected Object evalCached(Object self, Object source, RubyBinding binding, Object file, int line, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSource, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFile, - @Cached("libSource.getRope(source)") Rope cachedSource, - @Cached("libFile.getRope(file)") Rope cachedFile, + @Cached RubyStringLibrary libSource, + @Cached RubyStringLibrary libFile, + @Cached("asTruffleStringUncached(source)") TruffleString cachedSource, + @Cached("libSource.getEncoding(source)") RubyEncoding cachedSourceEnc, + @Cached("asTruffleStringUncached(file)") TruffleString cachedFile, + @Cached("libFile.getEncoding(file)") RubyEncoding cachedFileEnc, @Cached("line") int cachedLine, @Cached("getBindingDescriptor(binding)") FrameDescriptor bindingDescriptor, - @Cached("parse(cachedSource, binding.getFrame(), cachedFile, cachedLine)") RootCallTarget callTarget, + @Cached("parse(cachedSource, cachedSourceEnc, binding.getFrame(), getJavaString(file), cachedLine)") RootCallTarget callTarget, @Cached("assignsNewUserVariables(getDescriptor(callTarget))") boolean 
assignsNewUserVariables, @Cached("create(callTarget)") DirectCallNode callNode, - @Cached RopeNodes.EqualNode equalNode) { + @Cached StringHelperNodes.EqualSameEncodingNode codeEqualNode, + @Cached StringHelperNodes.EqualNode fileEqualNode) { Object[] rubyArgs = prepareEvalArgs(callTarget, assignsNewUserVariables, self, binding); return callNode.call(rubyArgs); } @Specialization( guards = { "libSource.isRubyString(source)", "libFile.isRubyString(file)" }, - replaces = "evalCached") + replaces = "evalCached", limit = "1") protected Object evalBindingUncached(Object self, Object source, RubyBinding binding, Object file, int line, @Cached IndirectCallNode callNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSource) { + @Cached RubyStringLibrary libFile, + @Cached RubyStringLibrary libSource, + @Cached ToJavaStringNode toJavaStringNode) { - var callTarget = parse(libSource.getRope(source), binding.getFrame(), libFile.getRope(file), line); + var callTarget = parse(libSource.getTString(source), libSource.getEncoding(source), binding.getFrame(), + toJavaStringNode.executeToJavaString(file), line); boolean assignsNewUserVariables = assignsNewUserVariables(getDescriptor(callTarget)); Object[] rubyArgs = prepareEvalArgs(callTarget, assignsNewUserVariables, self, binding); @@ -783,11 +781,12 @@ private Object[] prepareEvalArgs(RootCallTarget callTarget, boolean assignsNewUs } @TruffleBoundary - protected RootCallTarget parse(Rope sourceText, MaterializedFrame parentFrame, Rope file, int line) { + protected RootCallTarget parse(AbstractTruffleString sourceText, RubyEncoding encoding, + MaterializedFrame parentFrame, String file, int line) { //intern() to improve footprint - final String sourceFile = RopeOperations.decodeRope(file).intern(); - final RubySource source = EvalLoader - .createEvalSource(getContext(), sourceText, "eval", sourceFile, line, this); + final String sourceFile = 
file.intern(); + final RubySource source = EvalLoader.createEvalSource(getContext(), sourceText, encoding, "eval", + sourceFile, line, this); final LexicalScope lexicalScope = RubyArguments.getMethod(parentFrame).getLexicalScope(); return getContext() .getCodeLoader() @@ -886,13 +885,13 @@ protected long hashBignum(RubyBignum value) { @Specialization protected long hashString(RubyString value, - @Cached StringNodes.HashStringNode stringHashNode) { + @Cached StringHelperNodes.HashStringNode stringHashNode) { return stringHashNode.execute(value); } @Specialization protected long hashImmutableString(ImmutableRubyString value, - @Cached StringNodes.HashStringNode stringHashNode) { + @Cached StringHelperNodes.HashStringNode stringHashNode) { return stringHashNode.execute(value); } @@ -1658,13 +1657,12 @@ public static long sleepFor(RubyContext context, RubyThread thread, long duratio } @CoreMethod(names = { "format", "sprintf" }, isModuleFunction = true, rest = true, required = 1) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism @NodeChild(value = "format", type = RubyBaseNodeWithExecute.class) @NodeChild(value = "arguments", type = RubyBaseNodeWithExecute.class) public abstract static class SprintfNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode; + @Child private TruffleString.FromByteArrayNode fromByteArrayNode; @Child private BooleanCastNode readDebugGlobalNode = BooleanCastNodeGen .create(ReadGlobalVariableNodeGen.create("$DEBUG")); @@ -1679,15 +1677,17 @@ protected ToStrNode coerceFormatToString(RubyBaseNodeWithExecute format) { @Specialization( guards = { "libFormat.isRubyString(format)", - "equalNode.execute(libFormat.getRope(format), cachedFormatRope)", - "isDebug(frame) == cachedIsDebug" }) + "equalNode.execute(libFormat, format, cachedTString, cachedEncoding)", + "isDebug(frame) == cachedIsDebug" }, + limit = "3") protected RubyString formatCached(VirtualFrame frame, Object format, Object[] 
arguments, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, + @Cached RubyStringLibrary libFormat, @Cached("isDebug(frame)") boolean cachedIsDebug, - @Cached("libFormat.getRope(format)") Rope cachedFormatRope, - @Cached("cachedFormatRope.byteLength()") int cachedFormatLength, - @Cached("create(compileFormat(format, arguments, isDebug(frame), libFormat))") DirectCallNode callPackNode, - @Cached RopeNodes.EqualNode equalNode) { + @Cached("asTruffleStringUncached(format)") TruffleString cachedTString, + @Cached("libFormat.getEncoding(format)") RubyEncoding cachedEncoding, + @Cached("cachedTString.byteLength(cachedEncoding.tencoding)") int cachedFormatLength, + @Cached("create(compileFormat(cachedTString, cachedEncoding, arguments, isDebug(frame)))") DirectCallNode callPackNode, + @Cached StringHelperNodes.EqualSameEncodingNode equalNode) { final BytesResult result; try { result = (BytesResult) callPackNode.call( @@ -1702,22 +1702,24 @@ protected RubyString formatCached(VirtualFrame frame, Object format, Object[] ar @Specialization( guards = "libFormat.isRubyString(format)", - replaces = "formatCached") + replaces = "formatCached", limit = "1") protected RubyString formatUncached(VirtualFrame frame, Object format, Object[] arguments, @Cached IndirectCallNode callPackNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat) { + @Cached RubyStringLibrary libFormat) { final BytesResult result; final boolean isDebug = readDebugGlobalNode.execute(frame); + var tstring = libFormat.getTString(format); + var encoding = libFormat.getEncoding(format); try { result = (BytesResult) callPackNode.call( - compileFormat(format, arguments, isDebug, libFormat), + compileFormat(tstring, encoding, arguments, isDebug), new Object[]{ arguments, arguments.length, null }); } catch (FormatException e) { exceptionProfile.enter(); throw FormatExceptionTranslator.translate(getContext(), this, e); } - return 
finishFormat(libFormat.getRope(format).byteLength(), result); + return finishFormat(tstring.byteLength(encoding.tencoding), result); } private RubyString finishFormat(int formatLength, BytesResult result) { @@ -1727,23 +1729,20 @@ private RubyString finishFormat(int formatLength, BytesResult result) { bytes = Arrays.copyOf(bytes, result.getOutputLength()); } - if (makeStringNode == null) { + if (fromByteArrayNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(MakeStringNode.create()); + fromByteArrayNode = insert(TruffleString.FromByteArrayNode.create()); } - return makeStringNode.executeMake( - bytes, - result.getEncoding().getEncodingForLength(formatLength), - result.getStringCodeRange()); + return createString(fromByteArrayNode, bytes, result.getEncoding().getEncodingForLength(formatLength)); } @TruffleBoundary - protected RootCallTarget compileFormat(Object format, Object[] arguments, boolean isDebug, - RubyStringLibrary libFormat) { + protected RootCallTarget compileFormat(AbstractTruffleString tstring, RubyEncoding encoding, Object[] arguments, + boolean isDebug) { try { return new PrintfCompiler(getLanguage(), this) - .compile(libFormat.getRope(format), arguments, isDebug); + .compile(tstring, encoding, arguments, isDebug); } catch (InvalidFormatException e) { throw new RaiseException(getContext(), coreExceptions().argumentError(e.getMessage(), this)); } @@ -1884,7 +1883,7 @@ public static ToSNode create() { @Specialization protected RubyString toS(Object self, @Cached LogicalClassNode classNode, - @Cached MakeStringNode makeStringNode, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode, @Cached ObjectIDNode objectIDNode, @Cached ToHexStringNode toHexStringNode) { String className = classNode.execute(self).fields.getName(); @@ -1893,10 +1892,10 @@ protected RubyString toS(Object self, String javaString = Utils.concat("#<", className, ":0x", hexID, ">"); - return makeStringNode.executeMake( + return 
createString( + fromJavaStringNode, javaString, - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + Encodings.UTF_8); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/kernel/TruffleKernelNodes.java b/src/main/java/org/truffleruby/core/kernel/TruffleKernelNodes.java index 50b2a48a76b0..284b7f375488 100644 --- a/src/main/java/org/truffleruby/core/kernel/TruffleKernelNodes.java +++ b/src/main/java/org/truffleruby/core/kernel/TruffleKernelNodes.java @@ -13,8 +13,8 @@ import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.frame.Frame; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.source.Source; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.collections.Pair; import org.truffleruby.Layouts; import org.truffleruby.builtins.CoreMethod; @@ -30,9 +30,7 @@ import org.truffleruby.core.module.ModuleNodes; import org.truffleruby.core.module.RubyModule; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.string.StringNodes; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.FrameOrVariablesReadingNode; import org.truffleruby.language.LexicalScope; @@ -40,6 +38,7 @@ import org.truffleruby.language.ReadOwnFrameAndVariablesNode; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyBaseNodeWithExecute; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.RubyNode; import org.truffleruby.language.arguments.ReadCallerVariablesIfAvailableNode; import org.truffleruby.language.arguments.ReadCallerVariablesNode; @@ -95,12 +94,12 @@ protected RubyBaseNodeWithExecute coerceToBoolean(RubyBaseNodeWithExecute inheri } @TruffleBoundary - @Specialization(guards = "strings.isRubyString(file)") + @Specialization(guards = "strings.isRubyString(file)", limit = "1") 
protected boolean load(Object file, boolean wrap, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached IndirectCallNode callNode) { - final String feature = strings.getJavaString(file); - final Pair sourceRopePair; + final String feature = RubyGuards.getJavaString(file); + final Pair sourceRopePair; try { final FileLoader fileLoader = new FileLoader(getContext(), getLanguage()); sourceRopePair = fileLoader.loadFile(feature); @@ -323,20 +322,20 @@ protected Object storage(VirtualFrame frame, @ImportStatic(Layouts.class) public abstract static class GetOriginalRequireNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected Object getOriginalRequire(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { final String originalRequire = getContext() .getCoreLibrary() .getOriginalRequires() - .get(strings.getJavaString(string)); + .get(RubyGuards.getJavaString(string)); if (originalRequire == null) { return Nil.INSTANCE; } else { - return makeStringNode.executeMake(originalRequire, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, originalRequire, Encodings.UTF_8); } } } diff --git a/src/main/java/org/truffleruby/core/method/MethodNodes.java b/src/main/java/org/truffleruby/core/method/MethodNodes.java index 57c61ed741af..0728c144cd71 100644 --- a/src/main/java/org/truffleruby/core/method/MethodNodes.java +++ b/src/main/java/org/truffleruby/core/method/MethodNodes.java @@ -11,6 +11,7 @@ import com.oracle.truffle.api.dsl.GenerateUncached; import com.oracle.truffle.api.frame.Frame; +import com.oracle.truffle.api.strings.TruffleString; import 
org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -30,9 +31,7 @@ import org.truffleruby.core.proc.ProcOperations; import org.truffleruby.core.proc.ProcType; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.RubyContextSourceNode; import org.truffleruby.language.RubyLambdaRootNode; @@ -212,7 +211,7 @@ protected Object receiver(RubyMethod method) { @CoreMethod(names = "source_location") public abstract static class SourceLocationNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -222,10 +221,10 @@ protected Object sourceLocation(RubyMethod method) { if (!sourceSection.isAvailable()) { return nil; } else { - RubyString file = makeStringNode.executeMake( + RubyString file = createString( + fromJavaStringNode, getLanguage().getSourcePath(sourceSection.getSource()), - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + Encodings.UTF_8); return createArray(new Object[]{ file, sourceSection.getStartLine() }); } } diff --git a/src/main/java/org/truffleruby/core/method/UnboundMethodNodes.java b/src/main/java/org/truffleruby/core/method/UnboundMethodNodes.java index e6c39b9bf95e..db54560350e8 100644 --- a/src/main/java/org/truffleruby/core/method/UnboundMethodNodes.java +++ b/src/main/java/org/truffleruby/core/method/UnboundMethodNodes.java @@ -11,6 +11,7 @@ import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.nodes.NodeUtil; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; 
import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -24,9 +25,7 @@ import org.truffleruby.core.module.MethodLookupResult; import org.truffleruby.core.module.ModuleOperations; import org.truffleruby.core.module.RubyModule; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.RubyGuards; import org.truffleruby.language.Visibility; @@ -178,7 +177,7 @@ protected RubyArray parameters(RubyUnboundMethod method) { @CoreMethod(names = "source_location") public abstract static class SourceLocationNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -190,10 +189,10 @@ protected Object sourceLocation(RubyUnboundMethod unboundMethod) { if (!sourceSection.isAvailable()) { return nil; } else { - RubyString file = makeStringNode.executeMake( + RubyString file = createString( + fromJavaStringNode, getLanguage().getSourcePath(sourceSection.getSource()), - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + Encodings.UTF_8); Object[] objects = new Object[]{ file, sourceSection.getStartLine() }; return createArray(objects); } diff --git a/src/main/java/org/truffleruby/core/module/ModuleFields.java b/src/main/java/org/truffleruby/core/module/ModuleFields.java index 3e659fd9a240..aaaba302f519 100644 --- a/src/main/java/org/truffleruby/core/module/ModuleFields.java +++ b/src/main/java/org/truffleruby/core/module/ModuleFields.java @@ -28,17 +28,16 @@ import java.util.concurrent.locks.ReentrantLock; import com.oracle.truffle.api.object.DynamicObjectLibrary; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import 
org.truffleruby.RubyLanguage; import org.truffleruby.collections.ConcurrentOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.kernel.KernelNodes; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.method.MethodEntry; import org.truffleruby.core.method.MethodFilter; -import org.truffleruby.core.rope.LeafRope; import org.truffleruby.core.string.ImmutableRubyString; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.Nil; @@ -48,7 +47,6 @@ import org.truffleruby.language.constants.ConstantEntry; import org.truffleruby.language.constants.GetConstantNode; import org.truffleruby.language.control.RaiseException; -import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.language.loader.ReentrantLockFreeingMap; import org.truffleruby.language.methods.InternalMethod; import org.truffleruby.language.objects.ObjectGraph; @@ -446,7 +444,7 @@ private RubyConstant setConstantInternal(RubyContext context, Node currentNode, SharedObjects.propagate(context.getLanguageSlow(), rubyModule, value); final String autoloadPath = autoload - ? RubyStringLibrary.getUncached().getJavaString(value) + ? 
RubyGuards.getJavaString(value) : null; RubyConstant previous; RubyConstant newConstant; @@ -721,8 +719,7 @@ public void setFullName(String name) { private void setName(String name) { this.name = name; if (hasPartialName()) { - LeafRope rope = StringOperations.encodeRope(name, UTF8Encoding.INSTANCE); - this.rubyStringName = language.getFrozenStringLiteral(rope); + this.rubyStringName = language.getFrozenStringLiteral(TStringUtils.utf8TString(name), Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/core/module/ModuleNodes.java b/src/main/java/org/truffleruby/core/module/ModuleNodes.java index c45710f16138..478fb5fc035d 100644 --- a/src/main/java/org/truffleruby/core/module/ModuleNodes.java +++ b/src/main/java/org/truffleruby/core/module/ModuleNodes.java @@ -27,6 +27,8 @@ import com.oracle.truffle.api.object.DynamicObjectLibrary; import com.oracle.truffle.api.profiles.ConditionProfile; import com.oracle.truffle.api.profiles.LoopConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.ByteIndexOfStringNode; import org.truffleruby.RubyContext; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -50,6 +52,7 @@ import org.truffleruby.core.cast.ToSymbolNode; import org.truffleruby.core.constant.WarnAlreadyInitializedNode; import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.inlined.AlwaysInlinedMethodNode; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.method.MethodFilter; @@ -63,17 +66,13 @@ import org.truffleruby.core.module.ModuleNodesFactory.IsSubclassOfOrEqualToNodeFactory; import org.truffleruby.core.module.ModuleNodesFactory.SetMethodVisibilityNodeGen; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; 
-import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringCachingGuards; -import org.truffleruby.core.string.StringNodes.MakeStringNode; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.string.StringUtils; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.core.support.TypeNodes; import org.truffleruby.core.symbol.RubySymbol; +import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.language.LexicalScope; import org.truffleruby.language.Nil; import org.truffleruby.language.NotProvided; @@ -604,9 +603,9 @@ protected RubyBaseNodeWithExecute coerceFilenameToPath(RubyBaseNodeWithExecute f } @TruffleBoundary - @Specialization(guards = "libFilename.isRubyString(filename)") + @Specialization(guards = "libFilename.isRubyString(filename)", limit = "1") protected Object autoload(RubyModule module, String name, Object filename, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFilename) { + @Cached RubyStringLibrary libFilename) { if (!Identifiers.isValidConstantName(name)) { throw new RaiseException( getContext(), @@ -617,11 +616,11 @@ protected Object autoload(RubyModule module, String name, Object filename, this)); } - if (libFilename.getRope(filename).isEmpty()) { + if (libFilename.getTString(filename).isEmpty()) { throw new RaiseException(getContext(), coreExceptions().argumentError("empty file name", this)); } - final String javaStringFilename = libFilename.getJavaString(filename); + final String javaStringFilename = RubyGuards.getJavaString(filename); module.fields.setAutoloadConstant(getContext(), this, name, filename, javaStringFilename); return nil; } @@ -668,33 +667,35 @@ public abstract static class ClassEvalNode extends CoreMethodArrayArgumentsNode @Child private ReadCallerFrameNode readCallerFrameNode = ReadCallerFrameNode.create(); - 
@Specialization(guards = { "libCode.isRubyString(code)" }) + @Specialization(guards = "libCode.isRubyString(code)", limit = "1") protected Object classEval( VirtualFrame frame, RubyModule module, Object code, NotProvided file, NotProvided line, Nil block, @Cached IndirectCallNode callNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libCode) { + @Cached RubyStringLibrary libCode) { return classEvalSource(frame, module, code, "(eval)", callNode); } - @Specialization(guards = { "libCode.isRubyString(code)", "libFile.isRubyString(file)" }) + @Specialization(guards = { "libCode.isRubyString(code)", "libFile.isRubyString(file)" }, limit = "1") protected Object classEval( VirtualFrame frame, RubyModule module, Object code, Object file, NotProvided line, Nil block, @Cached IndirectCallNode callNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libCode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFile) { - return classEvalSource(frame, module, code, libFile.getJavaString(file), callNode); + @Cached RubyStringLibrary libCode, + @Cached RubyStringLibrary libFile, + @Cached ToJavaStringNode toJavaStringNode) { + return classEvalSource(frame, module, code, toJavaStringNode.executeToJavaString(file), callNode); } - @Specialization(guards = { "libCode.isRubyString(code)", "wasProvided(file)" }) + @Specialization(guards = { "libCode.isRubyString(code)", "wasProvided(file)" }, limit = "1") protected Object classEval(VirtualFrame frame, RubyModule module, Object code, Object file, int line, Nil block, @Cached IndirectCallNode callNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libCode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFile) { + @Cached RubyStringLibrary libCode, + @Cached RubyStringLibrary libFile, + @Cached ToJavaStringNode toJavaStringNode) { final CodeLoader.DeferredCall deferredCall = classEvalSource( frame, module, code, - libFile.getJavaString(file), + 
toJavaStringNode.executeToJavaString(file), line); return deferredCall.call(callNode); } @@ -707,14 +708,15 @@ protected Object classEval( return classEvalSource(frame, module, toStrNode.execute(code), "(eval)", callNode); } - @Specialization(guards = { "libCode.isRubyString(code)", "wasProvided(file)" }) + @Specialization(guards = { "libCode.isRubyString(code)", "wasProvided(file)" }, limit = "1") protected Object classEval( VirtualFrame frame, RubyModule module, Object code, Object file, NotProvided line, Nil block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringLibrary, + @Cached RubyStringLibrary stringLibrary, + @Cached ToJavaStringNode toJavaStringNode, @Cached IndirectCallNode callNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libCode, + @Cached RubyStringLibrary libCode, @Cached ToStrNode toStrNode) { - final String javaString = stringLibrary.getJavaString(toStrNode.execute(file)); + final String javaString = toJavaStringNode.executeToJavaString(toStrNode.execute(file)); return classEvalSource(frame, module, code, javaString, callNode); } @@ -737,7 +739,8 @@ private CodeLoader.DeferredCall classEvalSourceInternal(RubyModule module, Objec String file, int line, MaterializedFrame callerFrame) { final RubySource source = EvalLoader.createEvalSource( getContext(), - RubyStringLibrary.getUncached().getRope(rubySource), + RubyStringLibrary.getUncached().getTString(rubySource), + RubyStringLibrary.getUncached().getEncoding(rubySource), "class/module_eval", file, line, @@ -978,12 +981,12 @@ protected boolean isConstDefined(RubyModule module, String fullName, boolean inh @NodeChild(value = "inherit", type = RubyNode.class) @NodeChild(value = "look_in_object", type = RubyNode.class) @NodeChild(value = "check_name", type = RubyNode.class) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) public abstract static class ConstGetNode extends PrimitiveNode { @Child private LookupConstantNode 
lookupConstantLookInObjectNode = LookupConstantNode.create(true, true); @Child private LookupConstantNode lookupConstantNode = LookupConstantNode.create(true, false); @Child private GetConstantNode getConstantNode = GetConstantNode.create(); + @Child private ByteIndexOfStringNode byteIndexOfStringNode; @CreateCast("name") protected RubyBaseNodeWithExecute coerceToSymbolOrString(RubyBaseNodeWithExecute name) { @@ -1011,43 +1014,46 @@ protected Object getConstantNoInherit( guards = { "stringsName.isRubyString(name)", "inherit", - "equalNode.execute(stringsName.getRope(name), cachedRope)", + "equalNode.execute(stringsName, name, cachedTString, cachedEncoding)", "!scoped", "checkName == cachedCheckName" }, limit = "getLimit()") protected Object getConstantStringCached( RubyModule module, Object name, boolean inherit, boolean lookInObject, boolean checkName, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsName, - @Cached("stringsName.getRope(name)") Rope cachedRope, - @Cached("stringsName.getJavaString(name)") String cachedString, + @Cached RubyStringLibrary stringsName, + @Cached("asTruffleStringUncached(name)") TruffleString cachedTString, + @Cached("stringsName.getEncoding(name)") RubyEncoding cachedEncoding, + @Cached("getJavaString(name)") String cachedString, @Cached("checkName") boolean cachedCheckName, - @Cached RopeNodes.EqualNode equalNode, + @Cached StringHelperNodes.EqualNode equalNode, @Cached("isScoped(cachedString)") boolean scoped) { return getConstant(module, cachedString, checkName, lookInObject); } @Specialization( - guards = { "stringsName.isRubyString(name)", "inherit", "!isScoped(stringsName.getRope(name))" }, - replaces = "getConstantStringCached") + guards = { "stringsName.isRubyString(name)", "inherit", "!isScoped(stringsName, name)" }, + replaces = "getConstantStringCached", limit = "1") protected Object getConstantString( RubyModule module, Object name, boolean inherit, boolean lookInObject, boolean checkName, - 
@CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsName) { - return getConstant(module, stringsName.getJavaString(name), checkName, lookInObject); + @Cached RubyStringLibrary stringsName, + @Cached ToJavaStringNode toJavaStringNode) { + return getConstant(module, toJavaStringNode.executeToJavaString(name), checkName, lookInObject); } @Specialization( - guards = { "stringsName.isRubyString(name)", "!inherit", "!isScoped(stringsName.getRope(name))" }) + guards = { "stringsName.isRubyString(name)", "!inherit", "!isScoped(stringsName, name)" }, limit = "1") protected Object getConstantNoInheritString( RubyModule module, Object name, boolean inherit, boolean lookInObject, boolean checkName, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsName) { - return getConstantNoInherit(module, stringsName.getJavaString(name), checkName); + @Cached RubyStringLibrary stringsName, + @Cached ToJavaStringNode toJavaStringNode) { + return getConstantNoInherit(module, toJavaStringNode.executeToJavaString(name), checkName); } // Scoped String - @Specialization(guards = { "stringsName.isRubyString(name)", "isScoped(stringsName.getRope(name))" }) + @Specialization(guards = { "stringsName.isRubyString(name)", "isScoped(stringsName, name)" }, limit = "1") protected Object getConstantScoped( RubyModule module, Object name, boolean inherit, boolean lookInObject, boolean checkName, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsName) { + @Cached RubyStringLibrary stringsName) { return FAILURE; } @@ -1077,12 +1083,19 @@ private RubyConstant lookupConstantNoInherit(LexicalScope lexicalScope, RubyModu .getConstant(); } - @TruffleBoundary - boolean isScoped(Rope name) { - // TODO (eregon, 27 May 2015): Any way to make this efficient? 
- return RopeOperations.decodeRope(name).contains("::"); + boolean isScoped(RubyStringLibrary libString, Object string) { + if (byteIndexOfStringNode == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + byteIndexOfStringNode = insert(ByteIndexOfStringNode.create()); + } + + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); + int byteLength = tstring.byteLength(encoding); + return byteIndexOfStringNode.execute(tstring, TStringConstants.COLON_COLON, 0, byteLength, encoding) >= 0; } + @TruffleBoundary boolean isScoped(String name) { return name.contains("::"); } @@ -1116,7 +1129,7 @@ protected Object constMissing(RubyModule module, String name) { @NodeChild(value = "inherit", type = RubyBaseNodeWithExecute.class) public abstract static class ConstSourceLocationNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @CreateCast("name") protected RubyBaseNodeWithExecute coerceToStringOrSymbol(RubyBaseNodeWithExecute name) { @@ -1128,12 +1141,12 @@ protected RubyBaseNodeWithExecute coerceToBoolean(RubyBaseNodeWithExecute inheri return BooleanCastWithDefaultNode.create(true, inherit); } - @Specialization(guards = { "strings.isRubyString(name)" }) + @Specialization(guards = "strings.isRubyString(name)", limit = "1") @TruffleBoundary protected Object constSourceLocation(RubyModule module, Object name, boolean inherit, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { final ConstantLookupResult lookupResult = ModuleOperations - .lookupScopedConstant(getContext(), module, strings.getJavaString(name), inherit, this, true); + .lookupScopedConstant(getContext(), module, RubyGuards.getJavaString(name), inherit, this, true); return getLocation(lookupResult); } @@ -1156,10 +1169,10 @@ private Object 
getLocation(ConstantLookupResult lookupResult) { if (!BacktraceFormatter.isAvailable(sourceSection)) { return createEmptyArray(); } else { - final RubyString file = makeStringNode.executeMake( + final RubyString file = createString( + fromJavaStringNode, getLanguage().getSourcePath(sourceSection.getSource()), - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + Encodings.UTF_8); return createArray(new Object[]{ file, sourceSection.getStartLine() }); } } @@ -2113,14 +2126,14 @@ private void removeMethod(RubyModule module, String name) { public abstract static class ToSNode extends CoreMethodArrayArgumentsNode { @Specialization protected RubyString toS(RubyModule module, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final String moduleName; if (module.fields.isRefinement()) { moduleName = module.fields.getRefinementName(); } else { moduleName = module.fields.getName(); } - return makeStringNode.executeMake(moduleName, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, moduleName, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/core/numeric/FixnumLowerNode.java b/src/main/java/org/truffleruby/core/numeric/FixnumLowerNode.java index 6e91b4ed8465..afa66d4f0fe6 100644 --- a/src/main/java/org/truffleruby/core/numeric/FixnumLowerNode.java +++ b/src/main/java/org/truffleruby/core/numeric/FixnumLowerNode.java @@ -9,7 +9,6 @@ */ package org.truffleruby.core.numeric; -import org.truffleruby.core.CoreLibrary; import org.truffleruby.language.RubyBaseNodeWithExecute; import org.truffleruby.language.RubyContextSourceNode; @@ -41,12 +40,12 @@ protected int lower(int value) { return value; } - @Specialization(guards = "canLower(value)") + @Specialization(guards = "fitsInInteger(value)") protected int lower(long value) { return (int) value; } - @Specialization(guards = "!canLower(value)") + @Specialization(guards = "!fitsInInteger(value)") protected long lowerFails(long value) { 
return value; } @@ -55,10 +54,5 @@ protected long lowerFails(long value) { protected Object passThrough(Object value) { return value; } - - protected static boolean canLower(long value) { - return CoreLibrary.fitsIntoInteger(value); - } - } diff --git a/src/main/java/org/truffleruby/core/numeric/FloatNodes.java b/src/main/java/org/truffleruby/core/numeric/FloatNodes.java index 7ce1904a4759..a254124757e3 100644 --- a/src/main/java/org/truffleruby/core/numeric/FloatNodes.java +++ b/src/main/java/org/truffleruby/core/numeric/FloatNodes.java @@ -19,6 +19,7 @@ import com.oracle.truffle.api.profiles.BranchProfile; import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.SuppressFBWarnings; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -28,15 +29,11 @@ import org.truffleruby.builtins.PrimitiveArrayArgumentsNode; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.numeric.FloatNodesFactory.ModNodeFactory; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.thread.RubyThread; -import org.truffleruby.language.NotProvided; import org.truffleruby.language.RubyDynamicObject; import org.truffleruby.language.Visibility; import org.truffleruby.language.control.RaiseException; @@ -883,7 +880,7 @@ protected double toF(double value) { @ImportStatic(Double.class) public abstract static class ToSNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private 
TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); /* Ruby has complex custom formatting logic for floats. Our logic meets the specs but we suspect it's possibly * still not entirely correct. JRuby seems to be correct, but their logic is tied up in their printf @@ -891,52 +888,52 @@ public abstract static class ToSNode extends CoreMethodArrayArgumentsNode { @Specialization(guards = "value == POSITIVE_INFINITY") protected RubyString toSPositiveInfinity(double value, - @Cached("specialValueRope(POSITIVE_INFINITY)") Rope cachedRope) { - return makeStringNode.executeMake(cachedRope, Encodings.US_ASCII, NotProvided.INSTANCE); + @Cached("specialValueString(POSITIVE_INFINITY)") TruffleString cachedString) { + return createString(cachedString, Encodings.US_ASCII); } @Specialization(guards = "value == NEGATIVE_INFINITY") protected RubyString toSNegativeInfinity(double value, - @Cached("specialValueRope(NEGATIVE_INFINITY)") Rope cachedRope) { - return makeStringNode.executeMake(cachedRope, Encodings.US_ASCII, NotProvided.INSTANCE); + @Cached("specialValueString(NEGATIVE_INFINITY)") TruffleString cachedString) { + return createString(cachedString, Encodings.US_ASCII); } @Specialization(guards = "isNaN(value)") protected RubyString toSNaN(double value, - @Cached("specialValueRope(value)") Rope cachedRope) { - return makeStringNode.executeMake(cachedRope, Encodings.US_ASCII, NotProvided.INSTANCE); + @Cached("specialValueString(value)") TruffleString cachedString) { + return createString(cachedString, Encodings.US_ASCII); } @Specialization(guards = "hasNoExp(value)") protected RubyString toSNoExp(double value) { - return makeStringNode.executeMake(makeRopeNoExp(value, getLanguage().getCurrentThread()), - Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromJavaStringNode, makeStringNoExp(value, getLanguage().getCurrentThread()), + Encodings.US_ASCII); // CR_7BIT } @Specialization(guards = "hasLargeExp(value)") protected 
RubyString toSLargeExp(double value) { - return makeStringNode.executeMake(makeRopeLargeExp(value, getLanguage().getCurrentThread()), - Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromJavaStringNode, makeStringLargeExp(value, getLanguage().getCurrentThread()), + Encodings.US_ASCII); // CR_7BIT } @Specialization(guards = "hasSmallExp(value)") protected RubyString toSSmallExp(double value) { - return makeStringNode.executeMake(makeRopeSmallExp(value, getLanguage().getCurrentThread()), - Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromJavaStringNode, makeStringSmallExp(value, getLanguage().getCurrentThread()), + Encodings.US_ASCII); // CR_7BIT } @TruffleBoundary - private String makeRopeNoExp(double value, RubyThread thread) { + private String makeStringNoExp(double value, RubyThread thread) { return getNoExpFormat(thread).format(value); } @TruffleBoundary - private String makeRopeSmallExp(double value, RubyThread thread) { + private String makeStringSmallExp(double value, RubyThread thread) { return getSmallExpFormat(thread).format(value); } @TruffleBoundary - private String makeRopeLargeExp(double value, RubyThread thread) { + private String makeStringLargeExp(double value, RubyThread thread) { return getLargeExpFormat(thread).format(value); } @@ -955,8 +952,8 @@ protected static boolean hasSmallExp(double value) { return (abs < 0.0001) && (abs != 0.0); } - protected static Rope specialValueRope(double value) { - return RopeOperations.encodeAscii(Double.toString(value), Encodings.US_ASCII.jcoding); + protected static TruffleString specialValueString(double value) { + return TStringUtils.fromJavaString(Double.toString(value), Encodings.US_ASCII); } private DecimalFormat getNoExpFormat(RubyThread thread) { @@ -991,7 +988,7 @@ private DecimalFormat getLargeExpFormat(RubyThread thread) { @CoreMethod(names = "dtoa", visibility = Visibility.PRIVATE) public abstract static class DToANode extends CoreMethodArrayArgumentsNode { - @Child 
private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -1031,7 +1028,7 @@ protected RubyArray dToA(double value) { final int sign = value < 0 ? 1 : 0; return createArray(new Object[]{ - makeStringNode.executeMake(string, Encodings.UTF_8, CodeRange.CR_7BIT), + createString(fromJavaStringNode, string, Encodings.UTF_8), // CR_7BIT decimal, sign, string.length() diff --git a/src/main/java/org/truffleruby/core/numeric/IntegerNodes.java b/src/main/java/org/truffleruby/core/numeric/IntegerNodes.java index d1afce7ba7cf..e227860a435a 100644 --- a/src/main/java/org/truffleruby/core/numeric/IntegerNodes.java +++ b/src/main/java/org/truffleruby/core/numeric/IntegerNodes.java @@ -15,6 +15,7 @@ import com.oracle.truffle.api.dsl.Cached.Shared; import com.oracle.truffle.api.profiles.LoopConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -34,11 +35,7 @@ import org.truffleruby.core.numeric.IntegerNodesFactory.PowNodeFactory; import org.truffleruby.core.numeric.IntegerNodesFactory.RightShiftNodeFactory; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LazyIntRope; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.language.NoImplicitCastsToLong; import org.truffleruby.language.NotProvided; import org.truffleruby.language.RubyBaseNodeWithExecute; @@ -1497,58 +1494,52 @@ protected double toF(RubyBignum value) { @CoreMethod(names = { "to_s", "inspect" }, optional = 1, lowerFixnum = 1) public abstract static class ToSNode extends CoreMethodArrayArgumentsNode 
{ - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - @Specialization - protected RubyString toS(int n, NotProvided base) { - final Rope rope = new LazyIntRope(n); - return makeStringNode.fromRope(rope, Encodings.US_ASCII); + protected RubyString defaultBase10(long n, NotProvided base, + @Cached TruffleString.FromLongNode fromLongNode) { + var tstring = fromLongNode.execute(n, Encodings.US_ASCII.tencoding, true); + return createString(tstring, Encodings.US_ASCII); } @TruffleBoundary @Specialization - protected RubyString toS(long n, NotProvided base) { - if (CoreLibrary.fitsIntoInteger(n)) { - return toS((int) n, base); - } - - return makeStringNode.executeMake(Long.toString(n), Encodings.US_ASCII, CodeRange.CR_7BIT); + protected RubyString toS(RubyBignum value, NotProvided base, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + return createString( + fromJavaStringNode, + BigIntegerOps.toString(value.value), + Encodings.US_ASCII); // CR_7BIT } - @TruffleBoundary - @Specialization - protected RubyString toS(RubyBignum value, NotProvided base) { - return makeStringNode.executeMake( - BigIntegerOps.toString(value.value), - Encodings.US_ASCII, - CodeRange.CR_7BIT); + @Specialization(guards = "base == 10") + protected RubyString base10(long n, int base, + @Cached TruffleString.FromLongNode fromLongNode) { + return defaultBase10(n, NotProvided.INSTANCE, fromLongNode); } @TruffleBoundary - @Specialization - protected RubyString toS(long n, int base) { - if (base == 10) { - return toS(n, NotProvided.INSTANCE); - } - + @Specialization(guards = "base != 10") + protected RubyString toS(long n, int base, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { if (base < 2 || base > 36) { throw new RaiseException(getContext(), coreExceptions().argumentErrorInvalidRadix(base, this)); } - return makeStringNode.executeMake(Long.toString(n, base), Encodings.US_ASCII, CodeRange.CR_7BIT); + return 
createString(fromJavaStringNode, Long.toString(n, base), Encodings.US_ASCII); // CR_7BIT } @TruffleBoundary @Specialization - protected RubyString toS(RubyBignum value, int base) { + protected RubyString toS(RubyBignum value, int base, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { if (base < 2 || base > 36) { throw new RaiseException(getContext(), coreExceptions().argumentErrorInvalidRadix(base, this)); } - return makeStringNode.executeMake( + return createString( + fromJavaStringNode, BigIntegerOps.toString(value.value, base), - Encodings.US_ASCII, - CodeRange.CR_7BIT); + Encodings.US_ASCII); // CR_7BIT } } @@ -1653,20 +1644,15 @@ protected int lower(int value) { return value; } - @Specialization(guards = "canLower(value)") + @Specialization(guards = "fitsInInteger(value)") protected int lower(long value) { return (int) value; } - @Specialization(guards = "!canLower(value)") + @Specialization(guards = "!fitsInInteger(value)") protected long lowerFails(long value) { return value; } - - protected static boolean canLower(long value) { - return CoreLibrary.fitsIntoInteger(value); - } - } @Primitive(name = "integer_ulong_from_bignum") diff --git a/src/main/java/org/truffleruby/core/numeric/RubyBignum.java b/src/main/java/org/truffleruby/core/numeric/RubyBignum.java index fa1efcd7fd85..f39ab24460df 100644 --- a/src/main/java/org/truffleruby/core/numeric/RubyBignum.java +++ b/src/main/java/org/truffleruby/core/numeric/RubyBignum.java @@ -21,7 +21,7 @@ import java.math.BigInteger; @ExportLibrary(InteropLibrary.class) -public class RubyBignum extends ImmutableRubyObjectNotCopyable { +public final class RubyBignum extends ImmutableRubyObjectNotCopyable { public final BigInteger value; diff --git a/src/main/java/org/truffleruby/core/proc/ProcNodes.java b/src/main/java/org/truffleruby/core/proc/ProcNodes.java index e1a322623236..31edac1b4da5 100644 --- a/src/main/java/org/truffleruby/core/proc/ProcNodes.java +++ 
b/src/main/java/org/truffleruby/core/proc/ProcNodes.java @@ -12,6 +12,7 @@ import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.dsl.GenerateUncached; import com.oracle.truffle.api.frame.Frame; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -26,9 +27,7 @@ import org.truffleruby.core.inlined.AlwaysInlinedMethodNode; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.method.UnboundMethodNodes.MethodRuby2KeywordsNode; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.symbol.SymbolNodes; import org.truffleruby.language.Nil; import org.truffleruby.language.Visibility; @@ -257,7 +256,7 @@ protected RubyArray parameters(RubyProc proc) { @CoreMethod(names = "source_location") public abstract static class SourceLocationNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -268,10 +267,10 @@ protected Object sourceLocation(RubyProc proc) { if (!sourceSection.isAvailable() || sourcePath.endsWith("/lib/truffle/truffle/cext.rb")) { return nil; } else { - final RubyString file = makeStringNode.executeMake( + final RubyString file = createString( + fromJavaStringNode, sourcePath, - Encodings.UTF_8, - CodeRange.CR_UNKNOWN); + Encodings.UTF_8); return createArray(new Object[]{ file, sourceSection.getStartLine() }); } diff --git a/src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java b/src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java index 10a6dcd89a73..f597a6ded3ae 100644 --- 
a/src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java +++ b/src/main/java/org/truffleruby/core/regexp/ClassicRegexp.java @@ -36,16 +36,15 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.core.regexp; -import static org.truffleruby.core.rope.CodeRange.CR_7BIT; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; import static org.truffleruby.core.string.StringUtils.EMPTY_STRING_ARRAY; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Iterator; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleStringBuilder; import org.jcodings.Encoding; -import org.jcodings.specific.USASCIIEncoding; import org.joni.NameEntry; import org.joni.Option; import org.joni.Regex; @@ -53,13 +52,12 @@ import org.joni.exception.JOniException; import org.truffleruby.RubyContext; import org.truffleruby.SuppressFBWarnings; +import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; +import org.truffleruby.core.string.ATStringWithEncoding; +import org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.StringSupport; import org.truffleruby.core.string.StringUtils; import org.truffleruby.language.backtrace.BacktraceFormatter; @@ -74,7 +72,7 @@ public class ClassicRegexp implements ReOptions { private final RubyContext context; private final Regex pattern; - private final Rope str; + private final TStringWithEncoding str; private RegexpOptions options; public void setLiteral() { @@ -86,8 +84,8 @@ public Encoding getEncoding() { } public static Regex makeRegexp(RubyDeferredWarnings 
rubyDeferredWarnings, - RopeBuilder processedSource, RegexpOptions options, - RubyEncoding enc, Rope source, Node currentNode) throws DeferredRaiseException { + TStringBuilder processedSource, RegexpOptions options, + RubyEncoding enc, AbstractTruffleString source, Node currentNode) throws DeferredRaiseException { try { return new Regex( processedSource.getUnsafeBytes(), @@ -105,63 +103,58 @@ public static Regex makeRegexp(RubyDeferredWarnings rubyDeferredWarnings, } } - public static String getRegexErrorMessage(Rope source, Exception e, RegexpOptions options) { - return BacktraceFormatter.formatJavaThrowableMessage(e) + ": /" + - RopeOperations.decodeRope(source) + "/" + options.toOptionsString(); + public static String getRegexErrorMessage(AbstractTruffleString source, Exception e, RegexpOptions options) { + return BacktraceFormatter.formatJavaThrowableMessage(e) + ": /" + source + "/" + options.toOptionsString(); } - private static Regex getRegexpFromCache(RubyContext context, RopeBuilder bytes, RubyEncoding encoding, - RegexpOptions options, Rope source) throws DeferredRaiseException { + private static Regex getRegexpFromCache(TStringBuilder bytes, RubyEncoding encoding, RegexpOptions options, + AbstractTruffleString source) throws DeferredRaiseException { final Regex newRegex = makeRegexp(null, bytes, options, encoding, source, null); newRegex.setUserObject(bytes); return newRegex; } - public ClassicRegexp(RubyContext context, Rope str, RubyEncoding enc, RegexpOptions originalOptions) + public ClassicRegexp(RubyContext context, TStringWithEncoding strEnc, RegexpOptions originalOptions) throws DeferredRaiseException { this.context = context; this.options = originalOptions; - if (enc.jcoding.isDummy()) { + if (strEnc.encoding.isDummy) { throw new UnsupportedOperationException("can't make regexp with dummy encoding"); } RegexpOptions[] optionsArray = new RegexpOptions[]{ originalOptions }; RubyEncoding[] fixedEnc = new RubyEncoding[]{ null }; - RopeBuilder 
unescaped = preprocess(str, enc, fixedEnc, RegexpSupport.ErrorMode.RAISE); - final RubyEncoding computedEnc = computeRegexpEncoding(optionsArray, enc, fixedEnc); + TStringBuilder unescaped = preprocess(strEnc, strEnc.encoding, fixedEnc, RegexpSupport.ErrorMode.RAISE); + final RubyEncoding computedEnc = computeRegexpEncoding(optionsArray, strEnc.encoding, fixedEnc); this.pattern = getRegexpFromCache( - context, unescaped, computedEnc, options, - RopeOperations.withEncoding(str, computedEnc.jcoding)); + strEnc.forceEncoding(computedEnc).tstring); this.options = optionsArray[0]; - this.str = str; + this.str = strEnc; } @TruffleBoundary @SuppressWarnings("fallthrough") - private static boolean unescapeNonAscii(RopeBuilder to, Rope str, RubyEncoding enc, + private static boolean unescapeNonAscii(TStringBuilder to, TStringWithEncoding str, RubyEncoding enc, RubyEncoding[] encp, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { boolean hasProperty = false; byte[] buf = null; - int p = 0; - int end = str.byteLength(); - final byte[] bytes = str.getBytes(); + var byteArray = str.getInternalByteArray(); + final int offset = byteArray.getOffset(); + int p = offset; + int end = byteArray.getEnd(); + final byte[] bytes = byteArray.getArray(); + var strInEnc = str.forceEncoding(enc); while (p < end) { - final int cl = StringSupport - .characterLength( - enc.jcoding, - enc.jcoding == str.getEncoding() ? 
str.getCodeRange() : CR_UNKNOWN, - bytes, - p, - end); + final int cl = strInEnc.characterLength(p - offset); if (cl <= 0) { - raisePreprocessError(str, "invalid multibyte character", mode); + raisePreprocessError("invalid multibyte character", mode); } if (cl > 1 || (bytes[p] & 0x80) != 0) { if (to != null) { @@ -171,7 +164,7 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, RubyEncoding e if (encp[0] == null) { encp[0] = enc; } else if (encp[0] != enc) { - raisePreprocessError(str, "non ASCII character in UTF-8 regexp", mode); + raisePreprocessError("non ASCII character in UTF-8 regexp", mode); } continue; } @@ -179,7 +172,7 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, RubyEncoding e switch (c = bytes[p++] & 0xff) { case '\\': if (p == end) { - raisePreprocessError(str, "too short escape sequence", mode); + raisePreprocessError("too short escape sequence", mode); } switch (c = bytes[p++] & 0xff) { @@ -209,7 +202,7 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, RubyEncoding e buf = new byte[1]; } int pbeg = p; - p = readEscapedByte(buf, 0, bytes, p, end, str, mode); + p = readEscapedByte(buf, 0, bytes, p, end, mode); c = buf[0]; if (c == -1) { return false; @@ -218,22 +211,22 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, RubyEncoding e to.append(bytes, pbeg, p - pbeg); } } else { - p = unescapeEscapedNonAscii(to, bytes, p, end, enc, encp, str, mode); + p = unescapeEscapedNonAscii(to, bytes, p, end, enc, encp, mode); } break; case 'u': if (p == end) { - raisePreprocessError(str, "too short escape sequence", mode); + raisePreprocessError("too short escape sequence", mode); } if (bytes[p] == (byte) '{') { /* \\u{H HH HHH HHHH HHHHH HHHHHH ...} */ p++; - p = unescapeUnicodeList(to, bytes, p, end, encp, str, mode); + p = unescapeUnicodeList(to, bytes, p, end, encp, mode); if (p == end || bytes[p++] != (byte) '}') { - raisePreprocessError(str, "invalid Unicode list", mode); + 
raisePreprocessError("invalid Unicode list", mode); } } else { /* \\uHHHH */ - p = unescapeUnicodeBmp(to, bytes, p, end, encp, str, mode); + p = unescapeUnicodeBmp(to, bytes, p, end, encp, mode); } break; case 'p': /* \p{Hiragana} */ @@ -264,22 +257,22 @@ private static boolean unescapeNonAscii(RopeBuilder to, Rope str, RubyEncoding e return hasProperty; } - private static int unescapeUnicodeBmp(RopeBuilder to, byte[] bytes, int p, int end, - RubyEncoding[] encp, Rope str, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { + private static int unescapeUnicodeBmp(TStringBuilder to, byte[] bytes, int p, int end, + RubyEncoding[] encp, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { if (p + 4 > end) { - raisePreprocessError(str, "invalid Unicode escape", mode); + raisePreprocessError("invalid Unicode escape", mode); } int code = StringSupport.scanHex(bytes, p, 4); int len = StringSupport.hexLength(bytes, p, 4); if (len != 4) { - raisePreprocessError(str, "invalid Unicode escape", mode); + raisePreprocessError("invalid Unicode escape", mode); } - appendUtf8(to, code, encp, str, mode); + appendUtf8(to, code, encp, mode); return p + 4; } - private static int unescapeUnicodeList(RopeBuilder to, byte[] bytes, int p, int end, - RubyEncoding[] encp, Rope str, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { + private static int unescapeUnicodeList(TStringBuilder to, byte[] bytes, int p, int end, + RubyEncoding[] encp, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { while (p < end && StringSupport.isAsciiSpace(bytes[p] & 0xff)) { p++; } @@ -292,11 +285,11 @@ private static int unescapeUnicodeList(RopeBuilder to, byte[] bytes, int p, int break; } if (len > 6) { - raisePreprocessError(str, "invalid Unicode range", mode); + raisePreprocessError("invalid Unicode range", mode); } p += len; if (to != null) { - appendUtf8(to, code, encp, str, mode); + appendUtf8(to, code, encp, mode); } hasUnicode = true; while (p < end && 
StringSupport.isAsciiSpace(bytes[p] & 0xff)) { @@ -305,14 +298,14 @@ private static int unescapeUnicodeList(RopeBuilder to, byte[] bytes, int p, int } if (!hasUnicode) { - raisePreprocessError(str, "invalid Unicode list", mode); + raisePreprocessError("invalid Unicode list", mode); } return p; } - private static void appendUtf8(RopeBuilder to, int code, RubyEncoding[] enc, Rope str, + private static void appendUtf8(TStringBuilder to, int code, RubyEncoding[] enc, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { - checkUnicodeRange(code, str, mode); + checkUnicodeRange(code, mode); if (code < 0x80) { if (to != null) { @@ -325,8 +318,8 @@ private static void appendUtf8(RopeBuilder to, int code, RubyEncoding[] enc, Rop } if (enc[0] == null) { enc[0] = Encodings.UTF_8; - } else if (!(enc[0].jcoding.isUTF8())) { - raisePreprocessError(str, "UTF-8 character in non UTF-8 regexp", mode); + } else if (enc[0] != Encodings.UTF_8) { + raisePreprocessError("UTF-8 character in non UTF-8 regexp", mode); } } } @@ -368,30 +361,29 @@ public static int utf8Decode(byte[] to, int p, int code) { } } - private static void checkUnicodeRange(int code, Rope str, RegexpSupport.ErrorMode mode) + private static void checkUnicodeRange(int code, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { // Unicode is can be only 21 bits long, int is enough if ((0xd800 <= code && code <= 0xdfff) /* Surrogates */ || 0x10ffff < code) { - raisePreprocessError(str, "invalid Unicode range", mode); + raisePreprocessError("invalid Unicode range", mode); } } - private static int unescapeEscapedNonAscii(RopeBuilder to, byte[] bytes, int p, int end, - RubyEncoding enc, RubyEncoding[] encp, Rope str, RegexpSupport.ErrorMode mode) + private static int unescapeEscapedNonAscii(TStringBuilder to, byte[] bytes, int p, int end, + RubyEncoding enc, RubyEncoding[] encp, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { byte[] chBuf = new byte[enc.jcoding.maxLength()]; int chLen = 0; - p 
= readEscapedByte(chBuf, chLen++, bytes, p, end, str, mode); + p = readEscapedByte(chBuf, chLen++, bytes, p, end, mode); while (chLen < enc.jcoding.maxLength() && - StringSupport - .MBCLEN_NEEDMORE_P(StringSupport.characterLength(enc.jcoding, CR_UNKNOWN, chBuf, 0, chLen))) { - p = readEscapedByte(chBuf, chLen++, bytes, p, end, str, mode); + StringSupport.MBCLEN_NEEDMORE_P(StringSupport.characterLength(enc, chBuf, 0, chLen))) { + p = readEscapedByte(chBuf, chLen++, bytes, p, end, mode); } - int cl = StringSupport.characterLength(enc.jcoding, CR_UNKNOWN, chBuf, 0, chLen); + int cl = StringSupport.characterLength(enc, chBuf, 0, chLen); if (cl == -1) { - raisePreprocessError(str, "invalid multibyte escape", mode); // MBCLEN_INVALID_P + raisePreprocessError("invalid multibyte escape", mode); // MBCLEN_INVALID_P } if (chLen > 1 || (chBuf[0] & 0x80) != 0) { @@ -402,7 +394,7 @@ private static int unescapeEscapedNonAscii(RopeBuilder to, byte[] bytes, int p, if (encp[0] == null) { encp[0] = enc; } else if (encp[0] != enc) { - raisePreprocessError(str, "escaped non ASCII character in UTF-8 regexp", mode); + raisePreprocessError("escaped non ASCII character in UTF-8 regexp", mode); } } else { if (to != null) { @@ -412,7 +404,7 @@ private static int unescapeEscapedNonAscii(RopeBuilder to, byte[] bytes, int p, return p; } - public static int raisePreprocessError(Rope str, String err, RegexpSupport.ErrorMode mode) + public static int raisePreprocessError(String err, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { switch (mode) { case RAISE: @@ -429,17 +421,17 @@ public static int raisePreprocessError(Rope str, String err, RegexpSupport.Error @SuppressWarnings("fallthrough") @SuppressFBWarnings("SF") - public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int end, Rope str, + public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int end, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { if (p == end || bytes[p++] 
!= (byte) '\\') { - raisePreprocessError(str, "too short escaped multibyte character", mode); + raisePreprocessError("too short escaped multibyte character", mode); } boolean metaPrefix = false, ctrlPrefix = false; int code = 0; while (true) { if (p == end) { - raisePreprocessError(str, "too short escape sequence", mode); + raisePreprocessError("too short escape sequence", mode); } switch (bytes[p++]) { @@ -488,14 +480,14 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e code = StringSupport.scanHex(bytes, p, hlen); int len = StringSupport.hexLength(bytes, p, hlen); if (len < 1) { - raisePreprocessError(str, "invalid hex escape", mode); + raisePreprocessError("invalid hex escape", mode); } p += len; break; case 'M': /* \M-X, \M-\C-X, \M-\cX */ if (metaPrefix) { - raisePreprocessError(str, "duplicate meta escape", mode); + raisePreprocessError("duplicate meta escape", mode); } metaPrefix = true; if (p + 1 < end && bytes[p++] == (byte) '-' && (bytes[p] & 0x80) == 0) { @@ -507,16 +499,16 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e break; } } - raisePreprocessError(str, "too short meta escape", mode); + raisePreprocessError("too short meta escape", mode); case 'C': /* \C-X, \C-\M-X */ if (p == end || bytes[p++] != (byte) '-') { - raisePreprocessError(str, "too short control escape", mode); + raisePreprocessError("too short control escape", mode); } case 'c': /* \cX, \c\M-X */ if (ctrlPrefix) { - raisePreprocessError(str, "duplicate control escape", mode); + raisePreprocessError("duplicate control escape", mode); } ctrlPrefix = true; if (p < end && (bytes[p] & 0x80) == 0) { @@ -528,13 +520,13 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e break; } } - raisePreprocessError(str, "too short control escape", mode); + raisePreprocessError("too short control escape", mode); default: - raisePreprocessError(str, "unexpected escape sequence", mode); + 
raisePreprocessError("unexpected escape sequence", mode); } // switch if (code < 0 || code > 0xff) { - raisePreprocessError(str, "invalid escape code", mode); + raisePreprocessError("invalid escape code", mode); } if (ctrlPrefix) { @@ -549,23 +541,23 @@ public static int readEscapedByte(byte[] to, int toP, byte[] bytes, int p, int e } // while } - public static void preprocessCheck(RopeWithEncoding ropeWithEncoding) throws DeferredRaiseException { + public static void preprocessCheck(TStringWithEncoding ropeWithEncoding) throws DeferredRaiseException { preprocess( - ropeWithEncoding.getRope(), + ropeWithEncoding, ropeWithEncoding.getEncoding(), new RubyEncoding[]{ null }, RegexpSupport.ErrorMode.RAISE); } - public static RopeBuilder preprocess(Rope str, RubyEncoding enc, RubyEncoding[] fixedEnc, + public static TStringBuilder preprocess(TStringWithEncoding str, RubyEncoding enc, RubyEncoding[] fixedEnc, RegexpSupport.ErrorMode mode) throws DeferredRaiseException { - RopeBuilder to = RopeBuilder.createRopeBuilder(str.byteLength()); + TStringBuilder to = TStringBuilder.create(str.byteLength()); - if (enc.jcoding.isAsciiCompatible()) { + if (enc.isAsciiCompatible) { fixedEnc[0] = null; } else { fixedEnc[0] = enc; - to.setEncoding(enc.jcoding); + to.setEncoding(enc); } boolean hasProperty = unescapeNonAscii(to, str, enc, fixedEnc, mode); @@ -573,37 +565,39 @@ public static RopeBuilder preprocess(Rope str, RubyEncoding enc, RubyEncoding[] fixedEnc[0] = enc; } if (fixedEnc[0] != null) { - to.setEncoding(fixedEnc[0].jcoding); + to.setEncoding(fixedEnc[0]); } return to; } - private static void preprocessLight(RopeWithEncoding str, RubyEncoding enc, RubyEncoding[] fixedEnc) + private static void preprocessLight(TStringWithEncoding str, RubyEncoding enc, RubyEncoding[] fixedEnc) throws DeferredRaiseException { - if (enc.jcoding.isAsciiCompatible()) { + if (enc.isAsciiCompatible) { fixedEnc[0] = null; } else { fixedEnc[0] = enc; } - boolean hasProperty = 
unescapeNonAscii(null, str.getRope(), enc, fixedEnc, RegexpSupport.ErrorMode.PREPROCESS); + boolean hasProperty = unescapeNonAscii(null, str, enc, fixedEnc, + RegexpSupport.ErrorMode.PREPROCESS); if (hasProperty && fixedEnc[0] == null) { fixedEnc[0] = enc; } } - public static RopeWithEncoding preprocessDRegexp(RubyContext context, RopeWithEncoding[] strings, + @TruffleBoundary + public static TStringWithEncoding preprocessDRegexp(RubyContext context, TStringWithEncoding[] strings, RegexpOptions options) throws DeferredRaiseException { assert strings.length > 0; - RopeBuilder builder = RopeOperations.toRopeBuilderCopy(strings[0].getRope()); + ByteArrayBuilder builder = ByteArrayBuilder.create(strings[0].getInternalByteArray()); RubyEncoding regexpEnc = processDRegexpElement(context, options, null, strings[0]); for (int i = 1; i < strings.length; i++) { - RopeWithEncoding str = strings[i]; + var str = strings[i]; regexpEnc = processDRegexpElement(context, options, regexpEnc, str); - builder.append(str.getRope()); + builder.append(str); } if (options.isEncodingNone()) { @@ -614,23 +608,20 @@ public static RopeWithEncoding preprocessDRegexp(RubyContext context, RopeWithEn } } - if (regexpEnc != null) { - builder.setEncoding(regexpEnc.jcoding); - } else { + if (regexpEnc == null) { regexpEnc = strings[0].getEncoding(); } - Rope rope = RopeOperations.ropeFromRopeBuilder(builder); - return new RopeWithEncoding(rope, regexpEnc); + return new TStringWithEncoding(builder.toTString(regexpEnc), regexpEnc); } @TruffleBoundary private static RubyEncoding processDRegexpElement(RubyContext context, RegexpOptions options, - RubyEncoding regexpEnc, RopeWithEncoding str) throws DeferredRaiseException { + RubyEncoding regexpEnc, TStringWithEncoding str) throws DeferredRaiseException { RubyEncoding strEnc = str.getEncoding(); if (options.isEncodingNone() && strEnc != Encodings.BINARY) { - if (str.getRope().getCodeRange() != CR_7BIT) { + if (!str.isAsciiOnly()) { throw new 
RaiseException( context, context.getCoreExceptions().regexpError( @@ -694,95 +685,76 @@ private static boolean all7Bit(byte[] bytes) { return true; } + /** \v */ private static final int QUOTED_V = 11; /** rb_reg_quote */ @TruffleBoundary - public static RopeWithEncoding quote19(Rope bs, RubyEncoding encoding) { + public static TStringWithEncoding quote19(ATStringWithEncoding bs) { final boolean asciiOnly = bs.isAsciiOnly(); - int p = 0; - int end = bs.byteLength(); - final byte[] bytes = bs.getBytes(); - final Encoding enc = bs.getEncoding(); - final CodeRange cr = bs.getCodeRange(); - - metaFound: do { - while (p < end) { - final int c; - final int cl; - if (enc.isAsciiCompatible()) { - cl = 1; - c = bytes[p] & 0xff; - } else { - cl = StringSupport.characterLength(enc, cr, bytes, p, end); - c = enc.mbcToCode(bytes, p, end); - } + boolean metaFound = false; - if (!Encoding.isAscii(c)) { - p += StringSupport.characterLength(enc, cr, bytes, p, end, true); - continue; - } + var iterator = bs.createCodePointIterator(); + while (iterator.hasNext()) { + final int c = iterator.nextUncached(); - switch (c) { - case '[': - case ']': - case '{': - case '}': - case '(': - case ')': - case '|': - case '-': - case '*': - case '.': - case '\\': - case '?': - case '+': - case '^': - case '$': - case ' ': - case '#': - case '\t': - case '\f': - case QUOTED_V: - case '\n': - case '\r': - break metaFound; - } - p += cl; + switch (c) { + case '[': + case ']': + case '{': + case '}': + case '(': + case ')': + case '|': + case '-': + case '*': + case '.': + case '\\': + case '?': + case '+': + case '^': + case '$': + case ' ': + case '#': + case '\t': + case '\f': + case QUOTED_V: + case '\n': + case '\r': + metaFound = true; + break; } + } + + if (!metaFound) { if (asciiOnly) { - return new RopeWithEncoding(RopeOperations.withEncoding(bs, USASCIIEncoding.INSTANCE), - Encodings.US_ASCII); + return bs.forceEncoding(Encodings.US_ASCII); + } else { + return bs.asImmutable(); } - return 
new RopeWithEncoding(bs, encoding); - } while (false); + } - RopeBuilder result = RopeBuilder.createRopeBuilder(end * 2); - result.setEncoding(asciiOnly ? USASCIIEncoding.INSTANCE : bs.getEncoding()); - RubyEncoding resultEncoding = asciiOnly ? Encodings.US_ASCII : encoding; - byte[] obytes = result.getUnsafeBytes(); - int op = p; - System.arraycopy(bytes, 0, obytes, 0, op); + var resultEncoding = asciiOnly ? Encodings.US_ASCII : bs.encoding; + var builder = TruffleStringBuilder.create(resultEncoding.tencoding, bs.byteLength() * 2); - while (p < end) { - final int c; - final int cl; - if (enc.isAsciiCompatible()) { - cl = 1; - c = bytes[p] & 0xff; - } else { - cl = StringSupport.characterLength(enc, cr, bytes, p, end); - c = enc.mbcToCode(bytes, p, end); - } + iterator = bs.createCodePointIterator(); + while (iterator.hasNext()) { + int p = iterator.getByteIndex(); + final int c = iterator.nextUncached(); - if (!Encoding.isAscii(c)) { - int n = StringSupport.characterLength(enc, cr, bytes, p, end, true); - while (n-- > 0) { - obytes[op++] = bytes[p++]; + if (c == -1) { + int after = iterator.getByteIndex(); + for (int i = p; i < after; i++) { + builder.appendByteUncached(bs.getByte(i)); } continue; } - p += cl; + + if (!(c >= 0 && Encoding.isAscii(c))) { + builder.appendCodePointUncached(c); + continue; + } + switch (c) { case '[': case ']': @@ -800,38 +772,37 @@ public static RopeWithEncoding quote19(Rope bs, RubyEncoding encoding) { case '^': case '$': case '#': - op += enc.codeToMbc('\\', obytes, op); - break; case ' ': - op += enc.codeToMbc('\\', obytes, op); - op += enc.codeToMbc(' ', obytes, op); - continue; + builder.appendCodePointUncached('\\'); + builder.appendCodePointUncached(c); + break; case '\t': - op += enc.codeToMbc('\\', obytes, op); - op += enc.codeToMbc('t', obytes, op); - continue; + builder.appendCodePointUncached('\\'); + builder.appendCodePointUncached('t'); + break; case '\n': - op += enc.codeToMbc('\\', obytes, op); - op += 
enc.codeToMbc('n', obytes, op); - continue; + builder.appendCodePointUncached('\\'); + builder.appendCodePointUncached('n'); + break; case '\r': - op += enc.codeToMbc('\\', obytes, op); - op += enc.codeToMbc('r', obytes, op); - continue; + builder.appendCodePointUncached('\\'); + builder.appendCodePointUncached('r'); + break; case '\f': - op += enc.codeToMbc('\\', obytes, op); - op += enc.codeToMbc('f', obytes, op); - continue; + builder.appendCodePointUncached('\\'); + builder.appendCodePointUncached('f'); + break; case QUOTED_V: - op += enc.codeToMbc('\\', obytes, op); - op += enc.codeToMbc('v', obytes, op); - continue; + builder.appendCodePointUncached('\\'); + builder.appendCodePointUncached('v'); + break; + default: + builder.appendCodePointUncached(c); + break; } - op += enc.codeToMbc(c, obytes, op); } - result.setLength(op); - return new RopeWithEncoding(RopeOperations.ropeFromRopeBuilder(result), resultEncoding); + return new TStringWithEncoding(builder.toStringUncached(), resultEncoding); } /** WARNING: This mutates options, so the caller should make sure it's a copy */ @@ -860,7 +831,7 @@ static RubyEncoding computeRegexpEncoding(RegexpOptions[] options, RubyEncoding return enc; } - public static void appendOptions(RopeBuilder to, RegexpOptions options) { + public static void appendOptions(TStringBuilder to, RegexpOptions options) { if (options.isMultiline()) { to.append((byte) 'm'); } @@ -873,27 +844,27 @@ public static void appendOptions(RopeBuilder to, RegexpOptions options) { } @SuppressWarnings("unused") - public RopeBuilder toRopeBuilder() { + public ByteArrayBuilder toByteArrayBuilder() { RegexpOptions newOptions = (RegexpOptions) options.clone(); + var byteArray = str.getInternalByteArray(); int p = 0; - int len = str.byteLength(); - byte[] bytes = str.getBytes(); + int len = byteArray.getLength(); - RopeBuilder result = RopeBuilder.createRopeBuilder(len); + TStringBuilder result = TStringBuilder.create(len); result.append((byte) '('); 
result.append((byte) '?'); again: do { - if (len >= 4 && bytes[p] == '(' && bytes[p + 1] == '?') { + if (len >= 4 && byteArray.get(p) == '(' && byteArray.get(p + 1) == '?') { boolean err = true; p += 2; if ((len -= 2) > 0) { do { - if (bytes[p] == 'm') { + if (byteArray.get(p) == 'm') { newOptions = newOptions.setMultiline(true); - } else if (bytes[p] == 'i') { + } else if (byteArray.get(p) == 'i') { newOptions = newOptions.setIgnorecase(true); - } else if (bytes[p] == 'x') { + } else if (byteArray.get(p) == 'x') { newOptions = newOptions.setExtended(true); } else { break; @@ -901,15 +872,15 @@ public RopeBuilder toRopeBuilder() { p++; } while (--len > 0); } - if (len > 1 && bytes[p] == '-') { + if (len > 1 && byteArray.get(p) == '-') { ++p; --len; do { - if (bytes[p] == 'm') { + if (byteArray.get(p) == 'm') { newOptions = newOptions.setMultiline(false); - } else if (bytes[p] == 'i') { + } else if (byteArray.get(p) == 'i') { newOptions = newOptions.setIgnorecase(false); - } else if (bytes[p] == 'x') { + } else if (byteArray.get(p) == 'x') { newOptions = newOptions.setExtended(false); } else { break; @@ -918,20 +889,21 @@ public RopeBuilder toRopeBuilder() { } while (--len > 0); } - if (bytes[p] == ')') { + if (byteArray.get(p) == ')') { --len; ++p; continue again; } - if (bytes[p] == ':' && bytes[p + len - 1] == ')') { + if (byteArray.get(p) == ':' && byteArray.get(p + len - 1) == ')') { + p++; try { new Regex( - bytes, - ++p, - p + (len -= 2), + byteArray.getArray(), + p + byteArray.getOffset(), + p + byteArray.getOffset() + (len -= 2), Option.DEFAULT, - str.getEncoding(), + str.encoding.jcoding, Syntax.DEFAULT, new RegexWarnCallback()); err = false; @@ -962,91 +934,58 @@ public RopeBuilder toRopeBuilder() { } } result.append((byte) ':'); - appendRegexpString19(result, str, p, len, null); + appendRegexpString(result, str, p, len); result.append((byte) ')'); - result.setEncoding(getEncoding()); + result.setEncoding(Encodings.getBuiltInEncoding(getEncoding())); 
return result; //return RubyString.newString(getRuntime(), result, getEncoding()).infectBy(this); } while (true); } @TruffleBoundary - public void appendRegexpString19(RopeBuilder to, Rope str, int start, int len, Encoding resEnc) { - int p = start; - int end = p + len; + public void appendRegexpString(TStringBuilder to, TStringWithEncoding fullStr, int start, int len) { + var str = fullStr.substring(start, len); - final CodeRange cr = str.getCodeRange(); - final Encoding enc = str.getEncoding(); - final byte[] bytes = str.getBytes(); - boolean needEscape = false; - while (p < end) { - final int c; - final int cl; - if (enc.isAsciiCompatible()) { - cl = 1; - c = bytes[p] & 0xff; - } else { - cl = StringSupport.characterLength(enc, cr, bytes, p, end); - c = enc.mbcToCode(bytes, p, end); - } + final var enc = str.encoding.jcoding; + var iterator = str.createCodePointIterator(); - if (!Encoding.isAscii(c)) { - p += StringSupport.characterLength(enc, cr, bytes, p, end, true); - } else if (c != '/' && enc.isPrint(c)) { - p += cl; - } else { + boolean needEscape = false; + while (iterator.hasNext()) { + final int c = iterator.nextUncached(); + if ((c >= 0 && Encoding.isAscii(c)) && (c == '/' || !enc.isPrint(c))) { needEscape = true; break; } } + if (!needEscape) { - to.append(bytes, start, len); + to.append(str); } else { - p = start; - while (p < end) { - final int c; - final int cl; - if (enc.isAsciiCompatible()) { - cl = 1; - c = bytes[p] & 0xff; - } else { - cl = StringSupport.characterLength(enc, cr, bytes, p, end); - c = enc.mbcToCode(bytes, p, end); - } - - if (c == '\\' && p + cl < end) { - int n = cl + StringSupport.characterLength(enc, cr, bytes, p + cl, end); - to.append(bytes, p, n); - p += n; - continue; + iterator = str.createCodePointIterator(); + while (iterator.hasNext()) { + final int p = iterator.getByteIndex(); + final int c = iterator.nextUncached(); + + if (c == '\\' && iterator.hasNext()) { + iterator.nextUncached(); + to.append(str, p, 
iterator.getByteIndex() - p); } else if (c == '/') { to.append((byte) '\\'); - to.append(bytes, p, cl); - } else if (!Encoding.isAscii(c)) { - int l = StringSupport.characterLength(enc, cr, bytes, p, end); - if (l <= 0) { - l = 1; + to.append(str, p, iterator.getByteIndex() - p); + } else if (!(c >= 0 && Encoding.isAscii(c))) { + if (c == -1) { to.append(StringUtils.formatASCIIBytes("\\x%02X", c)); - } else if (resEnc != null) { - int code = enc.mbcToCode(bytes, p, end); - to.append( - String.format(StringSupport.escapedCharFormat(code, enc.isUnicode()), code).getBytes( - StandardCharsets.US_ASCII)); } else { - to.append(bytes, p, l); + to.append(str, p, iterator.getByteIndex() - p); } - p += l; - - continue; } else if (enc.isPrint(c)) { - to.append(bytes, p, cl); + to.append(str, p, iterator.getByteIndex() - p); } else if (!enc.isSpace(c)) { to.append(StringUtils.formatASCIIBytes("\\x%02X", c)); } else { - to.append(bytes, p, cl); + to.append(str, p, iterator.getByteIndex() - p); } - p += cl; } } } diff --git a/src/main/java/org/truffleruby/core/regexp/InterpolatedRegexpNode.java b/src/main/java/org/truffleruby/core/regexp/InterpolatedRegexpNode.java index df7848d7210d..dea21a0d92d9 100644 --- a/src/main/java/org/truffleruby/core/regexp/InterpolatedRegexpNode.java +++ b/src/main/java/org/truffleruby/core/regexp/InterpolatedRegexpNode.java @@ -9,10 +9,11 @@ */ package org.truffleruby.core.regexp; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.AsTruffleStringNode; import org.truffleruby.core.cast.ToSNode; import org.truffleruby.core.regexp.InterpolatedRegexpNodeFactory.RegexpBuilderNodeGen; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeWithEncoding; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.NotOptimizedWarningNode; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyContextSourceNode; @@ -29,12 
+30,12 @@ public class InterpolatedRegexpNode extends RubyContextSourceNode { @Children private final ToSNode[] children; @Child private RegexpBuilderNode builderNode; - @Child private RubyStringLibrary rubyStringLibrary; + private final RubyStringLibrary rubyStringLibrary = RubyStringLibrary.create(); + @Child private AsTruffleStringNode asTruffleStringNode = AsTruffleStringNode.create(); public InterpolatedRegexpNode(ToSNode[] children, RegexpOptions options) { this.children = children; builderNode = RegexpBuilderNode.create(options); - rubyStringLibrary = RubyStringLibrary.getFactory().createDispatched(2); } @Override @@ -43,18 +44,20 @@ public Object execute(VirtualFrame frame) { } @ExplodeLoop - protected RopeWithEncoding[] executeChildren(VirtualFrame frame) { - RopeWithEncoding[] values = new RopeWithEncoding[children.length]; + protected TStringWithEncoding[] executeChildren(VirtualFrame frame) { + TStringWithEncoding[] values = new TStringWithEncoding[children.length]; for (int i = 0; i < children.length; i++) { final Object value = children[i].execute(frame); - values[i] = new RopeWithEncoding(rubyStringLibrary.getRope(value), rubyStringLibrary.getEncoding(value)); + values[i] = new TStringWithEncoding(asTruffleStringNode, + rubyStringLibrary.getTString(value), + rubyStringLibrary.getEncoding(value)); } return values; } public abstract static class RegexpBuilderNode extends RubyBaseNode { - @Child private RopeNodes.EqualNode ropesEqualNode = RopeNodes.EqualNode.create(); + @Child private TruffleString.EqualNode equalNode = TruffleString.EqualNode.create(); private final RegexpOptions options; public static RegexpBuilderNode create(RegexpOptions options) { @@ -65,29 +68,30 @@ public RegexpBuilderNode(RegexpOptions options) { this.options = options; } - public abstract Object execute(RopeWithEncoding[] parts); + public abstract Object execute(TStringWithEncoding[] parts); @Specialization(guards = "ropesWithEncodingsMatch(cachedParts, parts)", limit = 
"getDefaultCacheLimit()") - protected Object executeFast(RopeWithEncoding[] parts, - @Cached(value = "parts", dimensions = 1) RopeWithEncoding[] cachedParts, + protected Object executeFast(TStringWithEncoding[] parts, + @Cached(value = "parts", dimensions = 1) TStringWithEncoding[] cachedParts, @Cached("createRegexp(cachedParts)") RubyRegexp regexp) { return regexp; } @Specialization(replaces = "executeFast") - protected Object executeSlow(RopeWithEncoding[] parts, + protected Object executeSlow(TStringWithEncoding[] parts, @Cached NotOptimizedWarningNode notOptimizedWarningNode) { notOptimizedWarningNode.warn("unstable interpolated regexps are not optimized"); return createRegexp(parts); } @ExplodeLoop - protected boolean ropesWithEncodingsMatch(RopeWithEncoding[] a, RopeWithEncoding[] b) { + protected boolean ropesWithEncodingsMatch(TStringWithEncoding[] a, TStringWithEncoding[] b) { for (int i = 0; i < a.length; i++) { - if (!ropesEqualNode.execute(a[i].getRope(), b[i].getRope())) { + var aEncoding = a[i].encoding; + if (aEncoding != b[i].encoding) { return false; } - if (a[i].getEncoding() != b[i].getEncoding()) { + if (!equalNode.execute(a[i].tstring, b[i].tstring, aEncoding.tencoding)) { return false; } } @@ -95,12 +99,10 @@ protected boolean ropesWithEncodingsMatch(RopeWithEncoding[] a, RopeWithEncoding } @TruffleBoundary - protected RubyRegexp createRegexp(RopeWithEncoding[] strings) { + protected RubyRegexp createRegexp(TStringWithEncoding[] strings) { try { - final RopeWithEncoding preprocessed; - preprocessed = ClassicRegexp.preprocessDRegexp(getContext(), strings, options); - return RubyRegexp - .create(getLanguage(), preprocessed.getRope(), preprocessed.getEncoding(), options, this); + var preprocessed = ClassicRegexp.preprocessDRegexp(getContext(), strings, options); + return RubyRegexp.create(getLanguage(), preprocessed.tstring, preprocessed.encoding, options, this); } catch (DeferredRaiseException dre) { throw dre.getException(getContext()); } diff 
--git a/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java b/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java index 6b7ee384429c..0b03c007bcb4 100644 --- a/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java +++ b/src/main/java/org/truffleruby/core/regexp/MatchDataNodes.java @@ -16,7 +16,8 @@ import com.oracle.truffle.api.interop.InteropException; import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.profiles.LoopConditionProfile; -import org.jcodings.Encoding; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.joni.NameEntry; import org.joni.Regex; import org.joni.Region; @@ -31,13 +32,12 @@ import org.truffleruby.core.array.ArrayUtils; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.cast.ToIntNode; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.range.RubyIntRange; import org.truffleruby.core.regexp.MatchDataNodesFactory.ValuesNodeFactory; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.RubyString; +import org.truffleruby.core.string.StringHelperNodes.SingleByteOptimizableNode; import org.truffleruby.core.string.StringSupport; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.symbol.RubySymbol; @@ -99,10 +99,11 @@ private static void forceLazyMatchData(RubyMatchData matchData, InteropLibrary i } @TruffleBoundary - private static Region getCharOffsetsManyRegs(RubyMatchData matchData, Rope source, Encoding encoding) { + private static Region getCharOffsetsManyRegs(RubyMatchData matchData, AbstractTruffleString source, + RubyEncoding encoding) { // Taken from org.jruby.RubyMatchData - assert !encoding.isSingleByte() : "Should be checked by callers"; + assert !encoding.isSingleByte : 
"Should be checked by callers"; final Region regs = matchData.region; int numRegs = regs.numRegs; @@ -144,12 +145,13 @@ private static Region getCharOffsetsManyRegs(RubyMatchData matchData, Rope sourc } @TruffleBoundary - private static void updatePairs(Rope source, Encoding encoding, Pair[] pairs) { + private static void updatePairs(AbstractTruffleString source, RubyEncoding encoding, Pair[] pairs) { // Taken from org.jruby.RubyMatchData Arrays.sort(pairs); - byte[] bytes = source.getBytes(); - int p = 0; + var byteArray = source.getInternalByteArrayUncached(encoding.tencoding); + byte[] bytes = byteArray.getArray(); + int p = byteArray.getOffset(); int s = p; int c = 0; @@ -162,19 +164,19 @@ private static void updatePairs(Rope source, Encoding encoding, Pair[] pairs) { } @TruffleBoundary - private static Region createCharOffsets(RubyMatchData matchData, Rope source) { - final Encoding enc = source.getEncoding(); - final Region charOffsets = getCharOffsetsManyRegs(matchData, source, enc); + private static Region createCharOffsets(RubyMatchData matchData, AbstractTruffleString source, + RubyEncoding encoding) { + final Region charOffsets = getCharOffsetsManyRegs(matchData, source, encoding); matchData.charOffsets = charOffsets; return charOffsets; } - private static Region getCharOffsets(RubyMatchData matchData, Rope sourceRope) { + private static Region getCharOffsets(RubyMatchData matchData, AbstractTruffleString source, RubyEncoding encoding) { final Region charOffsets = matchData.charOffsets; if (charOffsets != null) { return charOffsets; } else { - return createCharOffsets(matchData, sourceRope); + return createCharOffsets(matchData, source, encoding); } } @@ -238,7 +240,6 @@ public abstract static class GetIndexNode extends CoreMethodArrayArgumentsNode { @Child private RegexpNode regexpNode; @Child private ValuesNode getValuesNode = ValuesNode.create(); - @Child private RopeNodes.SubstringNode substringNode = RopeNodes.SubstringNode.create(); public static 
GetIndexNode create(RubyNode... nodes) { return MatchDataNodesFactory.GetIndexNodeFactory.create(nodes); @@ -248,12 +249,13 @@ public static GetIndexNode create(RubyNode... nodes) { @Specialization protected Object getIndex(RubyMatchData matchData, int index, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached ConditionProfile normalizedIndexProfile, @Cached ConditionProfile indexOutOfBoundsProfile, @Cached ConditionProfile lazyProfile, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary interop, - @Cached ConditionProfile hasValueProfile) { + @Cached ConditionProfile hasValueProfile, + @Cached TruffleString.SubstringByteIndexNode substringNode) { final Region region = matchData.region; if (normalizedIndexProfile.profile(index < 0)) { @@ -268,16 +270,7 @@ protected Object getIndex(RubyMatchData matchData, int index, NotProvided length if (hasValueProfile.profile(start >= 0 && end >= 0)) { final Object source = matchData.source; - final Rope sourceRope = strings.getRope(source); - final Rope rope = substringNode.executeSubstring(sourceRope, start, end - start); - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - strings.getEncoding(source)); - AllocationTracing.trace(string, this); - return string; + return createSubString(substringNode, strings, source, start, end - start); } else { return nil; } @@ -325,7 +318,8 @@ protected Object getIndexSymbolKnownRegexp(RubyMatchData matchData, RubySymbol s if (backRefs == 1) { return executeGetIndex(matchData, backRefIndex, NotProvided.INSTANCE); } else { - final int i = getBackRef(matchData, cachedRegexp, cachedSymbol.getRope(), lazyProfile, libInterop); + final int i = getBackRef(matchData, cachedRegexp, cachedSymbol.tstring, cachedSymbol.encoding, + lazyProfile, libInterop); return executeGetIndex(matchData, i, NotProvided.INSTANCE); } } @@ -336,18 +330,20 @@ 
protected Object getIndexSymbol(RubyMatchData matchData, RubySymbol symbol, NotP @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary libInterop) { return executeGetIndex( matchData, - getBackRef(matchData, getRegexp(matchData), symbol.getRope(), lazyProfile, libInterop), + getBackRef(matchData, getRegexp(matchData), symbol.tstring, symbol.encoding, lazyProfile, + libInterop), NotProvided.INSTANCE); } - @Specialization(guards = "libIndex.isRubyString(index)") + @Specialization(guards = "libIndex.isRubyString(index)", limit = "1") protected Object getIndexString(RubyMatchData matchData, Object index, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libIndex, + @Cached RubyStringLibrary libIndex, @Cached ConditionProfile lazyProfile, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary libInterop) { return executeGetIndex( matchData, - getBackRef(matchData, getRegexp(matchData), libIndex.getRope(index), lazyProfile, libInterop), + getBackRef(matchData, getRegexp(matchData), libIndex.getTString(index), libIndex.getEncoding(index), + lazyProfile, libInterop), NotProvided.INSTANCE); } @@ -386,12 +382,14 @@ protected static NameEntry findNameEntry(RubyRegexp regexp, RubySymbol symbol) { Regex regex = regexp.regex; if (regex.numberOfNames() > 0) { - Rope rope = symbol.getRope(); + var byteArray = symbol.tstring.getInternalByteArrayUncached(symbol.encoding.tencoding); for (Iterator i = regex.namedBackrefIterator(); i.hasNext();) { final NameEntry e = i.next(); - if (bytesEqual(rope.getBytes(), rope.byteLength(), e.name, e.nameP, e.nameEnd)) { + int nameLen = e.nameEnd - e.nameP; + if (nameLen == byteArray.getLength() && ArrayUtils.regionEquals(byteArray.getArray(), + byteArray.getOffset(), e.name, e.nameP, byteArray.getLength())) { return e; } } @@ -407,29 +405,31 @@ protected RubyRegexp getRegexp(RubyMatchData matchData) { return regexpNode.executeGetRegexp(matchData); } - private int getBackRef(RubyMatchData 
matchData, RubyRegexp regexp, Rope name, + private int getBackRef(RubyMatchData matchData, RubyRegexp regexp, AbstractTruffleString name, RubyEncoding enc, ConditionProfile lazyProfile, InteropLibrary libInterop) { if (lazyProfile.profile(matchData.tRegexResult != null)) { // force the calculation of lazy capture group results before invoking nameToBackrefNumber() forceLazyMatchData(matchData, libInterop); } - return nameToBackrefNumber(matchData, regexp, name); + return nameToBackrefNumber(matchData, regexp, name, enc); } @TruffleBoundary - private int nameToBackrefNumber(RubyMatchData matchData, RubyRegexp regexp, Rope name) { + private int nameToBackrefNumber(RubyMatchData matchData, RubyRegexp regexp, AbstractTruffleString name, + RubyEncoding enc) { + var byteArray = name.getInternalByteArrayUncached(enc.tencoding); try { return regexp.regex.nameToBackrefNumber( - name.getBytes(), - 0, - name.byteLength(), + byteArray.getArray(), + byteArray.getOffset(), + byteArray.getEnd(), matchData.region); } catch (ValueException e) { throw new RaiseException( getContext(), coreExceptions().indexError( StringUtils - .format("undefined group name reference: %s", RopeOperations.decodeRope(name)), + .format("undefined group name reference: %s", name.toJavaStringUncached()), this)); } } @@ -443,17 +443,6 @@ protected static int numBackRefs(NameEntry nameEntry) { protected static int backRefIndex(NameEntry nameEntry) { return nameEntry == null ? 
0 : nameEntry.getBackRefs()[0]; } - - @TruffleBoundary - private static boolean bytesEqual(byte[] bytes, int byteLength, byte[] name, int nameP, int nameEnd) { - if (bytes == name && nameP == 0 && byteLength == nameEnd) { - return true; - } else if (nameEnd - nameP != byteLength) { - return false; - } else { - return ArrayUtils.regionEquals(bytes, 0, name, nameP, byteLength); - } - } } @Primitive(name = "match_data_begin", lowerFixnum = 1) @@ -464,8 +453,8 @@ protected Object begin(RubyMatchData matchData, int index, @Cached ConditionProfile lazyProfile, @Cached ConditionProfile negativeBeginProfile, @Cached ConditionProfile multiByteCharacterProfile, - @Cached RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached RubyStringLibrary strings, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary interop) { final int begin = getStart(matchData, index, lazyProfile, interop); @@ -473,9 +462,12 @@ protected Object begin(RubyMatchData matchData, int index, return nil; } - final Rope matchDataSourceRope = strings.getRope(matchData.source); - if (multiByteCharacterProfile.profile(!singleByteOptimizableNode.execute(matchDataSourceRope))) { - return getCharOffsets(matchData, matchDataSourceRope).beg[index]; + var matchDataSource = strings.getTString(matchData.source); + var encoding = strings.getEncoding(matchData.source); + + if (multiByteCharacterProfile.profile( + !singleByteOptimizableNode.execute(matchDataSource, encoding))) { + return getCharOffsets(matchData, matchDataSource, encoding).beg[index]; } return begin; @@ -497,8 +489,6 @@ protected boolean inBounds(RubyMatchData matchData, int index) { public abstract static class ValuesNode extends CoreMethodArrayArgumentsNode { - @Child private RopeNodes.SubstringNode substringNode = RopeNodes.SubstringNode.create(); - public static ValuesNode create() { return 
ValuesNodeFactory.create(null); } @@ -507,13 +497,13 @@ public static ValuesNode create() { @Specialization protected Object[] getValues(RubyMatchData matchData, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached ConditionProfile lazyProfile, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary interop, @Cached ConditionProfile hasValueProfile, - @Cached LoopConditionProfile loopProfile) { + @Cached LoopConditionProfile loopProfile, + @Cached TruffleString.SubstringByteIndexNode substringNode) { final Object source = matchData.source; - final Rope sourceRope = strings.getRope(source); final Region region = matchData.region; final Object[] values = new Object[region.numRegs]; @@ -524,15 +514,7 @@ protected Object[] getValues(RubyMatchData matchData, final int end = getEnd(matchData, n, lazyProfile, interop); if (hasValueProfile.profile(start >= 0 && end >= 0)) { - final Rope rope = substringNode.executeSubstring(sourceRope, start, end - start); - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - strings.getEncoding(source)); - AllocationTracing.trace(string, this); - values[n] = string; + values[n] = createSubString(substringNode, strings, source, start, end - start); } else { values[n] = nil; } @@ -556,8 +538,8 @@ protected Object end(RubyMatchData matchData, int index, @Cached ConditionProfile lazyProfile, @Cached ConditionProfile negativeEndProfile, @Cached ConditionProfile multiByteCharacterProfile, - @Cached RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached RubyStringLibrary strings, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary interop) { final int end = getEnd(matchData, index, lazyProfile, interop); @@ -565,9 +547,12 @@ protected Object 
end(RubyMatchData matchData, int index, return nil; } - final Rope matchDataSourceRope = strings.getRope(matchData.source); - if (multiByteCharacterProfile.profile(!singleByteOptimizableNode.execute(matchDataSourceRope))) { - return getCharOffsets(matchData, matchDataSourceRope).end[index]; + var matchDataSource = strings.getTString(matchData.source); + var encoding = strings.getEncoding(matchData.source); + + if (multiByteCharacterProfile.profile( + !singleByteOptimizableNode.execute(matchDataSource, encoding))) { + return getCharOffsets(matchData, matchDataSource, encoding).end[index]; } return end; @@ -643,55 +628,37 @@ protected int length(RubyMatchData matchData) { @CoreMethod(names = "pre_match") public abstract static class PreMatchNode extends CoreMethodArrayArgumentsNode { - @Child private RopeNodes.SubstringNode substringNode = RopeNodes.SubstringNode.create(); - public abstract RubyString execute(RubyMatchData matchData); @Specialization protected RubyString preMatch(RubyMatchData matchData, @Cached ConditionProfile lazyProfile, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary interop, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings, + @Cached TruffleString.SubstringByteIndexNode substringNode) { Object source = matchData.source; - Rope sourceRope = strings.getRope(source); final int length = getStart(matchData, 0, lazyProfile, interop); - final Rope rope = substringNode.executeSubstring(sourceRope, 0, length); - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - strings.getEncoding(source)); - AllocationTracing.trace(string, this); - return string; + return createSubString(substringNode, strings, source, 0, length); } } @CoreMethod(names = "post_match") public abstract static class PostMatchNode extends CoreMethodArrayArgumentsNode { - @Child private RopeNodes.SubstringNode substringNode = 
RopeNodes.SubstringNode.create(); - public abstract RubyString execute(RubyMatchData matchData); @Specialization protected RubyString postMatch(RubyMatchData matchData, @Cached ConditionProfile lazyProfile, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary interop, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings, + @Cached TruffleString.SubstringByteIndexNode substringNode) { Object source = matchData.source; - Rope sourceRope = strings.getRope(source); + var tstring = strings.getTString(source); + var encoding = strings.getEncoding(source); final int start = getEnd(matchData, 0, lazyProfile, interop); - int length = sourceRope.byteLength() - start; - Rope rope = substringNode.executeSubstring(sourceRope, start, length); - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - strings.getEncoding(source)); - AllocationTracing.trace(string, this); - return string; + int length = tstring.byteLength(encoding.tencoding) - start; + return createSubString(substringNode, tstring, encoding, start, length); } } diff --git a/src/main/java/org/truffleruby/core/regexp/RegexpCacheKey.java b/src/main/java/org/truffleruby/core/regexp/RegexpCacheKey.java index e41ccd22d3dc..364071f663b3 100644 --- a/src/main/java/org/truffleruby/core/regexp/RegexpCacheKey.java +++ b/src/main/java/org/truffleruby/core/regexp/RegexpCacheKey.java @@ -11,41 +11,31 @@ import java.util.Objects; -import org.jcodings.specific.ASCIIEncoding; - -import org.truffleruby.core.encoding.Encodings; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.NativeRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; +import 
org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.control.DeferredRaiseException; public final class RegexpCacheKey { - public static RegexpCacheKey calculate(RopeWithEncoding rope, RegexpOptions options) throws DeferredRaiseException { - if (options.isEncodingNone()) { - rope = new RopeWithEncoding( - RopeOperations.withEncoding(rope.getRope(), ASCIIEncoding.INSTANCE), - Encodings.BINARY); - } - RubyEncoding fixedEnc[] = new RubyEncoding[]{ null }; - RopeBuilder processed = ClassicRegexp - .preprocess(rope.getRope(), rope.getEncoding(), fixedEnc, RegexpSupport.ErrorMode.RAISE); - RegexpOptions optionsArray[] = new RegexpOptions[]{ options }; - RubyEncoding enc = ClassicRegexp.computeRegexpEncoding(optionsArray, rope.getEncoding(), fixedEnc); + public static RegexpCacheKey calculate(TStringWithEncoding source, RegexpOptions options) + throws DeferredRaiseException { + RubyEncoding[] fixedEnc = new RubyEncoding[]{ null }; + TStringBuilder processed = ClassicRegexp.preprocess(source, source.getEncoding(), fixedEnc, + RegexpSupport.ErrorMode.RAISE); + RegexpOptions[] optionsArray = new RegexpOptions[]{ options }; + RubyEncoding enc = ClassicRegexp.computeRegexpEncoding(optionsArray, source.getEncoding(), fixedEnc); - return new RegexpCacheKey(processed.toRope(), enc, optionsArray[0]); + return new RegexpCacheKey(processed.toTString(), enc, optionsArray[0]); } - public final Rope rope; + public final TruffleString tstring; public final RubyEncoding encoding; public final RegexpOptions options; - private RegexpCacheKey(Rope rope, RubyEncoding encoding, RegexpOptions options) { - assert !(rope instanceof NativeRope); - this.rope = rope; + private RegexpCacheKey(TruffleString tstring, RubyEncoding encoding, RegexpOptions options) { + this.tstring = tstring; this.encoding = encoding; this.options = options; } @@ -54,28 +44,20 @@ public RegexpOptions getOptions() { return options; } - public 
Rope getRope() { - return rope; - } - public RubyEncoding getEncoding() { return encoding; } - public int getJoniOptions() { - return options.toJoniOptions(); - } - @Override public int hashCode() { - return Objects.hash(rope, encoding, options); + return Objects.hash(encoding, tstring, options); } @Override public boolean equals(Object o) { if (o instanceof RegexpCacheKey) { final RegexpCacheKey other = (RegexpCacheKey) o; - return rope.equals(other.rope) && encoding == other.encoding && options.equals(other.options); + return encoding == other.encoding && tstring.equals(other.tstring) && options.equals(other.options); } else { return false; } @@ -83,8 +65,6 @@ public boolean equals(Object o) { @Override public String toString() { - return '/' + RopeOperations.decodeOrEscapeBinaryRope(rope) + '/' + - options.toOptionsString() + - " -- " + RopeOperations.decodeOrEscapeBinaryRope(encoding.name.rope); + return '/' + tstring.toString() + '/' + options.toOptionsString() + " -- " + encoding.name; } } diff --git a/src/main/java/org/truffleruby/core/regexp/RegexpNodes.java b/src/main/java/org/truffleruby/core/regexp/RegexpNodes.java index 680050c7a274..8d01225c54cd 100644 --- a/src/main/java/org/truffleruby/core/regexp/RegexpNodes.java +++ b/src/main/java/org/truffleruby/core/regexp/RegexpNodes.java @@ -12,9 +12,8 @@ import java.util.Arrays; import java.util.Iterator; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.profiles.BranchProfile; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.joni.NameEntry; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -24,14 +23,12 @@ import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.cast.ToStrNode; import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.klass.RubyClass; import 
org.truffleruby.core.regexp.RegexpNodesFactory.ToSNodeFactory; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; +import org.truffleruby.core.string.ATStringWithEncoding; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.Visibility; import org.truffleruby.language.control.DeferredRaiseException; @@ -49,20 +46,16 @@ public abstract class RegexpNodes { @CoreMethod(names = "hash") public abstract static class HashNode extends CoreMethodArrayArgumentsNode { - @Specialization protected int hash(RubyRegexp regexp) { - int options = regexp.regex.getOptions() & - ~32 /* option n, NO_ENCODING in common/regexp.rb */; + int options = regexp.regex.getOptions() & ~32 /* option n, NO_ENCODING in common/regexp.rb */; return options ^ regexp.source.hashCode(); } - } @CoreMethod(names = { "quote", "escape" }, onSingleton = true, required = 1) public abstract static class QuoteNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode; @Child private ToStrNode toStrNode; @Child private QuoteNode quoteNode; @@ -72,19 +65,15 @@ public static QuoteNode create() { return RegexpNodesFactory.QuoteNodeFactory.create(null); } - @Specialization(guards = "libRaw.isRubyString(raw)") + @Specialization(guards = "libRaw.isRubyString(raw)", limit = "1") protected RubyString quoteString(Object raw, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libRaw) { - final Rope rope = libRaw.getRope(raw); - final RopeWithEncoding ropeQuotedResult = ClassicRegexp.quote19(rope, libRaw.getEncoding(raw)); - return getMakeStringNode().fromRope(ropeQuotedResult.getRope(), ropeQuotedResult.getEncoding()); + @Cached RubyStringLibrary libRaw) { + 
return createString(ClassicRegexp.quote19(new ATStringWithEncoding(libRaw, raw))); } @Specialization protected RubyString quoteSymbol(RubySymbol raw) { - return doQuoteString( - getMakeStringNode() - .executeMake(raw.getString(), Encodings.UTF_8, CodeRange.CR_UNKNOWN)); + return doQuoteString(createString(raw.tstring, raw.encoding)); } @Fallback @@ -104,33 +93,19 @@ private RubyString doQuoteString(Object raw) { } return quoteNode.execute(raw); } - - private StringNodes.MakeStringNode getMakeStringNode() { - if (makeStringNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(StringNodes.MakeStringNode.create()); - } - - return makeStringNode; - } } @CoreMethod(names = "source") public abstract static class SourceNode extends CoreMethodArrayArgumentsNode { - @Specialization - protected RubyString source(RubyRegexp regexp, - @Cached StringNodes.MakeStringNode makeStringNode) { - return makeStringNode.fromRope(regexp.source, regexp.encoding); + protected RubyString source(RubyRegexp regexp) { + return createString(regexp.source, regexp.encoding); } - } @CoreMethod(names = "to_s") public abstract static class ToSNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - public static ToSNode create() { return ToSNodeFactory.create(null); } @@ -140,29 +115,29 @@ public static ToSNode create() { @Specialization(guards = "regexp.regex == cachedRegexp.regex") protected RubyString toSCached(RubyRegexp regexp, @Cached("regexp") RubyRegexp cachedRegexp, - @Cached("createRope(cachedRegexp)") Rope rope) { - return makeStringNode.fromRope(rope, Encodings.getBuiltInEncoding(rope.getEncoding().getIndex())); + @Cached("createTString(cachedRegexp)") TStringWithEncoding string) { + return createString(string); } @Specialization protected RubyString toS(RubyRegexp regexp) { - final Rope rope = createRope(regexp); - return makeStringNode.fromRope(rope, 
Encodings.getBuiltInEncoding(rope.getEncoding().getIndex())); + return createString(createTString(regexp)); } @TruffleBoundary - protected Rope createRope(RubyRegexp regexp) { + protected TStringWithEncoding createTString(RubyRegexp regexp) { final ClassicRegexp classicRegexp; + try { classicRegexp = new ClassicRegexp( getContext(), - regexp.source, - regexp.encoding, + new TStringWithEncoding(regexp.source, regexp.encoding), RegexpOptions.fromEmbeddedOptions(regexp.regex.getOptions())); } catch (DeferredRaiseException dre) { throw dre.getException(getContext()); } - return classicRegexp.toRopeBuilder().toRope(); + + return classicRegexp.toByteArrayBuilder().toTStringWithEnc(regexp.encoding); } } @@ -183,8 +158,8 @@ protected RubyArray regexpNames(RubyRegexp regexp) { final NameEntry e = iter.next(); final byte[] bytes = Arrays.copyOfRange(e.name, e.nameP, e.nameEnd); - final Rope rope = RopeOperations.create(bytes, UTF8Encoding.INSTANCE, CodeRange.CR_UNKNOWN); - final RubySymbol name = getSymbol(rope, Encodings.UTF_8); + var tstring = TStringUtils.fromByteArray(bytes, Encodings.UTF_8); + final RubySymbol name = getSymbol(tstring, Encodings.UTF_8); final int[] backrefs = e.getBackRefs(); final RubyArray backrefsRubyArray = createArray(backrefs); @@ -219,15 +194,17 @@ protected boolean fixedEncoding(RubyRegexp regexp) { @Primitive(name = "regexp_compile", lowerFixnum = 1) public abstract static class RegexpCompileNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "libPattern.isRubyString(pattern)") + @Specialization(guards = "libPattern.isRubyString(pattern)", limit = "1") protected RubyRegexp initialize(Object pattern, int options, @Cached BranchProfile errorProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern) { + @Cached TruffleString.AsTruffleStringNode asTruffleStringNode, + @Cached RubyStringLibrary libPattern) { + var encoding = libPattern.getEncoding(pattern); try { return RubyRegexp.create( getLanguage(), - 
libPattern.getRope(pattern), - libPattern.getEncoding(pattern), + asTruffleStringNode.execute(libPattern.getTString(pattern), encoding.tencoding), + encoding, RegexpOptions.fromEmbeddedOptions(options), this); } catch (DeferredRaiseException dre) { diff --git a/src/main/java/org/truffleruby/core/regexp/RubyRegexp.java b/src/main/java/org/truffleruby/core/regexp/RubyRegexp.java index 3245cc65ce72..f0e0d92fcfe1 100644 --- a/src/main/java/org/truffleruby/core/regexp/RubyRegexp.java +++ b/src/main/java/org/truffleruby/core/regexp/RubyRegexp.java @@ -18,37 +18,40 @@ import com.oracle.truffle.api.library.ExportMessage; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.TruffleString; import org.joni.Regex; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.kernel.KernelNodes; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeWithEncoding; import org.truffleruby.language.ImmutableRubyObjectNotCopyable; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.control.DeferredRaiseException; import org.truffleruby.language.dispatch.DispatchNode; @ExportLibrary(InteropLibrary.class) -public class RubyRegexp extends ImmutableRubyObjectNotCopyable implements TruffleObject, Comparable { +public final class RubyRegexp extends ImmutableRubyObjectNotCopyable implements TruffleObject, Comparable { @TruffleBoundary public static RubyRegexp create(RubyLanguage language, - Rope setSource, - RubyEncoding setSourceEncoding, + TruffleString source, + RubyEncoding sourceEncoding, RegexpOptions regexpOptions, Node currentNode) throws DeferredRaiseException { - final RegexpCacheKey key = RegexpCacheKey.calculate( - new RopeWithEncoding(setSource, setSourceEncoding), - regexpOptions); + var strEnc = new 
TStringWithEncoding(source, sourceEncoding); + if (regexpOptions.isEncodingNone()) { + strEnc = strEnc.forceEncoding(Encodings.BINARY); + } + + final RegexpCacheKey key = RegexpCacheKey.calculate(strEnc, regexpOptions); RubyRegexp regexp = language.getRegexp(key); if (regexp == null) { - RegexpOptions optionsArray[] = new RegexpOptions[]{ regexpOptions }; + var optionsArray = new RegexpOptions[]{ regexpOptions }; final Regex regex = TruffleRegexpNodes.compile( - language, null, - new RopeWithEncoding(setSource, setSourceEncoding), + strEnc, optionsArray, currentNode); regexp = new RubyRegexp(regex, optionsArray[0]); @@ -65,7 +68,7 @@ public static RubyRegexp create(RubyLanguage language, } public final Regex regex; - public final Rope source; + public final TruffleString source; public final RubyEncoding encoding; public final RegexpOptions options; public final EncodingCache cachedEncodings; @@ -76,10 +79,9 @@ private RubyRegexp(Regex regex, RegexpOptions options) { // in the Regex object as the "user object". Since ropes are immutable, we need to take this updated copy when // constructing the final regexp. 
this.regex = regex; - final RopeWithEncoding ropeWithEncoding = (RopeWithEncoding) regex.getUserObject(); - this.source = ropeWithEncoding.getRope(); + final TStringWithEncoding ropeWithEncoding = (TStringWithEncoding) regex.getUserObject(); + this.source = ropeWithEncoding.tstring; this.encoding = ropeWithEncoding.getEncoding(); - assert source.encoding == encoding.jcoding; this.options = options; this.cachedEncodings = new EncodingCache(); this.tregexCache = new TRegexCache(); @@ -111,7 +113,11 @@ protected RubyClass getMetaObject( @Override public int compareTo(RubyRegexp o) { - final int sourceCompare = source.compareTo(o.source); + // Compare as binary as CRuby compares bytes regardless of the encodings + var a = source.forceEncodingUncached(encoding.tencoding, Encodings.BINARY.tencoding); + var b = o.source.forceEncodingUncached(encoding.tencoding, Encodings.BINARY.tencoding); + + final int sourceCompare = a.compareBytesUncached(b, Encodings.BINARY.tencoding); if (sourceCompare != 0) { return sourceCompare; } else { diff --git a/src/main/java/org/truffleruby/core/regexp/TRegexCache.java b/src/main/java/org/truffleruby/core/regexp/TRegexCache.java index de566868beb2..3e3ca6f9f6d9 100644 --- a/src/main/java/org/truffleruby/core/regexp/TRegexCache.java +++ b/src/main/java/org/truffleruby/core/regexp/TRegexCache.java @@ -15,19 +15,14 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.source.Source; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.ISO8859_1Encoding; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import 
org.truffleruby.core.regexp.TruffleRegexpNodes.TRegexCompileNode; -import org.truffleruby.core.rope.CannotConvertBinaryRubyStringToJavaString; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.CannotConvertBinaryRubyStringToJavaString; +import org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.interop.InteropNodes; import org.truffleruby.interop.TranslateInteropExceptionNode; import org.truffleruby.language.Nil; @@ -127,14 +122,14 @@ private static boolean isBacktracking(Object tregex) { TranslateInteropExceptionNode.getUncached()); } - public static String toTRegexEncoding(Encoding encoding) { - if (encoding == UTF8Encoding.INSTANCE) { + public static String toTRegexEncoding(RubyEncoding encoding) { + if (encoding == Encodings.UTF_8) { return "UTF-8"; - } else if (encoding == USASCIIEncoding.INSTANCE) { + } else if (encoding == Encodings.US_ASCII) { return "ASCII"; - } else if (encoding == ISO8859_1Encoding.INSTANCE) { + } else if (encoding == Encodings.ISO_8859_1) { return "LATIN-1"; - } else if (encoding == ASCIIEncoding.INSTANCE) { + } else if (encoding == Encodings.BINARY) { return "BYTES"; } else { return null; @@ -145,20 +140,20 @@ public static String toTRegexEncoding(Encoding encoding) { private static Object compileTRegex(RubyContext context, RubyRegexp regexp, boolean atStart, RubyEncoding enc) { String processedRegexpSource; RubyEncoding[] fixedEnc = new RubyEncoding[]{ null }; - final RopeBuilder ropeBuilder; + final TStringBuilder tstringBuilder; try { - ropeBuilder = ClassicRegexp + tstringBuilder = ClassicRegexp .preprocess( - regexp.source, + new TStringWithEncoding(regexp.source, regexp.encoding), enc, fixedEnc, RegexpSupport.ErrorMode.RAISE); } catch (DeferredRaiseException dre) { throw dre.getException(context); } - Rope rope = ropeBuilder.toRope(); + 
var tstring = tstringBuilder.toTString(); try { - processedRegexpSource = RopeOperations.decodeRope(rope); + processedRegexpSource = TStringUtils.toJavaStringOrThrow(tstring, tstringBuilder.getRubyEncoding()); } catch (CannotConvertBinaryRubyStringToJavaString | UnsupportedCharsetException e) { // Some strings cannot be converted to Java strings, e.g. strings with the // BINARY encoding containing characters higher than 127. @@ -169,7 +164,7 @@ private static Object compileTRegex(RubyContext context, RubyRegexp regexp, bool String flags = optionsToFlags(regexp.options, atStart); - String tRegexEncoding = TRegexCache.toTRegexEncoding(enc.jcoding); + String tRegexEncoding = TRegexCache.toTRegexEncoding(enc); if (tRegexEncoding == null) { return null; } diff --git a/src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java b/src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java index 6d887d545382..b82236e45a26 100644 --- a/src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java +++ b/src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java @@ -32,8 +32,8 @@ import com.oracle.truffle.api.profiles.BranchProfile; import com.oracle.truffle.api.profiles.IntValueProfile; import com.oracle.truffle.api.profiles.LoopConditionProfile; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.AsTruffleStringNode; import org.joni.Matcher; import org.joni.Option; import org.joni.Regex; @@ -57,20 +57,17 @@ import org.truffleruby.core.kernel.KernelNodes.SameOrEqualNode; import org.truffleruby.core.regexp.RegexpNodes.ToSNode; import org.truffleruby.core.regexp.TruffleRegexpNodesFactory.MatchNodeGen; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeNodes; -import 
org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; +import org.truffleruby.core.string.ATStringWithEncoding; +import org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.string.StringNodes.StringAppendPrimitiveNode; import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.string.StringUtils; import org.truffleruby.interop.TranslateInteropExceptionNode; import org.truffleruby.interop.TranslateInteropExceptionNodeGen; import org.truffleruby.language.RubyBaseNode; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.WarnNode; import org.truffleruby.language.control.DeferredRaiseException; import org.truffleruby.language.control.RaiseException; @@ -87,32 +84,29 @@ import org.truffleruby.language.objects.AllocationTracing; import org.truffleruby.parser.RubyDeferredWarnings; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.ASCII; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.BROKEN; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.VALID; + @CoreModule("Truffle::RegexpOperations") public class TruffleRegexpNodes { @TruffleBoundary private static void instrumentMatch(ConcurrentHashMap metricsMap, RubyRegexp regexp, Object string, boolean fromStart, boolean collectDetailedStats) { - Rope source = regexp.source; - RegexpOptions options = regexp.options; TruffleRegexpNodes.MatchInfo matchInfo = new TruffleRegexpNodes.MatchInfo(regexp, fromStart); ConcurrentOperations.getOrCompute(metricsMap, matchInfo, x -> new AtomicInteger()).incrementAndGet(); if (collectDetailedStats) { final MatchInfoStats stats = ConcurrentOperations .getOrCompute(MATCHED_REGEXP_STATS, matchInfo, x -> new MatchInfoStats()); - stats - .record( - 
RubyStringLibrary.getUncached().getRope(string), - RubyStringLibrary.getUncached().getEncoding(string)); + stats.record(new ATStringWithEncoding(RubyStringLibrary.getUncached(), string)); } } // MRI: rb_reg_prepare_enc public abstract static class PrepareRegexpEncodingNode extends PrimitiveArrayArgumentsNode { - @Child RopeNodes.CodeRangeNode codeRangeNode = RopeNodes.CodeRangeNode.create(); - @Child RubyStringLibrary stringLibrary = RubyStringLibrary.getFactory().createDispatched(2); @Child WarnNode warnNode; public static PrepareRegexpEncodingNode create() { @@ -121,12 +115,13 @@ public static PrepareRegexpEncodingNode create() { public abstract RubyEncoding executePrepare(RubyRegexp regexp, Object matchString); - @Specialization(guards = "stringLibrary.isRubyString(matchString)") + @Specialization(guards = "stringLibrary.isRubyString(matchString)", limit = "1") protected RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchString, + @Cached RubyStringLibrary stringLibrary, + @Cached TruffleString.GetByteCodeRangeNode codeRangeNode, @Cached BranchProfile asciiOnlyProfile, @Cached BranchProfile asciiIncompatibleFixedRegexpEncodingProfile, @Cached BranchProfile asciiIncompatibleMatchStringEncodingProfile, - @Cached BranchProfile binaryRegexpMatchProfile, @Cached BranchProfile brokenMatchStringProfile, @Cached BranchProfile defaultRegexEncodingProfile, @Cached BranchProfile fallbackProcessingProfile, @@ -137,10 +132,11 @@ protected RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchStri @Cached BranchProfile validUtf8MatchStringProfile) { final RubyEncoding regexpEncoding = regexp.encoding; final RubyEncoding matchStringEncoding = stringLibrary.getEncoding(matchString); - final Rope matchRope = stringLibrary.getRope(matchString); - final CodeRange matchStringCodeRange = codeRangeNode.execute(matchRope); + var tstring = stringLibrary.getTString(matchString); + final TruffleString.CodeRange matchStringCodeRange = 
codeRangeNode.execute(tstring, + matchStringEncoding.tencoding); - if (matchStringCodeRange == CodeRange.CR_BROKEN) { + if (matchStringCodeRange == BROKEN) { brokenMatchStringProfile.enter(); throw new RaiseException( @@ -159,18 +155,18 @@ protected RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchStri // Both encodings are ASCII-compatible and as such can either be CR_7BIT or CR_VALID at this point // depending on the contents. CR_BROKEN strings are handled as a failure case earlier. - if (matchStringCodeRange == CodeRange.CR_7BIT) { + if (matchStringCodeRange == ASCII) { asciiOnlyProfile.enter(); return Encodings.US_ASCII; } else if (matchStringEncoding == Encodings.UTF_8) { validUtf8MatchStringProfile.enter(); - assert matchStringCodeRange == CodeRange.CR_VALID; + assert matchStringCodeRange == VALID; return Encodings.UTF_8; } else if (matchStringEncoding == Encodings.BINARY) { validBinaryMatchStringProfile.enter(); - assert matchStringCodeRange == CodeRange.CR_VALID; + assert matchStringCodeRange == VALID; return Encodings.BINARY; } @@ -181,25 +177,25 @@ protected RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchStri // conflict with those in other branches. 
fallbackProcessingProfile.enter(); - if (regexpEncoding == stringLibrary.getEncoding(matchString)) { + if (regexpEncoding == matchStringEncoding) { sameEncodingProfile.enter(); return regexpEncoding; - } else if (matchStringCodeRange == CodeRange.CR_7BIT && regexpEncoding == Encodings.US_ASCII) { + } else if (matchStringCodeRange == ASCII && regexpEncoding == Encodings.US_ASCII) { asciiOnlyProfile.enter(); return Encodings.US_ASCII; - } else if (!matchStringEncoding.jcoding.isAsciiCompatible()) { + } else if (!matchStringEncoding.isAsciiCompatible) { asciiIncompatibleMatchStringEncodingProfile.enter(); - return raiseEncodingCompatibilityError(regexp, matchRope); + return raiseEncodingCompatibilityError(regexp, matchStringEncoding); } else if (regexp.options.isFixed()) { fixedRegexpEncodingProfile.enter(); - if (!regexpEncoding.jcoding.isAsciiCompatible() || matchStringCodeRange != CodeRange.CR_7BIT) { + if (!regexpEncoding.isAsciiCompatible || matchStringCodeRange != ASCII) { asciiIncompatibleFixedRegexpEncodingProfile.enter(); - return raiseEncodingCompatibilityError(regexp, matchRope); + return raiseEncodingCompatibilityError(regexp, matchStringEncoding); } return regexpEncoding; @@ -207,9 +203,8 @@ protected RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchStri returnMatchStringEncodingProfile.enter(); if (regexp.options.isEncodingNone() && matchStringEncoding != Encodings.BINARY && - matchStringCodeRange != CodeRange.CR_7BIT) { - binaryRegexpMatchProfile.enter(); - + matchStringCodeRange != ASCII) { + // profiled by lazy node warnHistoricalBinaryRegexpMatch(matchStringEncoding); } @@ -218,13 +213,9 @@ protected RubyEncoding regexpPrepareEncoding(RubyRegexp regexp, Object matchStri } // MRI: reg_enc_error - private RubyEncoding raiseEncodingCompatibilityError(RubyRegexp regexp, Rope matchRope) { - throw new RaiseException( - getContext(), - coreExceptions().encodingCompatibilityErrorRegexpIncompatible( - regexp.encoding.jcoding, - 
matchRope.getEncoding(), - this)); + private RubyEncoding raiseEncodingCompatibilityError(RubyRegexp regexp, RubyEncoding matchStringEncoding) { + throw new RaiseException(getContext(), coreExceptions() + .encodingCompatibilityErrorRegexpIncompatible(regexp.encoding, matchStringEncoding, this)); } private void warnHistoricalBinaryRegexpMatch(RubyEncoding matchStringEncoding) { @@ -238,30 +229,36 @@ private void warnHistoricalBinaryRegexpMatch(RubyEncoding matchStringEncoding) { getContext().getCallStack().getTopMostUserSourceSection(), StringUtils.format( "historical binary regexp match /.../n against %s string", - stringLibrary.getJavaString(matchStringEncoding.name))); + getEncodingName(matchStringEncoding))); } } + + @TruffleBoundary + private String getEncodingName(RubyEncoding matchStringEncoding) { + return RubyGuards.getJavaString(matchStringEncoding.name); + } } @TruffleBoundary - private static Matcher getMatcher(Regex regex, byte[] stringBytes, int start) { - return regex.matcher(stringBytes, start, stringBytes.length); + private static Matcher getMatcher(Regex regex, byte[] stringBytes, int start, int end) { + return regex.matcher(stringBytes, start, end); } @TruffleBoundary - private static Matcher getMatcherNoRegion(Regex regex, byte[] stringBytes, int start) { - return regex.matcherNoRegion(stringBytes, start, stringBytes.length); + private static Matcher getMatcherNoRegion(Regex regex, byte[] stringBytes, int start, int end) { + return regex.matcherNoRegion(stringBytes, start, end); } @TruffleBoundary private static Regex makeRegexpForEncoding(RubyContext context, RubyRegexp regexp, RubyEncoding enc, Node currentNode) { final RubyEncoding[] fixedEnc = new RubyEncoding[]{ null }; - final Rope sourceRope = regexp.source; + var source = regexp.source; + var sourceInOtherEncoding = source.forceEncodingUncached(regexp.encoding.tencoding, enc.tencoding); try { - final RopeBuilder preprocessed = ClassicRegexp + final TStringBuilder preprocessed = 
ClassicRegexp .preprocess( - RopeOperations.withEncoding(sourceRope, enc.jcoding), + new TStringWithEncoding(sourceInOtherEncoding, enc), enc, fixedEnc, RegexpSupport.ErrorMode.RAISE); @@ -271,7 +268,7 @@ private static Regex makeRegexpForEncoding(RubyContext context, RubyRegexp regex preprocessed, options, enc, - sourceRope, + source, currentNode); } catch (DeferredRaiseException dre) { throw dre.getException(context); @@ -282,11 +279,12 @@ private static Regex makeRegexpForEncoding(RubyContext context, RubyRegexp regex public abstract static class RegexpUnionNode extends CoreMethodArrayArgumentsNode { @Child StringAppendPrimitiveNode appendNode = StringAppendPrimitiveNode.create(); + @Child AsTruffleStringNode asTruffleStringNode = AsTruffleStringNode.create(); @Child ToSNode toSNode = ToSNode.create(); @Child DispatchNode copyNode = DispatchNode.create(); @Child private SameOrEqualNode sameOrEqualNode = SameOrEqualNode.create(); - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - @Child private RubyStringLibrary rubyStringLibrary = RubyStringLibrary.getFactory().createDispatched(2); + private final RubyStringLibrary rubyStringLibrary = RubyStringLibrary.create(); + private final RubyStringLibrary regexpStringLibrary = RubyStringLibrary.create(); @Specialization( guards = "argsMatch(frame, cachedArgs, args)", @@ -305,17 +303,20 @@ protected Object executeSlowUnion(RubyString str, Object sep, Object[] args, } public RubyRegexp buildUnion(RubyString str, Object sep, Object[] args, BranchProfile errorProfile) { + assert args.length > 0; RubyString regexpString = null; - for (int i = 0; i < args.length; i++) { + for (Object arg : args) { if (regexpString == null) { - regexpString = appendNode.executeStringAppend(str, string(args[i])); + regexpString = appendNode.executeStringAppend(str, string(arg)); } else { regexpString = appendNode.executeStringAppend(regexpString, sep); - regexpString = 
appendNode.executeStringAppend(regexpString, string(args[i])); + regexpString = appendNode.executeStringAppend(regexpString, string(arg)); } } + var encoding = regexpStringLibrary.getEncoding(regexpString); + var truffleString = asTruffleStringNode.execute(regexpString.tstring, encoding.tencoding); try { - return createRegexp(regexpString.rope, regexpString.encoding); + return createRegexp(truffleString, encoding); } catch (DeferredRaiseException dre) { errorProfile.enter(); throw dre.getException(getContext()); @@ -324,10 +325,9 @@ public RubyRegexp buildUnion(RubyString str, Object sep, Object[] args, BranchPr public Object string(Object obj) { if (rubyStringLibrary.isRubyString(obj)) { - final Rope rope = rubyStringLibrary.getRope(obj); - final RopeWithEncoding quotedRopeResult = ClassicRegexp - .quote19(rope, rubyStringLibrary.getEncoding(obj)); - return makeStringNode.fromRope(quotedRopeResult.getRope(), quotedRopeResult.getEncoding()); + final TStringWithEncoding quotedRopeResult = ClassicRegexp + .quote19(new ATStringWithEncoding(rubyStringLibrary, obj)); + return createString(quotedRopeResult); } else { return toSNode.execute((RubyRegexp) obj); } @@ -348,7 +348,8 @@ protected boolean argsMatch(VirtualFrame frame, Object[] cachedArgs, Object[] ar } @TruffleBoundary - public RubyRegexp createRegexp(Rope pattern, RubyEncoding encoding) throws DeferredRaiseException { + public RubyRegexp createRegexp(TruffleString pattern, RubyEncoding encoding) + throws DeferredRaiseException { return RubyRegexp.create(getLanguage(), pattern, encoding, RegexpOptions.fromEmbeddedOptions(0), this); } } @@ -423,9 +424,9 @@ protected RubyArray fillinInstrumentData(Map map, ArrayBui BuilderState state = arrayBuilderNode.start(arraySize); int n = 0; for (Entry e : map.entrySet()) { - Rope key = StringOperations.encodeRope(e.getKey().toString(), UTF8Encoding.INSTANCE); arrayBuilderNode - .appendValue(state, n++, StringOperations.createUTF8String(context, getLanguage(), key)); + 
.appendValue(state, n++, + StringOperations.createUTF8String(context, getLanguage(), e.getKey().toString())); arrayBuilderNode.appendValue(state, n++, e.getValue().get()); } return createArray(arrayBuilderNode.finish(state, n), n); @@ -518,9 +519,9 @@ protected Object buildUnusedRegexpsArray( final BuilderState state = arrayBuilderNode.start(unusedRegexps.size()); int n = 0; for (RubyRegexp entry : unusedRegexps) { - final Rope key = StringOperations.encodeRope(entry.toString(), UTF8Encoding.INSTANCE); arrayBuilderNode - .appendValue(state, n++, StringOperations.createUTF8String(getContext(), getLanguage(), key)); + .appendValue(state, n++, + StringOperations.createUTF8String(getContext(), getLanguage(), entry.toString())); } return createArray(arrayBuilderNode.finish(state, n), n); @@ -645,21 +646,13 @@ private void processGroup(ConcurrentHashMap group, ArrayBuilderNode arrayBuilderNode, BuilderState state, int offset) { int n = 0; for (Entry entry : group.entrySet()) { - arrayBuilderNode - .appendValue( - state, - offset + n, - buildHash( - hashStoreLibrary, - arrayBuilderNode, - isTRegexMatch, - entry.getKey(), - entry.getValue())); + arrayBuilderNode.appendValue(state, offset + n, + buildHash(hashStoreLibrary, isTRegexMatch, entry.getKey(), entry.getValue())); n++; } } - private RubyHash buildHash(HashStoreLibrary hashStoreLibrary, ArrayBuilderNode arrayBuilderNode, + private RubyHash buildHash(HashStoreLibrary hashStoreLibrary, boolean isTRegexMatch, MatchInfo matchInfo, AtomicInteger count) { final RubyHash regexpInfoHash = CompiledRegexpHashArray.buildRegexInfoHash( @@ -690,7 +683,7 @@ private RubyHash buildHash(HashStoreLibrary hashStoreLibrary, ArrayBuilderNode a matchInfoHash.store, matchInfoHash, getLanguage().getSymbol("match_stats"), - buildMatchInfoStatsHash(hashStoreLibrary, arrayBuilderNode, matchInfo), + buildMatchInfoStatsHash(hashStoreLibrary, matchInfo), true); } @@ -699,19 +692,10 @@ private RubyHash buildHash(HashStoreLibrary hashStoreLibrary, 
ArrayBuilderNode a return matchInfoHash; } - private RubyHash buildMatchInfoStatsHash(HashStoreLibrary hashStoreLibrary, ArrayBuilderNode arrayBuilderNode, - MatchInfo matchInfo) { + private RubyHash buildMatchInfoStatsHash(HashStoreLibrary hashStoreLibrary, MatchInfo matchInfo) { final MatchInfoStats stats = MATCHED_REGEXP_STATS.get(matchInfo); final RubyHash ret = HashOperations.newEmptyHash(getContext(), getLanguage()); - buildAndSetDistributionHash( - hashStoreLibrary, - ret, - "byte_array_populated", - stats.byteArrayPopulatedFrequencies, - Optional.empty(), - Optional.of(count -> count.get())); - buildAndSetDistributionHash( hashStoreLibrary, ret, @@ -749,11 +733,7 @@ private RubyHash buildMatchInfoStatsHash(HashStoreLibrary hashStoreLibrary, Arra ret, "rope_types", stats.ropeClassFrequencies, - Optional.of( - className -> StringOperations.createUTF8String( - getContext(), - getLanguage(), - StringOperations.encodeRope(className, UTF8Encoding.INSTANCE))), + Optional.of(className -> StringOperations.createUTF8String(getContext(), getLanguage(), className)), Optional.of(count -> count.get())); return ret; @@ -809,7 +789,7 @@ public abstract Object executeMatchInRegion(RubyRegexp regexp, Object string, in * * @param createMatchData Whether to create a Ruby `MatchData` object with the results of the match or return a * simple Boolean value indicating a successful match (true: match; false: mismatch). 
*/ - @Specialization(guards = "libString.isRubyString(string)") + @Specialization(guards = "libString.isRubyString(string)", limit = "1") protected Object matchInRegion( RubyRegexp regexp, Object string, @@ -821,10 +801,10 @@ protected Object matchInRegion( @Cached ConditionProfile createMatchDataProfile, @Cached ConditionProfile encodingMismatchProfile, @Cached PrepareRegexpEncodingNode prepareRegexpEncodingNode, - @Cached RopeNodes.BytesNode bytesNode, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode, + @Cached ConditionProfile zeroOffsetProfile, @Cached MatchNode matchNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope rope = libString.getRope(string); + @Cached RubyStringLibrary libString) { Regex regex = regexp.regex; final RubyEncoding negotiatedEncoding = prepareRegexpEncodingNode.executePrepare(regexp, string); @@ -834,15 +814,25 @@ protected Object matchInRegion( .getOrCreate(negotiatedEncoding, e -> makeRegexpForEncoding(getContext(), regexp, e, this)); } - final Matcher matcher; + var tstring = libString.getTString(string); + var byteArray = getInternalByteArrayNode.execute(tstring, libString.getTEncoding(string)); + final int offset; + if (zeroOffsetProfile.profile(byteArray.getOffset() == 0)) { + offset = 0; + } else { + offset = byteArray.getOffset(); + } + + final Matcher matcher; if (createMatchDataProfile.profile(createMatchData)) { - matcher = getMatcher(regex, bytesNode.execute(rope), startPos); + matcher = getMatcher(regex, byteArray.getArray(), offset + startPos, byteArray.getEnd()); } else { - matcher = getMatcherNoRegion(regex, bytesNode.execute(rope), startPos); + matcher = getMatcherNoRegion(regex, byteArray.getArray(), offset + startPos, byteArray.getEnd()); } - return matchNode.execute(regexp, string, matcher, fromPos, toPos, atStart, createMatchData); + return matchNode.execute(regexp, string, matcher, offset + fromPos, offset + toPos, atStart, + createMatchData); } } @@ 
-855,9 +845,9 @@ public abstract static class MatchInRegionTRegexNode extends PrimitiveArrayArgum @Child DispatchNode stringDupNode; @Child TranslateInteropExceptionNode translateInteropExceptionNode; - @Child RopeNodes.GetBytesObjectNode getBytesObjectNode; + @Child TruffleString.SubstringByteIndexNode substringByteIndexNode; - @Specialization(guards = "libString.isRubyString(string)") + @Specialization(guards = "libString.isRubyString(string)", limit = "1") protected Object matchInRegionTRegex( RubyRegexp regexp, Object string, @@ -866,6 +856,7 @@ protected Object matchInRegionTRegex( boolean atStart, int startPos, boolean createMatchData, + @Cached TruffleString.SwitchEncodingNode switchEncodingNode, @Cached ConditionProfile createMatchDataProfile, @Cached ConditionProfile matchFoundProfile, @Cached ConditionProfile tRegexCouldNotCompileProfile, @@ -875,16 +866,16 @@ protected Object matchInRegionTRegex( @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary regexInterop, @CachedLibrary(limit = "getInteropCacheLimit()") InteropLibrary resultInterop, @Cached PrepareRegexpEncodingNode prepareRegexpEncodingNode, - @Cached RopeNodes.BytesNode bytesNode, @Cached TRegexCompileNode tRegexCompileNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, + @Cached RubyStringLibrary libString, @Cached IntValueProfile groupCountProfile) { - Rope rope = libString.getRope(string); final Object tRegex; final RubyEncoding negotiatedEncoding = prepareRegexpEncodingNode.executePrepare(regexp, string); + var tstring = switchEncodingNode.execute(libString.getTString(string), negotiatedEncoding.tencoding); + final int byteLength = tstring.byteLength(negotiatedEncoding.tencoding); if (tRegexIncompatibleProfile - .profile(toPos < fromPos || toPos != rope.byteLength() || fromPos < 0) || + .profile(toPos < fromPos || toPos != byteLength || fromPos < 0) || tRegexCouldNotCompileProfile.profile((tRegex = tRegexCompileNode.executeTRegexCompile( regexp, atStart, 
@@ -910,38 +901,34 @@ protected Object matchInRegionTRegex( } int fromIndex = fromPos; - final Object interopByteArray; + final TruffleString tstringToMatch; final String execMethod; if (createMatchDataProfile.profile(createMatchData)) { if (startPosNotZeroProfile.profile(startPos > 0)) { - // GR-32765: When adopting TruffleString, use a TruffleString substring here instead // If startPos != 0, then fromPos == startPos. assert fromPos == startPos; fromIndex = 0; - if (getBytesObjectNode == null) { + if (substringByteIndexNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - getBytesObjectNode = insert(RopeNodes.GetBytesObjectNode.create()); + substringByteIndexNode = insert(TruffleString.SubstringByteIndexNode.create()); } - interopByteArray = getBytesObjectNode.getRange(rope, startPos, toPos); - execMethod = "exec"; + tstringToMatch = substringByteIndexNode.execute(tstring, startPos, toPos - startPos, + negotiatedEncoding.tencoding, true); } else { - final byte[] bytes = bytesNode.execute(rope); - interopByteArray = getContext().getEnv().asGuestValue(bytes); - execMethod = "execBytes"; + tstringToMatch = tstring; } + execMethod = "exec"; } else { // Only strscan ever passes a non-zero startPos and that never uses `match?`. 
assert startPos == 0 : "Simple Boolean match not supported with non-zero startPos"; - final byte[] bytes = bytesNode.execute(rope); - // TODO: remove HostAccess in ContextPermissionsTest#testRequireGem when migrated to TruffleString - interopByteArray = getContext().getEnv().asGuestValue(bytes); + tstringToMatch = tstring; execMethod = "execBoolean"; } - final Object result = invoke(regexInterop, tRegex, execMethod, interopByteArray, fromIndex); + final Object result = invoke(regexInterop, tRegex, execMethod, tstringToMatch, fromIndex); if (createMatchDataProfile.profile(createMatchData)) { final boolean isMatch = (boolean) readMember(resultInterop, result, "isMatch"); @@ -1182,29 +1169,26 @@ public String toString() { static final class MatchInfoStats { - private final ConcurrentHashMap byteArrayPopulatedFrequencies = new ConcurrentHashMap<>(); private final ConcurrentHashMap byteLengthFrequencies = new ConcurrentHashMap<>(); private final ConcurrentHashMap characterLengthFrequencies = new ConcurrentHashMap<>(); - private final ConcurrentHashMap codeRangeFrequencies = new ConcurrentHashMap<>(); + private final ConcurrentHashMap codeRangeFrequencies = new ConcurrentHashMap<>(); private final ConcurrentHashMap encodingFrequencies = new ConcurrentHashMap<>(); private final ConcurrentHashMap ropeClassFrequencies = new ConcurrentHashMap<>(); - private void record(Rope rope, RubyEncoding encoding) { + private void record(ATStringWithEncoding string) { ConcurrentOperations - .getOrCompute(byteArrayPopulatedFrequencies, rope.getRawBytes() != null, x -> new AtomicLong()) + .getOrCompute(byteLengthFrequencies, string.byteLength(), x -> new AtomicLong()) .incrementAndGet(); ConcurrentOperations - .getOrCompute(byteLengthFrequencies, rope.byteLength(), x -> new AtomicLong()) + .getOrCompute(characterLengthFrequencies, string.characterLength(), x -> new AtomicLong()) .incrementAndGet(); ConcurrentOperations - .getOrCompute(characterLengthFrequencies, rope.characterLength(), x 
-> new AtomicLong()) + .getOrCompute(codeRangeFrequencies, string.getCodeRange(), x -> new AtomicLong()) .incrementAndGet(); - ConcurrentOperations - .getOrCompute(codeRangeFrequencies, rope.getCodeRange(), x -> new AtomicLong()) + ConcurrentOperations.getOrCompute(encodingFrequencies, string.encoding, x -> new AtomicLong()) .incrementAndGet(); - ConcurrentOperations.getOrCompute(encodingFrequencies, encoding, x -> new AtomicLong()).incrementAndGet(); ConcurrentOperations - .getOrCompute(ropeClassFrequencies, rope.getClass().getSimpleName(), x -> new AtomicLong()) + .getOrCompute(ropeClassFrequencies, string.getClass().getSimpleName(), x -> new AtomicLong()) .incrementAndGet(); } @@ -1218,23 +1202,16 @@ private void record(Rope rope, RubyEncoding encoding) { /** WARNING: computeRegexpEncoding() mutates options, so the caller should make sure it's a copy */ @TruffleBoundary - public static Regex compile(RubyLanguage language, RubyDeferredWarnings rubyDeferredWarnings, - RopeWithEncoding bytes, RegexpOptions[] optionsArray, Node currentNode) - throws DeferredRaiseException { - if (optionsArray[0].isEncodingNone()) { - bytes = new RopeWithEncoding( - RopeOperations.withEncoding(bytes.getRope(), ASCIIEncoding.INSTANCE), - Encodings.BINARY); - } + public static Regex compile(RubyDeferredWarnings rubyDeferredWarnings, TStringWithEncoding bytes, + RegexpOptions[] optionsArray, Node currentNode) throws DeferredRaiseException { RubyEncoding enc = bytes.getEncoding(); RubyEncoding[] fixedEnc = new RubyEncoding[]{ null }; - RopeBuilder unescaped = ClassicRegexp - .preprocess(bytes.getRope(), enc, fixedEnc, RegexpSupport.ErrorMode.RAISE); + TStringBuilder unescaped = ClassicRegexp.preprocess(bytes, enc, fixedEnc, RegexpSupport.ErrorMode.RAISE); enc = ClassicRegexp.computeRegexpEncoding(optionsArray, enc, fixedEnc); Regex regexp = ClassicRegexp - .makeRegexp(rubyDeferredWarnings, unescaped, optionsArray[0], enc, bytes.getRope(), currentNode); - regexp.setUserObject(new 
RopeWithEncoding(RopeOperations.withEncoding(bytes.getRope(), enc.jcoding), enc)); + .makeRegexp(rubyDeferredWarnings, unescaped, optionsArray[0], enc, bytes.tstring, currentNode); + regexp.setUserObject(bytes.forceEncoding(enc)); return regexp; } diff --git a/src/main/java/org/truffleruby/core/rope/AsciiOnlyLeafRope.java b/src/main/java/org/truffleruby/core/rope/AsciiOnlyLeafRope.java deleted file mode 100644 index 3c24e06d7a10..000000000000 --- a/src/main/java/org/truffleruby/core/rope/AsciiOnlyLeafRope.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ - -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; - -import com.oracle.truffle.api.CompilerDirectives; - -public class AsciiOnlyLeafRope extends LeafRope { - - public AsciiOnlyLeafRope(byte[] bytes, Encoding encoding) { - super(bytes, encoding, CodeRange.CR_7BIT, bytes.length); - - assert RopeOperations.isAsciiOnly(bytes, encoding) : "MBC string incorrectly marked as CR_7BIT"; - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - return new AsciiOnlyLeafRope(getRawBytes(), newEncoding); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new UnsupportedOperationException("Must only be called for CR_VALID Strings"); - } -} diff --git a/src/main/java/org/truffleruby/core/rope/Bytes.java b/src/main/java/org/truffleruby/core/rope/Bytes.java deleted file mode 100644 index 4098418e05e2..000000000000 --- a/src/main/java/org/truffleruby/core/rope/Bytes.java +++ 
/dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.CompilerDirectives.ValueType; -import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.interop.InteropLibrary; -import com.oracle.truffle.api.interop.InvalidArrayIndexException; -import com.oracle.truffle.api.interop.TruffleObject; -import com.oracle.truffle.api.library.ExportLibrary; -import com.oracle.truffle.api.library.ExportMessage; -import com.oracle.truffle.api.profiles.BranchProfile; - -@ValueType -@ExportLibrary(InteropLibrary.class) -public final class Bytes implements TruffleObject { - public final byte[] array; - public final int offset; - public final int length; - - public Bytes(byte[] array, int offset, int length) { - assert offset >= 0 && length >= 0 && offset + length <= array.length; - this.array = array; - this.offset = offset; - this.length = length; - } - - public Bytes(byte[] array) { - this(array, 0, array.length); - } - - public static Bytes fromRange(byte[] array, int start, int end) { - assert 0 <= start && start <= end && end <= array.length; - return new Bytes(array, start, end - start); - } - - /** Just like {@link #fromRange(byte[], int, int)}, but will clamp the length to stay within the bounds. */ - public static Bytes fromRangeClamped(byte[] array, int start, int end) { - return fromRange(array, start, Math.min(array.length, end)); - } - - /** Returns the end offset, equal to {@link #offset} + {@link #length}. 
*/ - public int end() { - return offset + length; - } - - public boolean isEmpty() { - return length == 0; - } - - public Bytes slice(int offset, int length) { - assert offset >= 0 && length >= 0 && offset + length <= this.length; - return new Bytes(this.array, this.offset + offset, length); - } - - public Bytes sliceRange(int start, int end) { - assert start >= 0 && end >= 0 && start <= end && end <= this.length; - return new Bytes(this.array, this.offset + start, end - start); - } - - /** Just like {@link #slice(int, int)}}, but will clamp the length to stay within the bounds. */ - public Bytes clampedSlice(int offset, int length) { - return slice(offset, Math.min(length, this.length - offset)); - } - - /** Just like {@link #sliceRange(int, int)}}, but will clamp the end offset to stay within the bounds. */ - public Bytes clampedRange(int start, int end) { - return sliceRange(start, Math.min(end, this.length)); - } - - public byte get(int i) { - return array[offset + i]; - } - - // region Array messages for TRegex - @ExportMessage - public boolean hasArrayElements() { - return true; - } - - @ExportMessage - public long getArraySize() { - return length; - } - - @ExportMessage - public Object readArrayElement(long index, - @Cached BranchProfile errorProfile) throws InvalidArrayIndexException { - if (isArrayElementReadable(index)) { - return get((int) index); - } else { - errorProfile.enter(); - throw InvalidArrayIndexException.create(index); - } - } - - @ExportMessage - public boolean isArrayElementReadable(long index) { - return index >= 0 && index < length; - } - // endregion -} diff --git a/src/main/java/org/truffleruby/core/rope/CodeRange.java b/src/main/java/org/truffleruby/core/rope/CodeRange.java deleted file mode 100644 index f91ceb10f587..000000000000 --- a/src/main/java/org/truffleruby/core/rope/CodeRange.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. 
This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - * - * - * Some of the code in this class is modified from org.jruby.util.StringSupport, - * licensed under the same EPL 2.0/GPL 2.0/LGPL 2.1 used throughout. - */ - -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.CompilerDirectives; - -public enum CodeRange { - /** Used for {@link NativeRope}, where the bytes can change from real native code. Also used when building a new - * {@link Rope} and the code range is unknown. */ - CR_UNKNOWN(0), - /** Only used for ASCII-compatible encodings, when all characters are US-ASCII (7-bit). */ - CR_7BIT(1), - /** All characters are valid, but at least one non-7-bit character. */ - CR_VALID(2), - /** At least one character is not valid in the encoding of that Rope. */ - CR_BROKEN(3); - - private final int value; - - CodeRange(int value) { - this.value = value; - } - - public int toInt() { - return value; - } - - public static CodeRange fromInt(int codeRange) { - switch (codeRange) { - case 0: - return CR_UNKNOWN; - case 1: - return CR_7BIT; - case 2: - return CR_VALID; - case 3: - return CR_BROKEN; - default: - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new UnsupportedOperationException("Don't know how to convert code range: " + codeRange); - } - } -} diff --git a/src/main/java/org/truffleruby/core/rope/ConcatRope.java b/src/main/java/org/truffleruby/core/rope/ConcatRope.java deleted file mode 100644 index aa112bf9ed13..000000000000 --- a/src/main/java/org/truffleruby/core/rope/ConcatRope.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. 
You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.CompilerAsserts; -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.CompilerDirectives.ValueType; -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; - -import java.lang.invoke.VarHandle; - -public class ConcatRope extends ManagedRope { - - /** Wrapper for the current state of the concat rope, including null children and a a byte array, or a null byte - * array and the children. Accessing the state through {@link #getState()} avoids race conditions. */ - @ValueType - public static class ConcatState { - public final ManagedRope left, right; - public final byte[] bytes; - - public ConcatState(ManagedRope left, ManagedRope right, byte[] bytes) { - assert bytes == null && left != null && right != null || bytes != null && left == null && right == null; - this.left = left; - this.right = right; - this.bytes = bytes; - } - - public boolean isFlattened() { - return bytes != null; - } - } - - private ManagedRope left; - private ManagedRope right; - - public ConcatRope( - ManagedRope left, - ManagedRope right, - Encoding encoding, - CodeRange codeRange) { - this( - left, - right, - encoding, - codeRange, - left.byteLength() + right.byteLength(), - left.characterLength() + right.characterLength(), - null); - } - - private ConcatRope( - ManagedRope left, - ManagedRope right, - Encoding encoding, - CodeRange codeRange, - int byteLength, - int characterLength, - byte[] bytes) { - super(encoding, codeRange, byteLength, characterLength, bytes); - assert left != null; - assert right != null; - this.left = left; - this.right = right; - } - - @Override - Rope withEncoding7bit(Encoding 
newEncoding, ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_7BIT; - return withEncoding(newEncoding, CodeRange.CR_7BIT, characterLength(), bytesNotNull); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_VALID; - return withEncoding(ASCIIEncoding.INSTANCE, CodeRange.CR_VALID, byteLength(), bytesNotNull); - } - - private Rope withEncoding(Encoding encoding, CodeRange codeRange, int characterLength, - ConditionProfile bytesNotNull) { - final ConcatState state = getState(bytesNotNull); - if (state.isFlattened()) { - return RopeOperations.create(state.bytes, encoding, codeRange); - } else { - return new ConcatRope(state.left, state.right, encoding, codeRange, byteLength(), characterLength, null); - } - } - - @Override - protected byte[] getBytesSlow() { - flatten(); - return bytes; - } - - private void flatten() { - bytes = RopeOperations.flattenBytes(this); - VarHandle.storeStoreFence(); - left = null; - right = null; - } - - /** Access the state in a way that prevents race conditions. - * - *

- * This version is not allowed in compiled code, use {@link #getState(ConditionProfile)} there instead. */ - public ConcatState getState() { - CompilerAsserts.neverPartOfCompilation("Use #getState(ConditionProfile) instead."); - return getState(ConditionProfile.getUncached()); - } - - /** Access the state in a way that prevents race conditions. - * - *

- * Outside compiled code, you can use {@link #getState()}. */ - public ConcatState getState(ConditionProfile bytesNotNull) { - final ManagedRope left = this.left; - final ManagedRope right = this.right; - VarHandle.loadLoadFence(); - final byte[] bytes = this.bytes; - if (bytesNotNull.profile(bytes != null)) { - return new ConcatState(null, null, bytes); - } else if (left != null && right != null) { - return new ConcatState(left, right, null); - } else { - throw CompilerDirectives - .shouldNotReachHere("our assumptions about reordering and memory barriers seem incorrect"); - } - } -} diff --git a/src/main/java/org/truffleruby/core/rope/InvalidLeafRope.java b/src/main/java/org/truffleruby/core/rope/InvalidLeafRope.java deleted file mode 100644 index 89d3a3762eb2..000000000000 --- a/src/main/java/org/truffleruby/core/rope/InvalidLeafRope.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ - -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; - -import com.oracle.truffle.api.CompilerDirectives; - -public class InvalidLeafRope extends LeafRope { - - public InvalidLeafRope(byte[] bytes, Encoding encoding, int characterLength) { - super(bytes, encoding, CodeRange.CR_BROKEN, characterLength); - - assert RopeOperations.isInvalid(bytes, encoding) : "valid string incorrectly marked as CR_BROKEN"; - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new UnsupportedOperationException("Must only be called for ASCII-only Strings"); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new UnsupportedOperationException("Must only be called for CR_VALID Strings"); - } -} diff --git a/src/main/java/org/truffleruby/core/rope/LazyIntRope.java b/src/main/java/org/truffleruby/core/rope/LazyIntRope.java deleted file mode 100644 index 1f9bd40019af..000000000000 --- a/src/main/java/org/truffleruby/core/rope/LazyIntRope.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.USASCIIEncoding; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; - -public class LazyIntRope extends ManagedRope { - - final int value; - - public LazyIntRope(int value) { - this(value, USASCIIEncoding.INSTANCE, length(value)); - } - - public LazyIntRope(int value, Encoding encoding) { - this(value, encoding, length(value)); - } - - public LazyIntRope(int value, Encoding encoding, int length) { - super(encoding, CodeRange.CR_7BIT, length, length, null); - this.value = value; - assert Integer.toString(value).length() == length : value + " " + length; - } - - // @formatter:off - @CompilationFinal(dimensions = 1) private static final long[] LENGTH_TABLE = { - 0x100000000L, 0x1FFFFFFF6L, 0x1FFFFFFF6L, - 0x1FFFFFFF6L, 0x2FFFFFF9CL, 0x2FFFFFF9CL, - 0x2FFFFFF9CL, 0x3FFFFFC18L, 0x3FFFFFC18L, - 0x3FFFFFC18L, 0x4FFFFD8F0L, 0x4FFFFD8F0L, - 0x4FFFFD8F0L, 0x4FFFFD8F0L, 0x5FFFE7960L, - 0x5FFFE7960L, 0x5FFFE7960L, 0x6FFF0BDC0L, - 0x6FFF0BDC0L, 0x6FFF0BDC0L, 0x7FF676980L, - 0x7FF676980L, 0x7FF676980L, 0x7FF676980L, - 0x8FA0A1F00L, 0x8FA0A1F00L, 0x8FA0A1F00L, - 0x9C4653600L, 0x9C4653600L, 0x9C4653600L, - 0xA00000000L, 0xA00000000L - }; - // @formatter:on - - // From https://lemire.me/blog/2021/06/03/computing-the-number-of-digits-of-an-integer-even-faster/ - // and https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/4e6e171a7d/2021/06/03/digitcount.c (license: public domain) - private static int length(int value) { - final int sign; - if (CompilerDirectives.injectBranchProbability(CompilerDirectives.UNLIKELY_PROBABILITY, value < 0)) { - // We can't represent -Integer.MIN_VALUE (it results in Integer.MIN_VALUE), so we need to handle it explicitly - if (CompilerDirectives - 
.injectBranchProbability(CompilerDirectives.SLOWPATH_PROBABILITY, value == Integer.MIN_VALUE)) { - return 11; - } - - sign = 1; - value = -value; - } else { - sign = 0; - } - - final int bits = 31 - Integer.numberOfLeadingZeros(value | 1); - int digits = (int) ((value + LENGTH_TABLE[bits]) >>> 32); - return sign + digits; - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_7BIT; - return new LazyIntRope(value, newEncoding, length(value)); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new UnsupportedOperationException("Must only be called for CR_VALID Strings"); - } - - - @Override - protected byte[] getBytesSlow() { - return RopeOperations.encodeAsciiBytes(valueToString(value)); - } - - @TruffleBoundary - private String valueToString(int value) { - return Integer.toString(value); - } - - public int getValue() { - return value; - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/LeafRope.java b/src/main/java/org/truffleruby/core/rope/LeafRope.java deleted file mode 100644 index c3ac1fc82ca0..000000000000 --- a/src/main/java/org/truffleruby/core/rope/LeafRope.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import org.jcodings.Encoding; - -public abstract class LeafRope extends ManagedRope { - - public LeafRope(byte[] bytes, Encoding encoding, CodeRange codeRange, int characterLength) { - super(encoding, codeRange, bytes.length, characterLength, bytes); - } - - @Override - public byte getByteSlow(int index) { - return getRawBytes()[index]; - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/ManagedRope.java b/src/main/java/org/truffleruby/core/rope/ManagedRope.java deleted file mode 100644 index 8541ab87d232..000000000000 --- a/src/main/java/org/truffleruby/core/rope/ManagedRope.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2017, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.CompilerAsserts; -import org.jcodings.Encoding; - -public abstract class ManagedRope extends Rope { - - private final CodeRange codeRange; - private final int characterLength; - - protected ManagedRope( - Encoding encoding, - CodeRange codeRange, - int byteLength, - int characterLength, - byte[] bytes) { - super(encoding, byteLength, bytes); - - this.codeRange = codeRange; - this.characterLength = characterLength; - - assert !encoding.isSingleByte() || byteLength == characterLength; - } - - @Override - public final CodeRange getCodeRange() { - return this.codeRange; - } - - @Override - public final int characterLength() { - return characterLength; - } - - @Override - protected byte getByteSlow(int index) { - return getBytes()[index]; - } - - @Override - public final byte[] getBytes() { - CompilerAsserts.neverPartOfCompilation("Use RopeNodes.ByteNodes instead, or add a @TruffleBoundary."); - if (bytes == null) { - bytes = getBytesSlow(); - } - - return bytes; - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/NativeRope.java b/src/main/java/org/truffleruby/core/rope/NativeRope.java deleted file mode 100644 index ed7492df32ac..000000000000 --- a/src/main/java/org/truffleruby/core/rope/NativeRope.java +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.truffleruby.RubyLanguage; -import org.truffleruby.core.string.StringAttributes; -import org.truffleruby.core.string.StringSupport; -import org.truffleruby.extra.ffi.Pointer; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; - -public class NativeRope extends Rope { - - public static final int UNKNOWN_CHARACTER_LENGTH = -1; - - private CodeRange codeRange; - private int characterLength; - private final Pointer pointer; - - public NativeRope( - RubyLanguage language, - byte[] bytes, - Encoding encoding, - int characterLength, - CodeRange codeRange) { - this(allocateNativePointer(language, bytes), bytes.length, encoding, characterLength, codeRange); - } - - private NativeRope(Pointer pointer, int byteLength, Encoding encoding, int characterLength, CodeRange codeRange) { - super(encoding, byteLength, null); - - assert (codeRange == CodeRange.CR_UNKNOWN) == (characterLength == UNKNOWN_CHARACTER_LENGTH); - this.codeRange = codeRange; - this.characterLength = characterLength; - this.pointer = pointer; - } - - private static Pointer allocateNativePointer(RubyLanguage language, byte[] bytes) { - final Pointer pointer = Pointer.mallocAutoRelease(bytes.length + 1, language); - pointer.writeBytes(0, bytes, 0, bytes.length); - pointer.writeByte(bytes.length, (byte) 0); - return pointer; - } - - private static Pointer copyNativePointer(RubyLanguage language, Pointer existing) { - final Pointer pointer = Pointer.mallocAutoRelease(existing.getSize(), language); - pointer.writeBytes(0, existing, 0, existing.getSize()); - return pointer; - } - - public static NativeRope newBuffer(RubyLanguage language, int byteCapacity, int byteLength) { - assert byteCapacity >= byteLength; - - final Pointer pointer = Pointer.callocAutoRelease(byteCapacity + 1, language); - - return new 
NativeRope( - pointer, - byteLength, - ASCIIEncoding.INSTANCE, - UNKNOWN_CHARACTER_LENGTH, - CodeRange.CR_UNKNOWN); - } - - public NativeRope withByteLength(int newByteLength, int characterLength, CodeRange codeRange) { - pointer.writeByte(newByteLength, (byte) 0); // Like MRI - return new NativeRope(pointer, newByteLength, getEncoding(), characterLength, codeRange); - } - - public NativeRope makeCopy(RubyLanguage language) { - final Pointer newPointer = copyNativePointer(language, pointer); - return new NativeRope(newPointer, byteLength(), getEncoding(), characterLength(), getCodeRange()); - } - - public NativeRope resize(RubyLanguage language, int newByteLength) { - assert byteLength() != newByteLength; - - final Pointer pointer = Pointer.mallocAutoRelease(newByteLength + 1, language); - pointer.writeBytes(0, this.pointer, 0, Math.min(getNativePointer().getSize(), newByteLength)); - pointer.writeByte(newByteLength, (byte) 0); // Like MRI - return new NativeRope(pointer, newByteLength, getEncoding(), UNKNOWN_CHARACTER_LENGTH, CodeRange.CR_UNKNOWN); - } - - /** Creates a new native rope which preserves existing bytes and byte length up to newCapacity - * - * @param context the Ruby context - * @param newCapacity the size in bytes minus one of the new pointer length - * @return the new NativeRope */ - public NativeRope expandCapacity(RubyLanguage language, int newCapacity) { - assert getCapacity() != newCapacity; - final Pointer pointer = Pointer.mallocAutoRelease(newCapacity + 1, language); - pointer.writeBytes(0, this.pointer, 0, Math.min(getNativePointer().getSize(), newCapacity)); - pointer.writeByte(newCapacity, (byte) 0); // Like MRI - return new NativeRope( - pointer, - byteLength(), - getEncoding(), - UNKNOWN_CHARACTER_LENGTH, - CodeRange.CR_UNKNOWN); - } - - @Override - public byte[] getBytes() { - // Always re-read bytes from the native pointer as they might have changed. 
- final byte[] bytes = new byte[byteLength()]; - copyTo(0, bytes, 0, byteLength()); - return bytes; - } - - public CodeRange getRawCodeRange() { - return codeRange; - } - - @Override - public CodeRange getCodeRange() { - if (codeRange == CodeRange.CR_UNKNOWN) { - final StringAttributes attributes = RopeOperations - .calculateCodeRangeAndLength(getEncoding(), getBytes(), 0, byteLength()); - updateAttributes(attributes); - return attributes.getCodeRange(); - } else { - return codeRange; - } - } - - public int rawCharacterLength() { - return characterLength; - } - - @Override - public int characterLength() { - if (characterLength == UNKNOWN_CHARACTER_LENGTH) { - final StringAttributes attributes = RopeOperations - .calculateCodeRangeAndLength(getEncoding(), getBytes(), 0, byteLength()); - updateAttributes(attributes); - return attributes.getCharacterLength(); - } else { - return characterLength; - } - } - - public void clearCodeRange() { - this.characterLength = UNKNOWN_CHARACTER_LENGTH; - this.codeRange = CodeRange.CR_UNKNOWN; - } - - public void updateAttributes(StringAttributes attributes) { - this.characterLength = attributes.getCharacterLength(); - this.codeRange = attributes.getCodeRange(); - } - - public byte[] getBytes(int byteOffset, int byteLength) { - final byte[] bytes = new byte[byteLength]; - copyTo(byteOffset, bytes, 0, byteLength); - return bytes; - } - - @TruffleBoundary - public void copyTo(int byteOffset, byte[] dest, int bufferPos, int byteLength) { - pointer.readBytes(byteOffset, dest, bufferPos, byteLength); - } - - @Override - public byte getByteSlow(int index) { - return get(index); - } - - @Override - public byte get(int index) { - assert 0 <= index && index < pointer.getSize(); - return pointer.readByte(index); - } - - public void set(int index, int value) { - assert 0 <= index && index < pointer.getSize(); - assert value >= -128 && value < 256; - - if (!(codeRange == CodeRange.CR_7BIT && StringSupport.isAsciiCodepoint(value))) { - 
clearCodeRange(); - } - - pointer.writeByte(index, (byte) value); - } - - @Override - public int hashCode() { - // TODO (pitr-ch 16-May-2017): this forces Rope#hashCode to be non-final, which is bad for performance - return RopeOperations.hashForRange(this, 1, 0, byteLength()); - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - return withEncoding(newEncoding); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - return withEncoding(ASCIIEncoding.INSTANCE); - } - - NativeRope withEncoding(Encoding newEncoding) { - return new NativeRope(pointer, byteLength(), newEncoding, UNKNOWN_CHARACTER_LENGTH, CodeRange.CR_UNKNOWN); - } - - public Pointer getNativePointer() { - return pointer; - } - - public long getCapacity() { - final long nativeBufferSize = pointer.getSize(); - assert nativeBufferSize > 0; - // Do not count the extra byte for \0, like MRI. - return nativeBufferSize - 1; - } - - @TruffleBoundary - public LeafRope toLeafRope() { - return RopeOperations.create(getBytes(), getEncoding(), CodeRange.CR_UNKNOWN); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RepeatingRope.java b/src/main/java/org/truffleruby/core/rope/RepeatingRope.java deleted file mode 100644 index 1ed5bde2b229..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RepeatingRope.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ - -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; - -/** A RepeatingRope always has the same encoding as its child */ -public class RepeatingRope extends ManagedRope { - - private final ManagedRope child; - private final int times; - - public RepeatingRope(ManagedRope child, int times, int byteLength) { - super( - child.getEncoding(), - child.getCodeRange(), - byteLength, - child.characterLength() * times, - null); - this.child = child; - this.times = times; - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_7BIT; - return new RepeatingRope((ManagedRope) RopeOperations.withEncoding(child, newEncoding), times, byteLength()); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_VALID; - return new RepeatingRope( - (ManagedRope) RopeOperations.withEncoding(child, ASCIIEncoding.INSTANCE), - times, - byteLength()); - } - - @Override - protected byte[] getBytesSlow() { - if (child.getRawBytes() != null) { - final byte[] childBytes = child.getRawBytes(); - int len = childBytes.length * times; - final byte[] ret = new byte[len]; - - int n = childBytes.length; - - System.arraycopy(childBytes, 0, ret, 0, n); - while (n <= len / 2) { - System.arraycopy(ret, 0, ret, n, n); - n *= 2; - } - System.arraycopy(ret, 0, ret, n, len - n); - - return ret; - } - - return super.getBytesSlow(); - } - - public ManagedRope getChild() { - return child; - } - - public int getTimes() { - return times; - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/Rope.java b/src/main/java/org/truffleruby/core/rope/Rope.java deleted file mode 100644 index 49458dc8f61c..000000000000 --- a/src/main/java/org/truffleruby/core/rope/Rope.java +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its 
affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.rope; - -import java.lang.management.ManagementFactory; -import java.util.Arrays; -import java.util.List; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; - -public abstract class Rope implements Comparable { - - // NativeRope, RepeatingRope, 3 LeafRope, ConcatRope, SubstringRope, 1 LazyRope - public static final int NUMBER_OF_CONCRETE_CLASSES = 8; - - public final Encoding encoding; - private final int byteLength; - private int hashCode = 0; - protected byte[] bytes; - - protected Rope(Encoding encoding, int byteLength, byte[] bytes) { - assert encoding != null; - - this.encoding = encoding; - this.byteLength = byteLength; - this.bytes = bytes; - } - - /** Only used internally by WithEncodingNode. Returns a Rope with the given Encoding. Both the original and new - * Encodings must be ASCII-compatible and the rope must be {@link #isAsciiOnly()}. */ - abstract Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull); - - /** Only used internally by WithEncodingNode. Returns a Rope with the BINARY Encoding. The original Encoding must be - * ASCII-compatible and {@link #getCodeRange()} must be {@link CodeRange#CR_VALID} to call this. 
*/ - abstract Rope withBinaryEncoding(ConditionProfile bytesNotNull); - - public abstract int characterLength(); - - public final int byteLength() { - return byteLength; - } - - public final boolean isEmpty() { - return byteLength == 0; - } - - protected abstract byte getByteSlow(int index); - - public final byte[] getRawBytes() { - return bytes; - } - - public abstract byte[] getBytes(); - - /** The caller of this method will cache the resulting byte[]. */ - protected byte[] getBytesSlow() { - return RopeOperations.flattenBytes(this); - } - - public final byte[] getBytesCopy() { - return getBytes().clone(); - } - - public final Encoding getEncoding() { - return encoding; - } - - public abstract CodeRange getCodeRange(); - - public final boolean isSingleByteOptimizable() { - return getCodeRange() == CodeRange.CR_7BIT || getEncoding().isSingleByte(); - } - - public final boolean isAsciiOnly() { - return getCodeRange() == CodeRange.CR_7BIT; - } - - @Override - @TruffleBoundary - public int hashCode() { - if (!isHashCodeCalculated()) { - hashCode = RopeOperations.hashForRange(this, 1, 0, byteLength); - } - - return hashCode; - } - - public final boolean isHashCodeCalculated() { - return hashCode != 0; - } - - public final int calculatedHashCode() { - return hashCode; - } - - @TruffleBoundary - public boolean bytesEqual(Rope other) { - /* What is the right strategy to compare ropes for byte equality? There are lots of options. We're going to - * force and compare the hash codes, and then flatten for a byte equality. Both the intermediate hash - * generations of the nodes, and the final Array.equals if needed, should have good inner-loop - * implementations. 
*/ - return this.hashCode() == other.hashCode() && Arrays.equals(this.getBytes(), other.getBytes()); - } - - @Override - @TruffleBoundary - public int compareTo(Rope other) { - final byte[] selfBytes = getBytes(); - final byte[] otherBytes = other.getBytes(); - final int selfLen = selfBytes.length; - final int otherLen = otherBytes.length; - final int compareLen = Math.min(selfLen, otherLen); - int i = 0; - while (i < compareLen) { - final byte selfByte = selfBytes[i]; - final byte otherByte = otherBytes[i]; - if (selfByte != otherByte) { - return selfByte - otherByte; - } - i++; - } - return selfLen - otherLen; - } - - @Override - public final boolean equals(Object o) { - if (this == o) { - return true; - } - - if (o instanceof Rope) { - final Rope other = (Rope) o; - - if (isHashCodeCalculated() && other.isHashCodeCalculated() && (hashCode != other.hashCode)) { - return false; - } - - return encoding == other.getEncoding() && byteLength() == other.byteLength() && - Arrays.equals(getBytes(), other.getBytes()); - } - - return false; - } - - public byte get(int index) { - if (bytes != null) { - return bytes[index]; - } - - return getByteSlow(index); - } - - private static boolean isJavaDebuggerAttached() { - final List inputArguments = ManagementFactory.getRuntimeMXBean().getInputArguments(); - for (String arg : inputArguments) { - if (arg.contains("jdwp")) { - return true; - } - } - return false; - } - - static final boolean JAVA_DEBUGGER = isJavaDebuggerAttached(); - - /** This is designed to not have any side effects - compare to {@link #getJavaString} - but this makes it - * inefficient - for debugging only */ - @Override - public String toString() { - assert JAVA_DEBUGGER - : "Rope#toString() should only be called by Java debuggers, use RubyStringLibrary or RopeOperations.decodeRope() instead"; - return RopeOperations.decode(encoding, RopeOperations.flattenBytes(this)); - } - - /** Should only be used by the parser - it has side effects */ - public final String 
getJavaString() { - return RopeOperations.decodeRope(this); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RopeBuilder.java b/src/main/java/org/truffleruby/core/rope/RopeBuilder.java deleted file mode 100644 index 16209312077e..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeBuilder.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.rope; - -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; - -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.truffleruby.collections.ByteArrayBuilder; - -public class RopeBuilder extends ByteArrayBuilder { - - private Encoding encoding = ASCIIEncoding.INSTANCE; - - public RopeBuilder() { - super(); - } - - public RopeBuilder(int size) { - super(size); - } - - public static RopeBuilder createRopeBuilder(int size) { - return new RopeBuilder(size); - } - - public static RopeBuilder createRopeBuilder(byte[] bytes, Encoding encoding) { - final RopeBuilder builder = new RopeBuilder(bytes.length); - builder.append(bytes); - builder.setEncoding(encoding); - return builder; - } - - public static RopeBuilder createRopeBuilder(byte[] wrap) { - final RopeBuilder builder = new RopeBuilder(wrap.length); - builder.append(wrap); - return builder; - } - - public static RopeBuilder createRopeBuilder(byte[] wrap, int index, int len) { - final RopeBuilder builder = new RopeBuilder(len); - builder.append(wrap, index, len); - return builder; - } - - public Encoding getEncoding() { - return encoding; - } - - public void setEncoding(Encoding encoding) { - this.encoding = encoding; - } - - public void 
append(Rope other) { - append(other.getBytes()); - } - - public ManagedRope toRope() { - return toRope(CR_UNKNOWN); - } - - public ManagedRope toRope(CodeRange codeRange) { - // TODO CS 17-Jan-16 can we take the bytes from the RopeBuilder and set its bytes to null so it can't use them again - return RopeOperations.create(getBytes(), encoding, codeRange); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RopeCache.java b/src/main/java/org/truffleruby/core/rope/RopeCache.java deleted file mode 100644 index 72e2c87ec481..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeCache.java +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2013, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.CompilerDirectives; -import org.jcodings.Encoding; -import org.truffleruby.collections.WeakValueCache; -import org.truffleruby.core.string.FrozenStrings; -import org.truffleruby.core.symbol.CoreSymbols; -import org.truffleruby.core.symbol.RubySymbol; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; - -public class RopeCache { - - private final WeakValueCache bytesToRope = new WeakValueCache<>(); - - private int byteArrayReusedCount; - private int ropesReusedCount; - private int ropeBytesSaved; - - public RopeCache(CoreSymbols coreSymbols) { - addRopeConstants(); - addCoreSymbolRopes(coreSymbols); - addFrozenStrings(); - } - - private void addFrozenStrings() { - for (LeafRope rope : FrozenStrings.ROPES) { - register(rope); - } - } - - private void addRopeConstants() { - for (LeafRope rope : RopeConstants.UTF8_SINGLE_BYTE_ROPES) { - register(rope); - } - for (LeafRope rope : RopeConstants.US_ASCII_SINGLE_BYTE_ROPES) { - register(rope); - } - for (LeafRope rope : RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES) { - register(rope); - } - for (LeafRope rope : RopeConstants.ROPE_CONSTANTS.values()) { - register(rope); - } - } - - private void addCoreSymbolRopes(CoreSymbols coreSymbols) { - for (RubySymbol symbol : coreSymbols.CORE_SYMBOLS) { - register(symbol.getRope()); - } - } - - private void register(LeafRope rope) { - final BytesKey key = new BytesKey(rope.getBytes(), rope.getEncoding()); - final Rope existing = bytesToRope.put(key, rope); - if (existing != null && existing != rope) { - throw CompilerDirectives.shouldNotReachHere("Duplicate Rope in RopeCache: " + existing); - } - } - - public LeafRope getRope(Rope string) { - return getRope(string.getBytes(), string.getEncoding(), string.getCodeRange()); - } - - @TruffleBoundary - public LeafRope getRope(byte[] bytes, Encoding encoding, CodeRange codeRange) { - assert encoding != null; - - final BytesKey key = new 
BytesKey(bytes, encoding); - - final LeafRope rope = bytesToRope.get(key); - if (rope != null) { - ++ropesReusedCount; - ropeBytesSaved += rope.byteLength(); - - return rope; - } - - // At this point, we were unable to find a rope with the same bytes and encoding (i.e., a direct match). - // However, there may still be a rope with the same byte[] and sharing a direct byte[] can still allow some - // reference equality optimizations. So, do another search but with a marker encoding. The only guarantee - // we can make about the resulting rope is that it would have the same logical byte[], but that's good enough - // for our purposes. - final Rope ropeWithSameBytesButDifferentEncoding = bytesToRope.get(new BytesKey(bytes, null)); - - final LeafRope newRope; - if (ropeWithSameBytesButDifferentEncoding != null) { - newRope = RopeOperations.create(ropeWithSameBytesButDifferentEncoding.getBytes(), encoding, codeRange); - - ++byteArrayReusedCount; - ropeBytesSaved += newRope.byteLength(); - } else { - newRope = RopeOperations.create(bytes, encoding, codeRange); - } - - // Use the new Rope bytes in the cache, so we do not keep bytes alive unnecessarily. 
- final BytesKey newKey = new BytesKey(newRope.getBytes(), newRope.getEncoding()); - return bytesToRope.addInCacheIfAbsent(newKey, newRope); - } - - public boolean contains(Rope rope) { - final BytesKey key = new BytesKey(rope.getBytes(), rope.getEncoding()); - - return bytesToRope.get(key) != null; - } - - public int getByteArrayReusedCount() { - return byteArrayReusedCount; - } - - public int getRopesReusedCount() { - return ropesReusedCount; - } - - public int getRopeBytesSaved() { - return ropeBytesSaved; - } - - public int totalRopes() { - return bytesToRope.size(); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RopeConstants.java b/src/main/java/org/truffleruby/core/rope/RopeConstants.java deleted file mode 100644 index 0f77ce81790b..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeConstants.java +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; - -import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; - -public class RopeConstants { - - public static final Map ROPE_CONSTANTS = new HashMap<>(); - - public static final byte[] EMPTY_BYTES = new byte[0]; - - public static final LeafRope EMPTY_ASCII_8BIT_ROPE = withHashCode( - new AsciiOnlyLeafRope(EMPTY_BYTES, ASCIIEncoding.INSTANCE)); - public static final LeafRope EMPTY_US_ASCII_ROPE = withHashCode( - new AsciiOnlyLeafRope(EMPTY_BYTES, USASCIIEncoding.INSTANCE)); - public static final LeafRope EMPTY_UTF8_ROPE = withHashCode( - new AsciiOnlyLeafRope(EMPTY_BYTES, UTF8Encoding.INSTANCE)); - - @CompilationFinal(dimensions = 1) public static final LeafRope[] UTF8_SINGLE_BYTE_ROPES = new LeafRope[256]; - @CompilationFinal(dimensions = 1) public static final LeafRope[] US_ASCII_SINGLE_BYTE_ROPES = new LeafRope[256]; - @CompilationFinal(dimensions = 1) public static final LeafRope[] ASCII_8BIT_SINGLE_BYTE_ROPES = new LeafRope[256]; - - static { - for (int i = 0; i < 128; i++) { - final byte[] bytes = new byte[]{ (byte) i }; - - UTF8_SINGLE_BYTE_ROPES[i] = withHashCode(new AsciiOnlyLeafRope(bytes, UTF8Encoding.INSTANCE)); - US_ASCII_SINGLE_BYTE_ROPES[i] = withHashCode(new AsciiOnlyLeafRope(bytes, USASCIIEncoding.INSTANCE)); - ASCII_8BIT_SINGLE_BYTE_ROPES[i] = withHashCode(new AsciiOnlyLeafRope(bytes, ASCIIEncoding.INSTANCE)); - } - - for (int i = 128; i < 256; i++) { - final byte[] bytes = new byte[]{ (byte) i }; - - UTF8_SINGLE_BYTE_ROPES[i] = withHashCode(new InvalidLeafRope(bytes, UTF8Encoding.INSTANCE, 1)); - US_ASCII_SINGLE_BYTE_ROPES[i] = withHashCode(new InvalidLeafRope(bytes, USASCIIEncoding.INSTANCE, 1)); - ASCII_8BIT_SINGLE_BYTE_ROPES[i] = withHashCode(new ValidLeafRope(bytes, 
ASCIIEncoding.INSTANCE, 1)); - } - } - - public static final Rope AMPERSAND = ascii("&"); - public static final Rope AMPERSAND_AMPERSAND = ascii("&&"); - public static final Rope AMPERSAND_DOT = ascii("&."); - public static final Rope BACKTICK = ascii("`"); - public static final Rope BACKSLASH = ascii("\\"); - public static final Rope BANG = ascii("!"); - public static final Rope BANG_EQ = ascii("!="); - public static final Rope BANG_TILDE = ascii("!~"); - public static final Rope CALL = ascii("call"); - public static final Rope CARET = ascii("^"); - public static final Rope COLON = ascii(":"); - public static final Rope COLON_COLON = ascii("::"); - public static final Rope COMMA = ascii(","); - public static final Rope DOT = ascii("."); - public static final Rope DOT_DOT = ascii(".."); - public static final Rope DOT_DOT_DOT = ascii("..."); - public static final Rope DOLLAR_BANG = ascii("$!"); - public static final Rope DOLLAR_ZERO = ascii("$0"); - public static final Rope EQ = ascii("="); - public static final Rope EQ_EQ = ascii("=="); - public static final Rope EQ_EQ_EQ = ascii("==="); - public static final Rope EQ_GT = ascii("=>"); - public static final Rope EQ_TILDE = ascii("=~"); - public static final Rope FALSE = ascii("false"); - public static final Rope GT = ascii(">"); - public static final Rope GT_EQ = ascii(">="); - public static final Rope GT_GT = ascii(">>"); - public static final Rope LBRACKET = ascii("["); - public static final Rope LBRACKET_RBRACKET = ascii("[]"); - public static final Rope LBRACKET_RBRACKET_EQ = ascii("[]="); - public static final Rope LCURLY = ascii("{"); - public static final Rope LT = ascii("<"); - public static final Rope LT_EQ = ascii("<="); - public static final Rope LT_EQ_GT = ascii("<=>"); - public static final Rope LT_LT = ascii("<<"); - public static final Rope MINUS = ascii("-"); - public static final Rope MINUS_AT = ascii("-@"); - public static final Rope MINUS_GT = ascii("->"); - public static final Rope NIL = 
ascii("nil"); - public static final Rope OR = ascii("|"); - public static final Rope OR_OR = ascii("||"); - public static final Rope PERCENT = ascii("%"); - public static final Rope PLUS = ascii("+"); - public static final Rope PLUS_AT = ascii("+@"); - public static final Rope Q = ascii("'"); - public static final Rope QQ = ascii("\""); - public static final Rope QUESTION = ascii("?"); - public static final Rope RBRACKET = ascii("]"); - public static final Rope RCURLY = ascii("}"); - public static final Rope RPAREN = ascii(")"); - public static final Rope SEMICOLON = ascii(";"); - public static final Rope SLASH = ascii("/"); - public static final Rope STAR = ascii("*"); - public static final Rope STAR_STAR = ascii("**"); - public static final Rope TILDE = ascii("~"); - public static final Rope TRUE = ascii("true"); - - - // Encoding names, generated by: - // names = Encoding.list.map { |e| e.name } - // names.each { |n| puts "public static final Rope #{n.upcase.gsub('-','_')} = ascii(\"#{n}\");" } - public static final Rope ASCII_8BIT = ascii("ASCII-8BIT"); - public static final Rope US_ASCII = ascii("US-ASCII"); - public static final Rope UTF_8 = ascii("UTF-8"); - public static final Rope BIG5 = ascii("Big5"); - public static final Rope BIG5_HKSCS = ascii("Big5-HKSCS"); - public static final Rope BIG5_UAO = ascii("Big5-UAO"); - public static final Rope CP949 = ascii("CP949"); - public static final Rope EMACS_MULE = ascii("Emacs-Mule"); - public static final Rope EUC_JP = ascii("EUC-JP"); - public static final Rope EUC_KR = ascii("EUC-KR"); - public static final Rope EUC_TW = ascii("EUC-TW"); - public static final Rope GB18030 = ascii("GB18030"); - public static final Rope GBK = ascii("GBK"); - public static final Rope ISO_8859_1 = ascii("ISO-8859-1"); - public static final Rope ISO_8859_2 = ascii("ISO-8859-2"); - public static final Rope ISO_8859_3 = ascii("ISO-8859-3"); - public static final Rope ISO_8859_4 = ascii("ISO-8859-4"); - public static final Rope 
ISO_8859_5 = ascii("ISO-8859-5"); - public static final Rope ISO_8859_6 = ascii("ISO-8859-6"); - public static final Rope ISO_8859_7 = ascii("ISO-8859-7"); - public static final Rope ISO_8859_8 = ascii("ISO-8859-8"); - public static final Rope ISO_8859_9 = ascii("ISO-8859-9"); - public static final Rope ISO_8859_10 = ascii("ISO-8859-10"); - public static final Rope ISO_8859_11 = ascii("ISO-8859-11"); - public static final Rope ISO_8859_13 = ascii("ISO-8859-13"); - public static final Rope ISO_8859_14 = ascii("ISO-8859-14"); - public static final Rope ISO_8859_15 = ascii("ISO-8859-15"); - public static final Rope ISO_8859_16 = ascii("ISO-8859-16"); - public static final Rope KOI8_R = ascii("KOI8-R"); - public static final Rope KOI8_U = ascii("KOI8-U"); - public static final Rope SHIFT_JIS = ascii("Shift_JIS"); - public static final Rope UTF_16BE = ascii("UTF-16BE"); - public static final Rope UTF_16LE = ascii("UTF-16LE"); - public static final Rope UTF_32BE = ascii("UTF-32BE"); - public static final Rope UTF_32LE = ascii("UTF-32LE"); - public static final Rope WINDOWS_31J = ascii("Windows-31J"); - public static final Rope WINDOWS_1250 = ascii("Windows-1250"); - public static final Rope WINDOWS_1251 = ascii("Windows-1251"); - public static final Rope WINDOWS_1252 = ascii("Windows-1252"); - public static final Rope WINDOWS_1253 = ascii("Windows-1253"); - public static final Rope WINDOWS_1254 = ascii("Windows-1254"); - public static final Rope WINDOWS_1257 = ascii("Windows-1257"); - public static final Rope IBM437 = ascii("IBM437"); - public static final Rope IBM737 = ascii("IBM737"); - public static final Rope IBM775 = ascii("IBM775"); - public static final Rope CP850 = ascii("CP850"); - public static final Rope IBM852 = ascii("IBM852"); - public static final Rope CP852 = ascii("CP852"); - public static final Rope IBM855 = ascii("IBM855"); - public static final Rope CP855 = ascii("CP855"); - public static final Rope IBM857 = ascii("IBM857"); - public static final Rope 
IBM860 = ascii("IBM860"); - public static final Rope IBM861 = ascii("IBM861"); - public static final Rope IBM862 = ascii("IBM862"); - public static final Rope IBM863 = ascii("IBM863"); - public static final Rope IBM864 = ascii("IBM864"); - public static final Rope IBM865 = ascii("IBM865"); - public static final Rope IBM866 = ascii("IBM866"); - public static final Rope IBM869 = ascii("IBM869"); - public static final Rope WINDOWS_1258 = ascii("Windows-1258"); - public static final Rope GB1988 = ascii("GB1988"); - public static final Rope MACCENTEURO = ascii("macCentEuro"); - public static final Rope MACCROATIAN = ascii("macCroatian"); - public static final Rope MACCYRILLIC = ascii("macCyrillic"); - public static final Rope MACGREEK = ascii("macGreek"); - public static final Rope MACICELAND = ascii("macIceland"); - public static final Rope MACROMAN = ascii("macRoman"); - public static final Rope MACROMANIA = ascii("macRomania"); - public static final Rope MACTHAI = ascii("macThai"); - public static final Rope MACTURKISH = ascii("macTurkish"); - public static final Rope MACUKRAINE = ascii("macUkraine"); - public static final Rope CP950 = ascii("CP950"); - public static final Rope CP951 = ascii("CP951"); - public static final Rope IBM037 = ascii("IBM037"); - public static final Rope STATELESS_ISO_2022_JP = ascii("stateless-ISO-2022-JP"); - public static final Rope EUCJP_MS = ascii("eucJP-ms"); - public static final Rope CP51932 = ascii("CP51932"); - public static final Rope EUC_JIS_2004 = ascii("EUC-JIS-2004"); - public static final Rope GB2312 = ascii("GB2312"); - public static final Rope GB12345 = ascii("GB12345"); - public static final Rope ISO_2022_JP = ascii("ISO-2022-JP"); - public static final Rope ISO_2022_JP_2 = ascii("ISO-2022-JP-2"); - public static final Rope CP50220 = ascii("CP50220"); - public static final Rope CP50221 = ascii("CP50221"); - public static final Rope WINDOWS_1256 = ascii("Windows-1256"); - public static final Rope WINDOWS_1255 = 
ascii("Windows-1255"); - public static final Rope TIS_620 = ascii("TIS-620"); - public static final Rope WINDOWS_874 = ascii("Windows-874"); - public static final Rope MACJAPANESE = ascii("MacJapanese"); - public static final Rope UTF_7 = ascii("UTF-7"); - public static final Rope UTF8_MAC = ascii("UTF8-MAC"); - public static final Rope UTF_16 = ascii("UTF-16"); - public static final Rope UTF_32 = ascii("UTF-32"); - public static final Rope UTF8_DOCOMO = ascii("UTF8-DoCoMo"); - public static final Rope SJIS_DOCOMO = ascii("SJIS-DoCoMo"); - public static final Rope UTF8_KDDI = ascii("UTF8-KDDI"); - public static final Rope SJIS_KDDI = ascii("SJIS-KDDI"); - public static final Rope ISO_2022_JP_KDDI = ascii("ISO-2022-JP-KDDI"); - public static final Rope STATELESS_ISO_2022_JP_KDDI = ascii("stateless-ISO-2022-JP-KDDI"); - public static final Rope UTF8_SOFTBANK = ascii("UTF8-SoftBank"); - public static final Rope SJIS_SOFTBANK = ascii("SJIS-SoftBank"); - - private static Rope ascii(String string) { - if (string.length() == 1) { - return US_ASCII_SINGLE_BYTE_ROPES[string.charAt(0)]; - } else { - final byte[] bytes = RopeOperations.encodeAsciiBytes(string); - final LeafRope rope = withHashCode(new AsciiOnlyLeafRope(bytes, USASCIIEncoding.INSTANCE)); - final Rope existing = ROPE_CONSTANTS.putIfAbsent(string, rope); - if (existing != null) { - throw new AssertionError("Duplicate Rope in RopeConstants: " + existing); - } - return rope; - } - } - - public static LeafRope lookupUSASCII(String string) { - if (string.length() == 1) { - return US_ASCII_SINGLE_BYTE_ROPES[string.charAt(0)]; - } else { - return ROPE_CONSTANTS.get(string); - } - } - - @CompilationFinal(dimensions = 1) private static final LeafRope[] PADDED_NUMBERS = createPaddedNumbersTable(); - - private static LeafRope[] createPaddedNumbersTable() { - final LeafRope[] table = new LeafRope[100]; - - for (int n = 0; n < table.length; n++) { - table[n] = new AsciiOnlyLeafRope( - new byte[]{ (byte) ('0' + n / 10), 
(byte) ('0' + n % 10) }, - UTF8Encoding.INSTANCE); - } - - return table; - } - - /*** Zero-padded numbers in the format %02d, between 00 and 99. */ - public static LeafRope paddedNumber(int n) { - return PADDED_NUMBERS[n]; - } - - @CompilationFinal(dimensions = 1) private static final LeafRope[] PADDING_ZEROS = createPaddingZeroTable(); - - private static LeafRope[] createPaddingZeroTable() { - final LeafRope[] table = new LeafRope[6]; - - for (int n = 0; n < table.length; n++) { - final byte[] bytes = new byte[n]; - - Arrays.fill(bytes, (byte) '0'); - - table[n] = new AsciiOnlyLeafRope(bytes, UTF8Encoding.INSTANCE); - } - - return table; - } - - public static LeafRope paddingZeros(int n) { - return PADDING_ZEROS[n]; - } - - private static T withHashCode(T object) { - object.hashCode(); - return object; - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RopeGuards.java b/src/main/java/org/truffleruby/core/rope/RopeGuards.java deleted file mode 100644 index 150fa9631038..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeGuards.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ - - -package org.truffleruby.core.rope; - -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; - -public class RopeGuards { - - public static boolean isSingleByteString(Rope rope) { - return rope.byteLength() == 1; - } - - public static boolean isLeafRope(Rope rope) { - return rope instanceof LeafRope; - } - - public static boolean isEmpty(byte[] bytes) { - return bytes.length == 0; - } - - public static boolean isBinaryString(Encoding encoding) { - return encoding == ASCIIEncoding.INSTANCE; - } - - public static boolean isAsciiCompatible(Encoding encoding) { - return encoding.isAsciiCompatible(); - } - - public static boolean isFixedWidthEncoding(Rope rope) { - return rope.getEncoding().isFixedWidth(); - } - - public static boolean is7Bit(Rope rope, RopeNodes.CodeRangeNode codeRangeNode) { - return codeRangeNode.execute(rope) == CodeRange.CR_7BIT; - } - - public static boolean isBroken(Rope rope, RopeNodes.CodeRangeNode codeRangeNode) { - return codeRangeNode.execute(rope) == CodeRange.CR_BROKEN; - } - - public static boolean isAsciiCompatible(Rope rope) { - return rope.getEncoding().isAsciiCompatible(); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RopeNodes.java b/src/main/java/org/truffleruby/core/rope/RopeNodes.java deleted file mode 100644 index 9743ad4e4c20..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeNodes.java +++ /dev/null @@ -1,1849 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - * - * - * Some of the code in this class is modified from org.jruby.util.StringSupport, - * licensed under the same EPL 2.0/GPL 2.0/LGPL 2.1 used throughout. 
- */ - -package org.truffleruby.core.rope; - -import static org.truffleruby.core.rope.CodeRange.CR_7BIT; -import static org.truffleruby.core.rope.CodeRange.CR_BROKEN; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; -import static org.truffleruby.core.rope.CodeRange.CR_VALID; - -import java.util.Arrays; - -import com.oracle.truffle.api.TruffleSafepoint; -import com.oracle.truffle.api.dsl.Bind; -import com.oracle.truffle.api.dsl.Cached.Exclusive; -import com.oracle.truffle.api.dsl.Cached.Shared; -import com.oracle.truffle.api.profiles.LoopConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; -import org.truffleruby.SuppressFBWarnings; -import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.ConcatRope.ConcatState; -import org.truffleruby.core.rope.RopeNodesFactory.AreComparableRopesNodeGen; -import org.truffleruby.core.rope.RopeNodesFactory.CompareRopesNodeGen; -import org.truffleruby.core.rope.RopeNodesFactory.SetByteNodeGen; -import org.truffleruby.core.string.StringAttributes; -import org.truffleruby.core.string.StringSupport; -import org.truffleruby.language.NotProvided; -import org.truffleruby.language.RubyBaseNode; -import org.truffleruby.language.control.RaiseException; -import org.truffleruby.utils.Utils; - -import com.oracle.truffle.api.CompilerDirectives; -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.dsl.Fallback; -import com.oracle.truffle.api.dsl.GenerateUncached; -import com.oracle.truffle.api.dsl.ImportStatic; -import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.nodes.SlowPathException; -import com.oracle.truffle.api.profiles.BranchProfile; -import com.oracle.truffle.api.profiles.ConditionProfile; - -public abstract class RopeNodes { - - // Preserves encoding 
of the top-level Rope - @GenerateUncached - public abstract static class SubstringNode extends RubyBaseNode { - - public static SubstringNode create() { - return RopeNodesFactory.SubstringNodeGen.create(); - } - - public abstract Rope executeSubstring(Rope base, int byteOffset, int byteLength); - - @Specialization(guards = "byteLength == 0") - protected Rope substringZeroBytes(Rope base, int byteOffset, int byteLength, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - return makeLeafRopeNode.executeMake(RopeConstants.EMPTY_BYTES, base.getEncoding(), CR_UNKNOWN, 0); - } - - @Specialization(guards = "byteLength == 1") - protected Rope substringOneByte(Rope base, int byteOffset, int byteLength, - @Cached ConditionProfile isUTF8, - @Cached ConditionProfile isUSAscii, - @Cached ConditionProfile isAscii8Bit, - @Cached GetByteNode getByteNode, - @Cached WithEncodingNode withEncodingNode) { - final int index = getByteNode.executeGetByte(base, byteOffset); - - if (isUTF8.profile(base.getEncoding() == UTF8Encoding.INSTANCE)) { - return RopeConstants.UTF8_SINGLE_BYTE_ROPES[index]; - } - - if (isUSAscii.profile(base.getEncoding() == USASCIIEncoding.INSTANCE)) { - return RopeConstants.US_ASCII_SINGLE_BYTE_ROPES[index]; - } - - if (isAscii8Bit.profile(base.getEncoding() == ASCIIEncoding.INSTANCE)) { - return RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES[index]; - } - - return withEncodingNode - .executeWithEncoding(RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES[index], base.getEncoding()); - } - - @Specialization(guards = { "byteLength > 1", "sameAsBase(base, byteLength)" }) - protected Rope substringSameAsBase(Rope base, int byteOffset, int byteLength) { - return base; - } - - @Specialization(guards = { "byteLength > 1", "!sameAsBase(base, byteLength)" }) - protected Rope substringLeafRope(LeafRope base, int byteOffset, int byteLength, - @Cached MakeSubstringRopeNode makeSubstringRopeNode) { - return makeSubstringRopeNode.executeMake(base.getEncoding(), base, byteOffset, byteLength); - 
} - - @Specialization(guards = { "byteLength > 1", "!sameAsBase(base, byteLength)" }) - protected Rope substringSubstringRope(SubstringRope base, int byteOffset, int byteLength, - @Cached MakeSubstringRopeNode makeSubstringRopeNode) { - return substringSubstringRopeWithEncoding( - base.getEncoding(), - base, - byteOffset, - byteLength, - makeSubstringRopeNode); - } - - private Rope substringSubstringRopeWithEncoding(Encoding encoding, SubstringRope rope, int byteOffset, - int byteLength, MakeSubstringRopeNode makeSubstringRopeNode) { - return makeSubstringRopeNode - .executeMake(encoding, rope.getChild(), byteOffset + rope.getByteOffset(), byteLength); - } - - @Specialization(guards = { "byteLength > 1", "!sameAsBase(base, byteLength)" }) - protected Rope substringRepeatingRope(RepeatingRope base, int byteOffset, int byteLength, - @Cached WithEncodingNode withEncodingNode, - @Cached MakeSubstringRopeNode makeSubstringRopeNode, - @Cached ConditionProfile matchesChildProfile) { - return substringRepeatingRopeWithEncoding( - base.getEncoding(), - base, - byteOffset, - byteLength, - matchesChildProfile, - makeSubstringRopeNode, - withEncodingNode); - } - - private Rope substringRepeatingRopeWithEncoding(Encoding encoding, RepeatingRope rope, int byteOffset, - int byteLength, ConditionProfile matchesChildProfile, MakeSubstringRopeNode makeSubstringRopeNode, - WithEncodingNode withEncodingNode) { - final boolean offsetFitsChild = byteOffset % rope.getChild().byteLength() == 0; - final boolean byteLengthFitsChild = byteLength == rope.getChild().byteLength(); - - // TODO (nirvdrum 07-Apr-16) We can specialize any number of children that fit perfectly into the length, not just count == 1. But we may need to create a new RepeatingNode to handle count > 1. 
- if (matchesChildProfile.profile(offsetFitsChild && byteLengthFitsChild)) { - return withEncodingNode.executeWithEncoding(rope.getChild(), encoding); - } - - return makeSubstringRopeNode.executeMake(encoding, rope, byteOffset, byteLength); - } - - @Specialization(guards = { "byteLength > 1", "!sameAsBase(base, byteLength)" }) - protected Rope substringLazyRope(LazyIntRope base, int byteOffset, int byteLength, - @Cached MakeSubstringRopeNode makeSubstringRopeNode) { - return makeSubstringRopeNode.executeMake(base.getEncoding(), base, byteOffset, byteLength); - } - - @Specialization(guards = { "byteLength > 1", "!sameAsBase(base, byteLength)" }) - protected Rope substringNativeRope(NativeRope base, int byteOffset, int byteLength, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - return makeLeafRopeNode.executeMake( - base.getBytes(byteOffset, byteLength), - base.getEncoding(), - CR_UNKNOWN, - NotProvided.INSTANCE); - } - - @Specialization(guards = { "byteLength > 1", "!sameAsBase(base, byteLength)" }) - protected Rope substringConcatRope(ConcatRope base, int byteOffset, int byteLength, - @Cached BytesNode bytesNode, - @Cached MakeSubstringRopeNode makeSubstringRopeNode) { - // NOTE(norswap, 19 Nov 2020): - // We flatten the rope here. This avoids issue in the (fairly common) case where the rope tree is basically - // a linked list. In that case, reading successive substrings causes increasingly bigger concat ropes - // to be flattened. So better to preventively flatten at the top. This is also generally beneficial if - // we shift from a write-heavy load (rope tree creation) to a read-heavy load. - bytesNode.execute(base); // flatten rope - return makeSubstringRopeNode.executeMake(base.getEncoding(), base, byteOffset, byteLength); - } - - protected static boolean sameAsBase(Rope base, int byteLength) { - // A SubstringRope's byte length is not allowed to be larger than its child. 
Thus, if it has the same - // byte length as its child, it must be logically equivalent to the child. - return byteLength == base.byteLength(); - } - - } - - @GenerateUncached - public abstract static class MakeSubstringRopeNode extends RubyBaseNode { - - public static MakeSubstringRopeNode create() { - return RopeNodesFactory.MakeSubstringRopeNodeGen.create(); - } - - public abstract Rope executeMake(Encoding encoding, Rope base, int byteOffset, int byteLength); - - @Specialization(guards = "base.isAsciiOnly()") - protected Rope makeSubstring7Bit(Encoding encoding, ManagedRope base, int byteOffset, int byteLength) { - return new SubstringRope(encoding, base, byteOffset, byteLength, byteLength, CR_7BIT); - } - - @Specialization(guards = "!base.isAsciiOnly()") - protected Rope makeSubstringNon7Bit(Encoding encoding, ManagedRope base, int byteOffset, int byteLength, - @Cached GetBytesObjectNode getBytesObject, - @Cached CalculateAttributesNode calculateAttributes) { - - final StringAttributes attributes = calculateAttributes - .executeCalculateAttributes(encoding, getBytesObject.execute(base, byteOffset, byteLength)); - - final CodeRange codeRange = attributes.getCodeRange(); - final int characterLength = attributes.getCharacterLength(); - - return new SubstringRope(encoding, base, byteOffset, byteLength, characterLength, codeRange); - } - - @Specialization - protected Rope makeSubstringNativeRope(Encoding encoding, NativeRope base, int byteOffset, int byteLength, - @Cached ConditionProfile asciiOnlyProfile, - @Cached AsciiOnlyNode asciiOnlyNode, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - final byte[] bytes = new byte[byteLength]; - base.copyTo(byteOffset, bytes, 0, byteLength); - - final CodeRange codeRange; - final Object characterLength; - - if (asciiOnlyProfile.profile(asciiOnlyNode.execute(base))) { - codeRange = CR_7BIT; - characterLength = byteLength; - } else { - codeRange = CR_UNKNOWN; - characterLength = NotProvided.INSTANCE; - } - - return 
makeLeafRopeNode.executeMake(bytes, encoding, codeRange, characterLength); - } - - } - - /** See {@link RopeOperations#calculateCodeRangeAndLength} */ - @ImportStatic(RopeGuards.class) - @GenerateUncached - public abstract static class CalculateAttributesNode extends RubyBaseNode { - - public static CalculateAttributesNode create() { - return RopeNodesFactory.CalculateAttributesNodeGen.create(); - } - - abstract StringAttributes executeCalculateAttributes(Encoding encoding, Bytes bytes); - - @Specialization(guards = "bytes.isEmpty()") - protected StringAttributes calculateAttributesEmpty(Encoding encoding, Bytes bytes, - @Cached ConditionProfile isAsciiCompatible) { - return new StringAttributes( - 0, - isAsciiCompatible.profile(encoding.isAsciiCompatible()) ? CR_7BIT : CR_VALID); - } - - @Specialization(guards = { "!bytes.isEmpty()", "isBinaryString(encoding)" }) - protected StringAttributes calculateAttributesBinaryString(Encoding encoding, Bytes bytes, - @Cached BranchProfile nonAsciiStringProfile) { - CodeRange codeRange = CR_7BIT; - - for (int i = 0; i < bytes.length; i++) { - if (bytes.get(i) < 0) { - nonAsciiStringProfile.enter(); - codeRange = CR_VALID; - break; - } - } - - return new StringAttributes(bytes.length, codeRange); - } - - @Specialization( - rewriteOn = NonAsciiCharException.class, - guards = { "!bytes.isEmpty()", "!isBinaryString(encoding)", "isAsciiCompatible(encoding)" }) - protected StringAttributes calculateAttributesAsciiCompatible(Encoding encoding, Bytes bytes, - @Cached LoopConditionProfile loopProfile) - throws NonAsciiCharException { - // Optimistically assume this string consists only of ASCII characters. If a non-ASCII character is found, - // fail over to a more generalized search. 
- - int i = 0; - try { - for (; loopProfile.inject(i < bytes.length); i++) { - if (bytes.get(i) < 0) { - throw new NonAsciiCharException(); - } - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, i); - } - - return new StringAttributes(bytes.length, CR_7BIT); - } - - /** See {@link StringSupport#strLengthWithCodeRangeAsciiCompatible} */ - @Specialization( - replaces = "calculateAttributesAsciiCompatible", - guards = { "!bytes.isEmpty()", "!isBinaryString(encoding)", "isAsciiCompatible(encoding)" }) - protected StringAttributes calculateAttributesAsciiCompatibleGeneric(Encoding encoding, Bytes bytes, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached ConditionProfile validCharacterProfile) { - CodeRange codeRange = CR_7BIT; - int characters = 0; - int p = 0; - final int end = bytes.length; - - while (p < end) { - if (Encoding.isAscii(bytes.get(p))) { - final int multiByteCharacterPosition = StringSupport.searchNonAscii(bytes.sliceRange(p, end)); - - if (multiByteCharacterPosition == -1) { - return new StringAttributes(characters + (end - p), codeRange); - } - - characters += multiByteCharacterPosition; - p += multiByteCharacterPosition; - } - - final int lengthOfCurrentCharacter = calculateCharacterLengthNode - .characterLength(encoding, CR_UNKNOWN, bytes.sliceRange(p, end)); - - if (validCharacterProfile.profile(lengthOfCurrentCharacter > 0)) { - if (codeRange != CR_BROKEN) { - codeRange = CR_VALID; - } - - p += lengthOfCurrentCharacter; - } else { - codeRange = CR_BROKEN; - p++; - } - - characters++; - } - - return new StringAttributes(characters, codeRange); - } - - /** See {@link StringSupport#strLengthWithCodeRangeNonAsciiCompatible} */ - @Specialization(guards = { "!bytes.isEmpty()", "!isBinaryString(encoding)", "!isAsciiCompatible(encoding)" }) - protected StringAttributes calculateAttributesNonAsciiCompatible(Encoding encoding, Bytes bytes, - @Cached CalculateCharacterLengthNode 
calculateCharacterLengthNode, - @Cached ConditionProfile validCharacterProfile, - @Cached ConditionProfile fixedWidthProfile) { - CodeRange codeRange = CR_VALID; - int characters; - int p = 0; - final int end = bytes.length; - - for (characters = 0; p < end; characters++) { - final int lengthOfCurrentCharacter = calculateCharacterLengthNode - .characterLength(encoding, CR_UNKNOWN, bytes.sliceRange(p, end)); - - if (validCharacterProfile.profile(lengthOfCurrentCharacter > 0)) { - p += lengthOfCurrentCharacter; - } else { - codeRange = CR_BROKEN; - - // If a string is detected as broken and we already know the character length due to a - // fixed width encoding, there's no value in visiting any more bytes. - if (fixedWidthProfile.profile(encoding.isFixedWidth())) { - characters = (bytes.length + encoding.minLength() - 1) / encoding.minLength(); - - return new StringAttributes(characters, CR_BROKEN); - } else { - p += encoding.minLength(); - } - } - } - - return new StringAttributes(characters, codeRange); - } - - protected static final class NonAsciiCharException extends SlowPathException { - private static final long serialVersionUID = 5550642254188358382L; - } - - } - - public abstract static class ConcatNode extends RubyBaseNode { - - public static ConcatNode create() { - return RopeNodesFactory.ConcatNodeGen.create(); - } - - public abstract Rope executeConcat(Rope left, Rope right, Encoding encoding); - - @Specialization - protected Rope concatNativeRopeLeft(NativeRope left, Rope right, Encoding encoding, - @Cached NativeToManagedNode nativeToManagedNode, - @Cached ConditionProfile emptyNativeRopeProfile, - @Cached WithEncodingNode withEncodingNode) { - if (emptyNativeRopeProfile.profile(left.isEmpty())) { - return withEncodingNode.executeWithEncoding(right, encoding); - } else { - return executeConcat(nativeToManagedNode.execute(left), right, encoding); - } - } - - @Specialization - protected Rope concatNativeRopeRight(Rope left, NativeRope right, Encoding 
encoding, - @Cached NativeToManagedNode nativeToManagedNode, - @Cached ConditionProfile emptyNativeRopeProfile, - @Cached WithEncodingNode withEncodingNode) { - if (emptyNativeRopeProfile.profile(right.isEmpty())) { - return withEncodingNode.executeWithEncoding(left, encoding); - } else { - return executeConcat(left, nativeToManagedNode.execute(right), encoding); - } - } - - @Specialization(guards = "left.isEmpty()") - protected Rope concatLeftEmpty(Rope left, ManagedRope right, Encoding encoding, - @Cached WithEncodingNode withEncodingNode) { - return withEncodingNode.executeWithEncoding(right, encoding); - } - - @Specialization(guards = "right.isEmpty()") - protected Rope concatRightEmpty(ManagedRope left, Rope right, Encoding encoding, - @Cached WithEncodingNode withEncodingNode) { - return withEncodingNode.executeWithEncoding(left, encoding); - } - - @SuppressFBWarnings("RV") - @Specialization(guards = { "!left.isEmpty()", "!right.isEmpty()", "!isCodeRangeBroken(left, right)" }) - protected Rope concat(ManagedRope left, ManagedRope right, Encoding encoding, - @Cached ConditionProfile sameCodeRangeProfile, - @Cached ConditionProfile brokenCodeRangeProfile) { - try { - Math.addExact(left.byteLength(), right.byteLength()); - } catch (ArithmeticException e) { - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().argumentErrorTooLargeString(this)); - } - - return new ConcatRope( - left, - right, - encoding, - commonCodeRange( - left.getCodeRange(), - right.getCodeRange(), - sameCodeRangeProfile, - brokenCodeRangeProfile)); - } - - @SuppressFBWarnings("RV") - @Specialization(guards = { "!left.isEmpty()", "!right.isEmpty()", "isCodeRangeBroken(left, right)" }) - protected Rope concatCrBroken(ManagedRope left, ManagedRope right, Encoding encoding, - @Cached MakeLeafRopeNode makeLeafRopeNode, - @Cached BytesNode leftBytesNode, - @Cached BytesNode rightBytesNode) { - // This specialization was added to a special case where broken code range(s), - 
// may concat to form a valid code range. - try { - Math.addExact(left.byteLength(), right.byteLength()); - } catch (ArithmeticException e) { - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().argumentErrorTooLargeString(this)); - } - - final byte[] leftBytes = leftBytesNode.execute(left); - final byte[] rightBytes = rightBytesNode.execute(right); - final byte[] bytes = new byte[leftBytes.length + rightBytes.length]; - System.arraycopy(leftBytes, 0, bytes, 0, leftBytes.length); - System.arraycopy(rightBytes, 0, bytes, leftBytes.length, rightBytes.length); - return makeLeafRopeNode.executeMake(bytes, encoding, CR_UNKNOWN, NotProvided.INSTANCE); - } - - public static CodeRange commonCodeRange(CodeRange first, CodeRange second, - ConditionProfile sameCodeRangeProfile, - ConditionProfile brokenCodeRangeProfile) { - if (sameCodeRangeProfile.profile(first == second)) { - return first; - } - - if (brokenCodeRangeProfile.profile((first == CR_BROKEN) || (second == CR_BROKEN))) { - return CR_BROKEN; - } - - // If we get this far, one must be CR_7BIT and the other must be CR_VALID, so promote to the more general code range. - return CR_VALID; - } - - public static CodeRange commonCodeRange(CodeRange first, CodeRange second) { - if (first == second) { - return first; - } - - if ((first == CR_BROKEN) || (second == CR_BROKEN)) { - return CR_BROKEN; - } - - // If we get this far, one must be CR_7BIT and the other must be CR_VALID, so promote to the more general code range. 
- return CR_VALID; - } - - protected static boolean isCodeRangeBroken(ManagedRope first, ManagedRope second) { - return first.getCodeRange() == CR_BROKEN || second.getCodeRange() == CR_BROKEN; - } - } - - @ImportStatic(RopeGuards.class) - @GenerateUncached - public abstract static class MakeLeafRopeNode extends RubyBaseNode { - - public static MakeLeafRopeNode create() { - return RopeNodesFactory.MakeLeafRopeNodeGen.create(); - } - - public abstract LeafRope executeMake(byte[] bytes, Encoding encoding, CodeRange codeRange, - Object characterLength); - - @Specialization(guards = "is7Bit(codeRange)") - protected LeafRope makeAsciiOnlyLeafRope( - byte[] bytes, Encoding encoding, CodeRange codeRange, Object characterLength) { - return new AsciiOnlyLeafRope(bytes, encoding); - } - - @Specialization(guards = "isValid(codeRange)") - protected LeafRope makeValidLeafRopeWithCharacterLength( - byte[] bytes, Encoding encoding, CodeRange codeRange, int characterLength) { - return new ValidLeafRope(bytes, encoding, characterLength); - } - - @Specialization(guards = { "isValid(codeRange)", "isFixedWidth(encoding)" }) - protected LeafRope makeValidLeafRopeFixedWidthEncoding( - byte[] bytes, Encoding encoding, CodeRange codeRange, NotProvided characterLength) { - final int calculatedCharacterLength = bytes.length / encoding.minLength(); - - return new ValidLeafRope(bytes, encoding, calculatedCharacterLength); - } - - @Specialization(guards = { "isValid(codeRange)", "!isFixedWidth(encoding)", "isAsciiCompatible(encoding)" }) - protected LeafRope makeValidLeafRopeAsciiCompat( - byte[] bytes, Encoding encoding, CodeRange codeRange, NotProvided characterLength, - @Cached BranchProfile errorProfile, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode) { - // Extracted from StringSupport.strLength. 
- - int calculatedCharacterLength = 0; - int p = 0; - int e = bytes.length; - - while (p < e) { - if (Encoding.isAscii(bytes[p])) { - int q = StringSupport.searchNonAscii(bytes, p, e); - if (q == -1) { - calculatedCharacterLength += (e - p); - break; - } - calculatedCharacterLength += q - p; - p = q; - } - - final int delta = calculateCharacterLengthNode - .characterLengthWithRecovery(encoding, CR_VALID, Bytes.fromRange(bytes, p, e)); - if (delta < 0) { - errorProfile.enter(); - throw Utils.unsupportedOperation( - "Code range is reported as valid, but is invalid for the given encoding: ", - encoding); - } - - p += delta; - calculatedCharacterLength++; - } - - return new ValidLeafRope(bytes, encoding, calculatedCharacterLength); - } - - @Specialization(guards = { "isValid(codeRange)", "!isFixedWidth(encoding)", "!isAsciiCompatible(encoding)" }) - protected LeafRope makeValidLeafRope( - byte[] bytes, Encoding encoding, CodeRange codeRange, NotProvided characterLength) { - // Extracted from StringSupport.strLength. 
- - int calculatedCharacterLength; - int p = 0; - int e = bytes.length; - - for (calculatedCharacterLength = 0; p < e; calculatedCharacterLength++) { - p += StringSupport.characterLength(encoding, codeRange, bytes, p, e); - } - - return new ValidLeafRope(bytes, encoding, calculatedCharacterLength); - } - - @Specialization(guards = "isBroken(codeRange)") - protected LeafRope makeInvalidLeafRope( - byte[] bytes, Encoding encoding, CodeRange codeRange, Object characterLength) { - return new InvalidLeafRope(bytes, encoding, RopeOperations.strLength(encoding, bytes, 0, bytes.length)); - } - - @Specialization(guards = { "isUnknown(codeRange)", "isEmpty(bytes)" }) - protected LeafRope makeUnknownLeafRopeEmpty( - byte[] bytes, Encoding encoding, CodeRange codeRange, Object characterLength, - @Cached ConditionProfile isUTF8, - @Cached ConditionProfile isUSAscii, - @Cached ConditionProfile isAscii8Bit, - @Cached ConditionProfile isAsciiCompatible) { - if (isUTF8.profile(encoding == UTF8Encoding.INSTANCE)) { - return RopeConstants.EMPTY_UTF8_ROPE; - } - - if (isUSAscii.profile(encoding == USASCIIEncoding.INSTANCE)) { - return RopeConstants.EMPTY_US_ASCII_ROPE; - } - - if (isAscii8Bit.profile(encoding == ASCIIEncoding.INSTANCE)) { - return RopeConstants.EMPTY_ASCII_8BIT_ROPE; - } - - if (isAsciiCompatible.profile(encoding.isAsciiCompatible())) { - return new AsciiOnlyLeafRope(RopeConstants.EMPTY_BYTES, encoding); - } - - return new ValidLeafRope(RopeConstants.EMPTY_BYTES, encoding, 0); - } - - @Specialization(guards = { "isUnknown(codeRange)", "!isEmpty(bytes)" }) - protected LeafRope makeUnknownLeafRopeGeneric( - byte[] bytes, Encoding encoding, CodeRange codeRange, Object characterLength, - @Cached CalculateAttributesNode calculateAttributesNode, - @Cached BranchProfile asciiOnlyProfile, - @Cached BranchProfile validProfile, - @Cached BranchProfile brokenProfile, - @Cached BranchProfile errorProfile) { - final StringAttributes attributes = calculateAttributesNode - 
.executeCalculateAttributes(encoding, new Bytes(bytes)); - - switch (attributes.getCodeRange()) { - case CR_7BIT: { - asciiOnlyProfile.enter(); - return new AsciiOnlyLeafRope(bytes, encoding); - } - - case CR_VALID: { - validProfile.enter(); - return new ValidLeafRope(bytes, encoding, attributes.getCharacterLength()); - } - - case CR_BROKEN: { - brokenProfile.enter(); - return new InvalidLeafRope(bytes, encoding, attributes.getCharacterLength()); - } - - default: { - errorProfile.enter(); - throw Utils.unsupportedOperation( - "CR_UNKNOWN encountered, but code range should have been calculated"); - } - } - } - - protected static boolean is7Bit(CodeRange codeRange) { - return codeRange == CR_7BIT; - } - - protected static boolean isValid(CodeRange codeRange) { - return codeRange == CR_VALID; - } - - protected static boolean isBroken(CodeRange codeRange) { - return codeRange == CR_BROKEN; - } - - protected static boolean isUnknown(CodeRange codeRange) { - return codeRange == CodeRange.CR_UNKNOWN; - } - - protected static boolean isFixedWidth(Encoding encoding) { - return encoding.isFixedWidth(); - } - - } - - @ImportStatic(RopeGuards.class) - public abstract static class RepeatNode extends RubyBaseNode { - - public static RepeatNode create() { - return RopeNodesFactory.RepeatNodeGen.create(); - } - - public abstract Rope executeRepeat(Rope base, int times); - - @Specialization(guards = "times == 0") - protected Rope repeatZero(Rope base, int times, - @Cached WithEncodingNode withEncodingNode) { - return withEncodingNode.executeWithEncoding(RopeConstants.EMPTY_UTF8_ROPE, base.getEncoding()); - } - - @Specialization(guards = "times == 1") - protected Rope repeatOne(Rope base, int times) { - return base; - } - - @TruffleBoundary - @Specialization(guards = { "isSingleByteString(base)", "times > 1" }) - protected Rope multiplySingleByteString(Rope base, int times, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - final byte filler = base.getBytes()[0]; - - byte[] buffer = 
new byte[times]; - Arrays.fill(buffer, filler); - - return makeLeafRopeNode.executeMake(buffer, base.getEncoding(), base.getCodeRange(), times); - } - - @Specialization(guards = { "!isSingleByteString(base)", "times > 1" }) - protected Rope repeatManaged(ManagedRope base, int times) { - int byteLength; - try { - byteLength = Math.multiplyExact(base.byteLength(), times); - } catch (ArithmeticException e) { - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().argumentError( - "Result of repeating string exceeds the system maximum string length", - this)); - } - - return new RepeatingRope(base, times, byteLength); - } - - @Specialization(guards = { "!isSingleByteString(base)", "times > 1" }) - protected Rope repeatNative(NativeRope base, int times, - @Cached NativeToManagedNode nativeToManagedNode) { - return executeRepeat(nativeToManagedNode.execute(base), times); - } - - } - - public abstract static class DebugPrintRopeNode extends RubyBaseNode { - - public abstract Object executeDebugPrint(Rope rope, int currentLevel, boolean printString); - - @TruffleBoundary - @Specialization - protected Object debugPrintLeafRope(LeafRope rope, int currentLevel, boolean printString) { - printPreamble(currentLevel); - - // Converting a rope to a java.lang.String may populate the byte[], so we need to query for the array status beforehand. - final boolean bytesAreNull = rope.getRawBytes() == null; - - System.err.println(String.format( - "%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; E: %s)", - printString ? 
RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - bytesAreNull, - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getEncoding())); - - return nil; - } - - @TruffleBoundary - @Specialization - protected Object debugPrintSubstringRope(SubstringRope rope, int currentLevel, boolean printString) { - printPreamble(currentLevel); - - // Converting a rope to a java.lang.String may populate the byte[], so we need to query for the array status beforehand. - final boolean bytesAreNull = rope.getRawBytes() == null; - - System.err.println(String.format( - "%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; O: %d; E: %s)", - printString ? RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - bytesAreNull, - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getByteOffset(), - rope.getEncoding())); - - executeDebugPrint(rope.getChild(), currentLevel + 1, printString); - - return nil; - } - - @TruffleBoundary - @Specialization - protected Object debugPrintConcatRopeBytes(ConcatRope rope, int currentLevel, boolean printString) { - printPreamble(currentLevel); - - final ConcatState state = rope.getState(); - - // Before the print, as `toString()` may cause the bytes to become populated. - final boolean bytesAreNull = rope.getRawBytes() == null; - - if (state.isFlattened()) { - System.err.println(String.format( - "%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; E: %s)", - printString ? RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - bytesAreNull, - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getEncoding())); - } else { - System.err.println(String.format( - "%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; E: %s)", - printString ? 
RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - bytesAreNull, - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getEncoding())); - - executeDebugPrint(state.left, currentLevel + 1, printString); - executeDebugPrint(state.right, currentLevel + 1, printString); - } - - return nil; - } - - @TruffleBoundary - @Specialization - protected Object debugPrintRepeatingRope(RepeatingRope rope, int currentLevel, boolean printString) { - printPreamble(currentLevel); - - // Converting a rope to a java.lang.String may populate the byte[], so we need to query for the array status beforehand. - final boolean bytesAreNull = rope.getRawBytes() == null; - - System.err.println(String.format( - "%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; T: %d; E: %s)", - printString ? RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - bytesAreNull, - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getTimes(), - rope.getEncoding())); - - executeDebugPrint(rope.getChild(), currentLevel + 1, printString); - - return nil; - } - - @TruffleBoundary - @Specialization - protected Object debugPrintLazyInt(LazyIntRope rope, int currentLevel, boolean printString) { - printPreamble(currentLevel); - - // Converting a rope to a java.lang.String may populate the byte[], so we need to query for the array status beforehand. - final boolean bytesAreNull = rope.getRawBytes() == null; - - System.err.println(String.format( - "%s (%s; BN: %b; BL: %d; CL: %d; CR: %s; V: %d, E: %s)", - printString ? 
RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - bytesAreNull, - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getValue(), - rope.getEncoding())); - - return nil; - } - - @TruffleBoundary - @Specialization - protected Object debugPrintNative(NativeRope rope, int currentLevel, boolean printString) { - printPreamble(currentLevel); - - System.err.println(String.format( - "%s (%s; BL: %d; CL: %d; CR: %s; P: 0x%x, S: %d; E: %s)", - printString ? RopeOperations.escape(rope) : "", - rope.getClass().getSimpleName(), - rope.byteLength(), - rope.characterLength(), - rope.getCodeRange(), - rope.getNativePointer().getAddress(), - rope.getNativePointer().getSize(), - rope.getEncoding())); - - return nil; - } - - private void printPreamble(int level) { - if (level > 0) { - for (int i = 0; i < level; i++) { - System.err.print("| "); - } - } - } - - } - - @ImportStatic(CompilerDirectives.class) - @GenerateUncached - public abstract static class WithEncodingNode extends RubyBaseNode { - - public static WithEncodingNode create() { - return RopeNodesFactory.WithEncodingNodeGen.create(); - } - - public abstract Rope executeWithEncoding(Rope rope, Encoding encoding); - - @Specialization(guards = "rope.getEncoding() == encoding") - protected Rope sameEncoding(Rope rope, Encoding encoding) { - return rope; - } - - @Specialization(guards = "rope.getEncoding() != encoding") - protected Rope nativeRopeWithEncoding(NativeRope rope, Encoding encoding) { - return rope.withEncoding(encoding); - } - - @Specialization( - guards = { "managedRope.getEncoding() != encoding", "isExact(managedRope, cachedRopeClass)", }, - limit = "getCacheLimit()") - protected Rope asciiCompatible(ManagedRope managedRope, Encoding encoding, - @Cached("managedRope.getClass()") Class cachedRopeClass, - @Cached ConditionProfile asciiCompatibleProfile, - @Cached ConditionProfile asciiOnlyProfile, - @Cached ConditionProfile binaryEncodingProfile, - @Cached ConditionProfile 
bytesNotNull, - @Cached BytesNode bytesNode, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - final ManagedRope rope = CompilerDirectives.castExact(managedRope, cachedRopeClass); - - if (asciiCompatibleProfile.profile(encoding.isAsciiCompatible())) { - if (asciiOnlyProfile.profile(rope.isAsciiOnly())) { - // ASCII-only strings can trivially convert to other ASCII-compatible encodings. - return rope.withEncoding7bit(encoding, bytesNotNull); - } else if (binaryEncodingProfile.profile(encoding == ASCIIEncoding.INSTANCE && - rope.getCodeRange() == CR_VALID && - rope.getEncoding().isAsciiCompatible())) { - // ASCII-compatible CR_VALID strings are also CR_VALID in binary, but they might change character length. - final Rope binary = rope.withBinaryEncoding(bytesNotNull); - assert binary.getCodeRange() == CR_VALID; - return binary; - } else { - // The rope either has a broken code range or isn't ASCII-compatible. In the case of a broken - // code range, we must perform a new code range scan with the target encoding to see if it's still - // broken. In the case of a non-ASCII-compatible encoding we don't have a quick way to reinterpret - // the byte sequence. - return rescanBytesForEncoding(rope, encoding, bytesNode, makeLeafRopeNode); - } - } else { - // We don't know of any good way to quickly reinterpret bytes from two different encodings, so we - // must perform a full code range scan and character length calculation. 
- return rescanBytesForEncoding(rope, encoding, bytesNode, makeLeafRopeNode); - } - } - - private Rope rescanBytesForEncoding(ManagedRope rope, Encoding encoding, BytesNode bytesNode, - MakeLeafRopeNode makeLeafRopeNode) { - return makeLeafRopeNode.executeMake(bytesNode.execute(rope), encoding, CR_UNKNOWN, NotProvided.INSTANCE); - } - - protected int getCacheLimit() { - return Rope.NUMBER_OF_CONCRETE_CLASSES; - } - - } - - @GenerateUncached - public abstract static class GetByteNode extends RubyBaseNode { - - public static GetByteNode create() { - return RopeNodesFactory.GetByteNodeGen.create(); - } - - public abstract int executeGetByte(Rope rope, int index); - - @Specialization(guards = "rope.getRawBytes() != null") - protected int getByte(Rope rope, int index) { - return rope.getRawBytes()[index] & 0xff; - } - - @Specialization(guards = "rope.getRawBytes() == null") - protected int getByte(NativeRope rope, int index) { - return rope.get(index) & 0xff; - } - - @TruffleBoundary - @Specialization(guards = "rope.getRawBytes() == null") - protected int getByte(LazyIntRope rope, int index) { - return rope.getBytes()[index] & 0xff; - } - - @Specialization(guards = "rope.getRawBytes() == null") - protected int getByteSubstringRope(SubstringRope rope, int index, - @Cached ConditionProfile childRawBytesNullProfile, - @Cached ByteSlowNode slowByte) { - if (childRawBytesNullProfile.profile(rope.getChild().getRawBytes() == null)) { - return slowByte.execute(rope, index) & 0xff; - } - - return rope.getChild().getRawBytes()[index + rope.getByteOffset()] & 0xff; - } - - @Specialization(guards = "rope.getRawBytes() == null") - protected int getByteRepeatingRope(RepeatingRope rope, int index, - @Cached ConditionProfile childRawBytesNullProfile, - @Cached ByteSlowNode slowByte) { - if (childRawBytesNullProfile.profile(rope.getChild().getRawBytes() == null)) { - return slowByte.execute(rope, index) & 0xff; - } - - return rope.getChild().getRawBytes()[index % 
rope.getChild().byteLength()] & 0xff; - } - - // NOTE(norswap, 12 Jan 2021): The order of the two next specialization is significant. - // Normally, @Bind expressions should only be run per node, but that's not the case currently (GR-28671). - // Therefore it's important to test isChildren first, as it's possible to transition from children to bytes - // but not the other way around. - - @Specialization(guards = "!state.isFlattened()") - protected int getByteConcatRope(ConcatRope rope, int index, - @Cached ConditionProfile stateBytesNotNull, - @Bind("rope.getState(stateBytesNotNull)") ConcatState state, - @Cached ConditionProfile chooseLeftChildProfile, - @Cached ConditionProfile leftChildRawBytesNullProfile, - @Cached ConditionProfile rightChildRawBytesNullProfile, - @Cached ByteSlowNode byteSlowLeft, - @Cached ByteSlowNode byteSlowRight) { - if (chooseLeftChildProfile.profile(index < state.left.byteLength())) { - if (leftChildRawBytesNullProfile.profile(state.left.getRawBytes() == null)) { - return byteSlowLeft.execute(state.left, index) & 0xff; - } - - return state.left.getRawBytes()[index] & 0xff; - } - - if (rightChildRawBytesNullProfile.profile(state.right.getRawBytes() == null)) { - return byteSlowRight.execute(state.right, index - state.left.byteLength()) & 0xff; - } - - return state.right.getRawBytes()[index - state.left.byteLength()] & 0xff; - } - - // Necessary because getRawBytes() might return null, but then be populated and the children nulled - // before we get to run the other getByteConcatRope. 
- @Specialization(guards = "state.isFlattened()") - protected int getByteConcatRope(ConcatRope rope, int index, - @Cached ConditionProfile stateBytesNotNull, - @Bind("rope.getState(stateBytesNotNull)") ConcatState state) { - return state.bytes[index] & 0xff; - } - } - - public abstract static class SetByteNode extends RubyBaseNode { - - @Child private ConcatNode composedConcatNode = ConcatNode.create(); - @Child private ConcatNode middleConcatNode = ConcatNode.create(); - @Child private MakeLeafRopeNode makeLeafRopeNode = MakeLeafRopeNode.create(); - @Child private SubstringNode leftSubstringNode = SubstringNode.create(); - @Child private SubstringNode rightSubstringNode = SubstringNode.create(); - - public static SetByteNode create() { - return SetByteNodeGen.create(); - } - - public abstract Rope executeSetByte(Rope string, int index, int value); - - @Specialization - protected Rope setByte(ManagedRope rope, int index, int value) { - assert 0 <= index && index < rope.byteLength(); - - final Rope left = leftSubstringNode.executeSubstring(rope, 0, index); - final Rope right = rightSubstringNode.executeSubstring(rope, index + 1, rope.byteLength() - index - 1); - final Rope middle = makeLeafRopeNode.executeMake( - new byte[]{ (byte) value }, - rope.getEncoding(), - CodeRange.CR_UNKNOWN, - NotProvided.INSTANCE); - final Rope composed = composedConcatNode.executeConcat( - middleConcatNode.executeConcat(left, middle, rope.getEncoding()), - right, - rope.getEncoding()); - - return composed; - } - - @Specialization - protected Rope setByte(NativeRope rope, int index, int value) { - rope.set(index, value); - return rope; - } - - } - - public abstract static class GetCodePointNode extends RubyBaseNode { - - @Child private CalculateCharacterLengthNode calculateCharacterLengthNode; - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); - - public static GetCodePointNode create() { - return 
RopeNodesFactory.GetCodePointNodeGen.create(); - } - - public abstract int executeGetCodePoint(RubyEncoding encoding, Rope rope, int index); - - @Specialization(guards = "singleByteOptimizableNode.execute(rope)") - protected int getCodePointSingleByte(RubyEncoding encoding, Rope rope, int index, - @Cached @Exclusive GetByteNode getByteNode) { - return getByteNode.executeGetByte(rope, index); - } - - @Specialization(guards = { "!singleByteOptimizableNode.execute(rope)", "rope.getEncoding().isUTF8()" }) - protected int getCodePointUTF8(RubyEncoding encoding, Rope rope, int index, - @Cached @Exclusive GetByteNode getByteNode, - @Cached ConditionProfile singleByteCharProfile, - @Cached @Shared("getBytesObject") GetBytesObjectNode getBytesObject, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("errorProfile") BranchProfile errorProfile) { - final int firstByte = getByteNode.executeGetByte(rope, index); - if (singleByteCharProfile.profile(firstByte < 128)) { - return firstByte; - } - - return getCodePointMultiByte(encoding, rope, index, getBytesObject, codeRangeNode, errorProfile); - } - - @Specialization(guards = { "!singleByteOptimizableNode.execute(rope)", "!rope.getEncoding().isUTF8()" }) - protected int getCodePointMultiByte(RubyEncoding encoding, Rope rope, int index, - @Cached @Shared("getBytesObject") GetBytesObjectNode getBytesObject, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("errorProfile") BranchProfile errorProfile) { - final Bytes bytes = getBytesObject.getRange(rope, index, rope.byteLength()); - final CodeRange codeRange = codeRangeNode.execute(rope); - - final int characterLength = characterLength(encoding.jcoding, codeRange, bytes); - if (characterLength <= 0) { - errorProfile.enter(); - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().argumentError( - Utils.concat("invalid byte sequence in ", encoding), - null)); - } - - return 
mbcToCode(encoding.jcoding, bytes); - } - - @TruffleBoundary - private int mbcToCode(Encoding encoding, Bytes bytes) { - return encoding.mbcToCode(bytes.array, bytes.offset, bytes.end()); - } - - private int characterLength(Encoding encoding, CodeRange codeRange, Bytes bytes) { - if (calculateCharacterLengthNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - calculateCharacterLengthNode = insert(CalculateCharacterLengthNode.create()); - } - - return calculateCharacterLengthNode.characterLength(encoding, codeRange, bytes); - } - } - - @ImportStatic(RopeGuards.class) - public abstract static class FlattenNode extends RubyBaseNode { - - @Child private MakeLeafRopeNode makeLeafRopeNode = MakeLeafRopeNode.create(); - - public static FlattenNode create() { - return RopeNodesFactory.FlattenNodeGen.create(); - } - - public abstract LeafRope executeFlatten(Rope rope); - - @Specialization - protected LeafRope flattenLeafRope(LeafRope rope) { - return rope; - } - - @Specialization - protected LeafRope flattenNativeRope(NativeRope rope, - @Cached NativeToManagedNode nativeToManagedNode) { - return nativeToManagedNode.execute(rope); - } - - @Specialization(guards = { "!isLeafRope(rope)", "rope.getRawBytes() != null" }) - protected LeafRope flattenNonLeafWithBytes(ManagedRope rope) { - return makeLeafRopeNode - .executeMake(rope.getRawBytes(), rope.getEncoding(), rope.getCodeRange(), rope.characterLength()); - } - - @Specialization(guards = { "!isLeafRope(rope)", "rope.getRawBytes() == null" }) - protected LeafRope flatten(ManagedRope rope) { - // NB: We call RopeOperations.flatten here rather than Rope#getBytes so we don't populate the byte[] in - // the source `rope`. Otherwise, we'll end up a fully populated reference in both the source `rope` and the - // flattened one, which could adversely affect GC. 
- final byte[] bytes = RopeOperations.flattenBytes(rope); - - return makeLeafRopeNode.executeMake(bytes, rope.getEncoding(), rope.getCodeRange(), rope.characterLength()); - } - - } - - public abstract static class EqualNode extends RubyBaseNode { - - public static EqualNode create() { - return RopeNodesFactory.EqualNodeGen.create(); - } - - public abstract boolean execute(Rope a, Rope b); - - @Specialization(guards = "a == b") - protected boolean sameRopeEqual(Rope a, Rope b) { - return true; - } - - @Specialization - protected boolean ropesEqual(Rope a, Rope b, - @Cached BranchProfile differentEncodingProfile, - @Cached BytesEqualNode bytesEqualNode) { - if (a.getEncoding() != b.getEncoding()) { - differentEncodingProfile.enter(); - return false; - } - - return bytesEqualNode.execute(a, b); - } - - } - - // This node type checks for the equality of the bytes owned by a rope but does not pay - // attention to the encoding. - public abstract static class BytesEqualNode extends RubyBaseNode { - - public static BytesEqualNode create() { - return RopeNodesFactory.BytesEqualNodeGen.create(); - } - - public abstract boolean execute(Rope a, Rope b); - - @Specialization(guards = "a == b") - protected boolean sameRopes(Rope a, Rope b) { - return true; - } - - @Specialization(guards = { "a == cachedA", "b == cachedB", "canBeCached" }, limit = "getIdentityCacheLimit()") - protected boolean cachedRopes(Rope a, Rope b, - @Cached("a") Rope cachedA, - @Cached("b") Rope cachedB, - @Cached("canBeCached(cachedA, cachedB)") boolean canBeCached, - @Cached("cachedA.bytesEqual(cachedB)") boolean equal) { - return equal; - } - - @Specialization(guards = { "a != b", "a.getRawBytes() != null", "a.getRawBytes() == b.getRawBytes()" }) - protected boolean sameByteArrays(Rope a, Rope b) { - return true; - } - - @Specialization( - guards = { - "a != b", - "a.getRawBytes() != null", - "b.getRawBytes() != null", - "a.byteLength() == 1", - "b.byteLength() == 1" }) - protected boolean 
characterEqual(Rope a, Rope b) { - return a.getRawBytes()[0] == b.getRawBytes()[0]; - } - - @Specialization(guards = "a != b", replaces = { "cachedRopes", "sameByteArrays", "characterEqual" }) - protected boolean fullRopeEqual(Rope a, Rope b, - @Cached ConditionProfile aRawBytesProfile, - @Cached BranchProfile sameByteArraysProfile, - @Cached BranchProfile differentLengthProfile, - @Cached ConditionProfile aCalculatedHashProfile, - @Cached ConditionProfile bCalculatedHashProfile, - @Cached ConditionProfile differentHashProfile, - @Cached BytesNode aBytesNode, - @Cached BytesNode bBytesNode) { - if (aRawBytesProfile.profile(a.getRawBytes() != null) && a.getRawBytes() == b.getRawBytes()) { - sameByteArraysProfile.enter(); - return true; - } - - if (a.byteLength() != b.byteLength()) { - differentLengthProfile.enter(); - return false; - } - - if (aCalculatedHashProfile.profile(a.isHashCodeCalculated()) && - bCalculatedHashProfile.profile(b.isHashCodeCalculated()) && - differentHashProfile.profile(a.calculatedHashCode() != b.calculatedHashCode())) { - return false; - } - - final byte[] aBytes = aBytesNode.execute(a); - final byte[] bBytes = bBytesNode.execute(b); - - // Fold the a.length == b.length condition at compilation in Arrays.equals() since we already know it holds - if (aBytes.length != bBytes.length) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new Error("unreachable"); - } - return Arrays.equals(aBytes, bBytes); - } - - protected boolean canBeCached(Rope a, Rope b) { - if (getContext().isPreInitializing()) { - final String home = getLanguage().getRubyHome(); - return !RopeOperations.anyChildContains(a, home) && !RopeOperations.anyChildContains(b, home); - } else { - return true; - } - } - - } - - @GenerateUncached - public abstract static class BytesNode extends RubyBaseNode { - - public static BytesNode create() { - return RopeNodesFactory.BytesNodeGen.create(); - } - - public abstract byte[] execute(Rope rope); - - 
@Specialization(guards = "rope.getRawBytes() != null") - protected byte[] getBytesManaged(ManagedRope rope) { - return rope.getRawBytes(); - } - - @TruffleBoundary - @Specialization(guards = "rope.getRawBytes() == null") - protected byte[] getBytesManagedAndFlatten(ManagedRope rope) { - return rope.getBytes(); - } - - @Specialization - protected byte[] getBytesNative(NativeRope rope) { - return rope.getBytes(); - } - } - - @GenerateUncached - public abstract static class ByteSlowNode extends RubyBaseNode { - - public static ByteSlowNode create() { - return RopeNodesFactory.ByteSlowNodeGen.create(); - } - - public abstract byte execute(Rope rope, int index); - - @Specialization - protected byte getByteFromSubString(SubstringRope rope, int index, - @Cached ByteSlowNode childNode) { - return childNode.execute(rope.getChild(), rope.getByteOffset() + index); - } - - @Specialization(guards = "rope.getRawBytes() != null") - protected byte fastByte(ManagedRope rope, int index) { - return rope.getRawBytes()[index]; - } - - @TruffleBoundary - @Specialization(guards = { "rope.getRawBytes() == null", "!isSubstringRope(rope)" }) - protected byte getByteFromRope(ManagedRope rope, int index) { - return rope.getByteSlow(index); - } - - @Specialization - protected byte getByteFromNativeRope(NativeRope rope, int index) { - return rope.getByteSlow(index); - } - - protected static boolean isSubstringRope(ManagedRope rope) { - return rope instanceof SubstringRope; - } - } - - @GenerateUncached - public abstract static class AsciiOnlyNode extends RubyBaseNode { - - public static AsciiOnlyNode create() { - return RopeNodesFactory.AsciiOnlyNodeGen.create(); - } - - public abstract boolean execute(Rope rope); - - @Specialization - protected boolean asciiOnly(Rope rope, - @Cached CodeRangeNode codeRangeNode) { - return codeRangeNode.execute(rope) == CR_7BIT; - } - - } - - @GenerateUncached - public abstract static class CodeRangeNode extends RubyBaseNode { - - public static CodeRangeNode 
create() { - return RopeNodesFactory.CodeRangeNodeGen.create(); - } - - public abstract CodeRange execute(Rope rope); - - @Specialization - protected CodeRange getCodeRangeManaged(ManagedRope rope) { - return rope.getCodeRange(); - } - - @Specialization - protected CodeRange getCodeRangeNative(NativeRope rope, - @Cached BytesNode getBytes, - @Cached CalculateAttributesNode calculateAttributesNode, - @Cached ConditionProfile unknownCodeRangeProfile, - @Cached GetBytesObjectNode getBytesObject) { - if (unknownCodeRangeProfile.profile(rope.getRawCodeRange() == CR_UNKNOWN)) { - final StringAttributes attributes = calculateAttributesNode - .executeCalculateAttributes(rope.getEncoding(), getBytesObject.getBytes(rope)); - rope.updateAttributes(attributes); - return attributes.getCodeRange(); - } else { - return rope.getRawCodeRange(); - } - } - - } - - @GenerateUncached - public abstract static class HashNode extends RubyBaseNode { - - public static HashNode create() { - return RopeNodesFactory.HashNodeGen.create(); - } - - public abstract int execute(Rope rope); - - @Specialization(guards = "rope.isHashCodeCalculated()") - protected int executeHashCalculated(Rope rope) { - return rope.calculatedHashCode(); - } - - @Specialization(guards = "!rope.isHashCodeCalculated()") - protected int executeHashNotCalculated(Rope rope) { - return rope.hashCode(); - } - - } - - public abstract static class CharacterLengthNode extends RubyBaseNode { - - public static CharacterLengthNode create() { - return RopeNodesFactory.CharacterLengthNodeGen.create(); - } - - public abstract int execute(Rope rope); - - @Specialization - protected int getCharacterLengthManaged(ManagedRope rope) { - return rope.characterLength(); - } - - @Specialization - protected int getCharacterLengthNative(NativeRope rope, - @Cached BytesNode getBytes, - @Cached CalculateAttributesNode calculateAttributesNode, - @Cached ConditionProfile unknownCharacterLengthProfile, - @Cached GetBytesObjectNode getBytesObjectNode) 
{ - if (unknownCharacterLengthProfile - .profile(rope.rawCharacterLength() == NativeRope.UNKNOWN_CHARACTER_LENGTH)) { - final StringAttributes attributes = calculateAttributesNode - .executeCalculateAttributes(rope.getEncoding(), getBytesObjectNode.getBytes(rope)); - rope.updateAttributes(attributes); - return attributes.getCharacterLength(); - } else { - return rope.rawCharacterLength(); - } - } - - } - - public abstract static class SingleByteOptimizableNode extends RubyBaseNode { - - public static SingleByteOptimizableNode create() { - return RopeNodesFactory.SingleByteOptimizableNodeGen.create(); - } - - public abstract boolean execute(Rope rope); - - @Specialization - protected boolean isSingleByteOptimizable(Rope rope, - @Cached AsciiOnlyNode asciiOnlyNode, - @Cached ConditionProfile asciiOnlyProfile) { - final boolean asciiOnly = asciiOnlyNode.execute(rope); - - if (asciiOnlyProfile.profile(asciiOnly)) { - return true; - } else { - return rope.getEncoding().isSingleByte(); - } - } - - } - - @ImportStatic(CodeRange.class) - @GenerateUncached - public abstract static class CalculateCharacterLengthNode extends RubyBaseNode { - - public static CalculateCharacterLengthNode create() { - return RopeNodesFactory.CalculateCharacterLengthNodeGen.create(); - } - - protected abstract int executeLength(Encoding encoding, CodeRange codeRange, Bytes bytes, - boolean recoverIfBroken); - - /** This method returns the byte length for the first character encountered in `bytes`. The validity of a - * character is defined by the `encoding`. If the `codeRange` for the byte sequence is known for the supplied - * `encoding`, it should be passed to help short-circuit some validation checks. If the `codeRange` is not known - * for the supplied `encoding`, then `CodeRange.CR_UNKNOWN` should be passed. If the byte sequence is invalid, a - * negative value will be returned. See `Encoding#length` for details on how to interpret the return value. 
*/ - public int characterLength(Encoding encoding, CodeRange codeRange, Bytes bytes) { - return executeLength(encoding, codeRange, bytes, false); - } - - /** This method works very similarly to `characterLength` and maintains the same invariants on inputs. Where it - * differs is in the treatment of invalid byte sequences. Whereas `characterLength` will return a negative - * value, this method will always return a positive value. MRI provides an arbitrary, but deterministic, - * algorithm for returning a byte length for invalid byte sequences. This method is to be used when the - * `codeRange` might be `CodeRange.CR_BROKEN` and the caller must handle the case without raising an error. - * E.g., if `String#each_char` is called on a String that is `CR_BROKEN`, you wouldn't want negative byte - * lengths to be returned because it would break iterating through the bytes. */ - public int characterLengthWithRecovery(Encoding encoding, CodeRange codeRange, Bytes bytes) { - return executeLength(encoding, codeRange, bytes, true); - } - - @Specialization(guards = "codeRange == CR_7BIT") - protected int cr7Bit(Encoding encoding, CodeRange codeRange, Bytes bytes, boolean recoverIfBroken) { - assert bytes.length > 0; - return 1; - } - - @Specialization(guards = { "codeRange == CR_VALID", "encoding.isUTF8()" }) - protected int validUtf8(Encoding encoding, CodeRange codeRange, Bytes bytes, boolean recoverIfBroken, - @Cached @Exclusive BranchProfile oneByteProfile, - @Cached @Exclusive BranchProfile twoBytesProfile, - @Cached @Exclusive BranchProfile threeBytesProfile, - @Cached @Exclusive BranchProfile fourBytesProfile) { - final byte b = bytes.get(0); - final int ret; - - if (b >= 0) { - oneByteProfile.enter(); - ret = 1; - } else { - switch (b & 0xf0) { - case 0xe0: - threeBytesProfile.enter(); - ret = 3; - break; - case 0xf0: - fourBytesProfile.enter(); - ret = 4; - break; - default: - twoBytesProfile.enter(); - ret = 2; - break; - } - } - - return ret; - } - - 
@Specialization(guards = { "codeRange == CR_VALID", "encoding.isAsciiCompatible()" }) - protected int validAsciiCompatible(Encoding encoding, CodeRange codeRange, Bytes bytes, boolean recoverIfBroken, - @Cached @Exclusive ConditionProfile asciiCharProfile) { - if (asciiCharProfile.profile(bytes.get(0) >= 0)) { - return 1; - } else { - return encodingLength(encoding, bytes); - } - } - - @Specialization(guards = { "codeRange == CR_VALID", "encoding.isFixedWidth()" }) - protected int validFixedWidth(Encoding encoding, CodeRange codeRange, Bytes bytes, boolean recoverIfBroken) { - final int width = encoding.minLength(); - assert bytes.length >= width; - return width; - } - - @Specialization( - guards = { - "codeRange == CR_VALID", - /* UTF-8 is ASCII-compatible, so we don't need to check the encoding is not UTF-8 here. */ - "!encoding.isAsciiCompatible()", - "!encoding.isFixedWidth()" }) - protected int validGeneral(Encoding encoding, CodeRange codeRange, Bytes bytes, boolean recoverIfBroken) { - return encodingLength(encoding, bytes); - } - - @Specialization(guards = { "codeRange == CR_BROKEN || codeRange == CR_UNKNOWN", "recoverIfBroken" }) - protected int brokenOrUnknownWithRecovery( - Encoding encoding, CodeRange codeRange, Bytes bytes, boolean recoverIfBroken, - @Cached @Shared("validCharWidthProfile") ConditionProfile validCharWidthProfile, - @Cached @Exclusive ConditionProfile minEncodingWidthUsedProfile) { - final int width = encodingLength(encoding, bytes); - - if (validCharWidthProfile.profile(width > 0 && width <= bytes.length)) { - return width; - } else { - final int minEncodingWidth = encoding.minLength(); - - if (minEncodingWidthUsedProfile.profile(minEncodingWidth <= bytes.length)) { - return minEncodingWidth; - } else { - return bytes.length; - } - } - } - - @Specialization(guards = { "codeRange == CR_BROKEN || codeRange == CR_UNKNOWN", "!recoverIfBroken" }) - protected int brokenOrUnknownWithoutRecovery( - Encoding encoding, CodeRange codeRange, Bytes 
bytes, boolean recoverIfBroken, - @Cached @Shared("validCharWidthProfile") ConditionProfile validCharWidthProfile) { - - final int width = encodingLength(encoding, bytes); - - if (validCharWidthProfile.profile(width <= bytes.length)) { - return width; - } else { - return StringSupport.MBCLEN_NEEDMORE(width - bytes.length); - } - } - - @TruffleBoundary - private int encodingLength(Encoding encoding, Bytes bytes) { - return encoding.length(bytes.array, bytes.offset, bytes.offset + bytes.length); - } - - } - - public abstract static class NativeToManagedNode extends RubyBaseNode { - - public static NativeToManagedNode create() { - return RopeNodesFactory.NativeToManagedNodeGen.create(); - } - - public abstract LeafRope execute(NativeRope rope); - - @Specialization - protected LeafRope nativeToManaged(NativeRope rope, - @Cached BytesNode bytesNode, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - // Ideally, a NativeRope would always have an accurate code range and character length. However, in practice, - // it's possible for a bad code range to be associated with the rope due to native memory being updated by - // 3rd party libraries. So, we must re-calculate the code range and character length values upon conversion - // to a ManagedRope. 
- return makeLeafRopeNode - .executeMake(bytesNode.execute(rope), rope.getEncoding(), CR_UNKNOWN, NotProvided.INSTANCE); - } - - } - - @ImportStatic(RopeGuards.class) - public abstract static class AreComparableRopesNode extends RubyBaseNode { - - public static AreComparableRopesNode create() { - return AreComparableRopesNodeGen.create(); - } - - @Child CodeRangeNode codeRangeNode = RopeNodes.CodeRangeNode.create(); - - public abstract boolean execute(Rope firstRope, Rope secondRope); - - @Specialization(guards = "a.getEncoding() == b.getEncoding()") - protected boolean sameEncoding(Rope a, Rope b) { - return true; - } - - @Specialization(guards = "a.isEmpty()") - protected boolean firstEmpty(Rope a, Rope b) { - return true; - } - - @Specialization(guards = "b.isEmpty()") - protected boolean secondEmpty(Rope a, Rope b) { - return true; - } - - @Specialization(guards = { "is7Bit(a, codeRangeNode)", "is7Bit(b, codeRangeNode)" }) - protected boolean bothCR7bit(Rope a, Rope b) { - return true; - } - - @Specialization(guards = { "is7Bit(a, codeRangeNode)", "isAsciiCompatible(b)" }) - protected boolean CR7bitASCII(Rope a, Rope b) { - return true; - } - - @Specialization(guards = { "isAsciiCompatible(a)", "is7Bit(b, codeRangeNode)" }) - protected boolean ASCIICR7bit(Rope a, Rope b) { - return true; - } - - @Fallback - protected boolean notCompatible(Rope a, Rope b) { - return false; - } - - } - - public abstract static class CompareRopesNode extends RubyBaseNode { - - public static CompareRopesNode create() { - return CompareRopesNodeGen.create(); - } - - public abstract int execute(Rope firstRope, Rope secondRope); - - @Specialization - protected int compareRopes(Rope firstRope, Rope secondRope, - @Cached ConditionProfile equalSubsequenceProfile, - @Cached ConditionProfile equalLengthProfile, - @Cached ConditionProfile firstStringShorterProfile, - @Cached ConditionProfile greaterThanProfile, - @Cached ConditionProfile equalProfile, - @Cached ConditionProfile 
notComparableProfile, - @Cached ConditionProfile encodingIndexGreaterThanProfile, - @Cached LoopConditionProfile loopProfile, - @Cached BytesNode firstBytesNode, - @Cached BytesNode secondBytesNode, - @Cached AreComparableRopesNode areComparableRopesNode) { - final boolean firstRopeShorter = firstStringShorterProfile - .profile(firstRope.byteLength() < secondRope.byteLength()); - final int memcmpLength; - if (firstRopeShorter) { - memcmpLength = firstRope.byteLength(); - } else { - memcmpLength = secondRope.byteLength(); - } - - final byte[] bytes = firstBytesNode.execute(firstRope); - final byte[] otherBytes = secondBytesNode.execute(secondRope); - - final int ret; - - final int cmp = mismatch(bytes, otherBytes, memcmpLength); - if (equalSubsequenceProfile.profile(cmp == -1)) { - if (equalLengthProfile.profile(firstRope.byteLength() == secondRope.byteLength())) { - ret = 0; - } else { - if (firstRopeShorter) { - ret = -1; - } else { - ret = 1; - } - } - } else { - ret = greaterThanProfile.profile((bytes[cmp] & 0xFF) > (otherBytes[cmp] & 0xFF)) ? 1 : -1; - } - - if (equalProfile.profile(ret == 0)) { - if (notComparableProfile.profile(!areComparableRopesNode.execute(firstRope, secondRope))) { - if (encodingIndexGreaterThanProfile - .profile(firstRope.getEncoding().getIndex() > secondRope.getEncoding().getIndex())) { - return 1; - } else { - return -1; - } - } - } - - return ret; - - } - - @TruffleBoundary - private static int mismatch(byte[] bytes, byte[] otherBytes, int memcmpLength) { - return Arrays.mismatch(bytes, 0, memcmpLength, otherBytes, 0, memcmpLength); - } - } - - /** Returns a {@link Bytes} object for the given rope and bounds. This will simply get the bytes for the rope and - * build the object, except in the case of {@link SubstringRope} which is optimized to use the bytes of the child - * rope instead - which is better for footprint. 
*/ - @GenerateUncached - public abstract static class GetBytesObjectNode extends RubyBaseNode { - - public static GetBytesObjectNode create() { - return RopeNodesFactory.GetBytesObjectNodeGen.create(); - } - - public static GetBytesObjectNode getUncached() { - return RopeNodesFactory.GetBytesObjectNodeGen.getUncached(); - } - - public Bytes getBytes(Rope rope) { - return execute(rope, 0, rope.byteLength()); - } - - public abstract Bytes execute(Rope rope, int offset, int length); - - public Bytes getClamped(Rope rope, int offset, int length) { - return execute(rope, offset, Math.min(length, rope.byteLength() - offset)); - } - - public Bytes getRange(Rope rope, int start, int end) { - return execute(rope, start, end - start); - } - - public Bytes getClampedRange(Rope rope, int start, int end) { - return execute(rope, start, Math.min(rope.byteLength(), end) - start); - } - - @Specialization(guards = "rope.getRawBytes() != null") - protected Bytes getBytesObjectFromRaw(Rope rope, int offset, int length) { - return new Bytes(rope.getRawBytes(), offset, length); - } - - @Specialization(guards = "rope.getRawBytes() == null") - protected Bytes getBytesObject(SubstringRope rope, int offset, int length, - @Cached @Shared("bytes") BytesNode bytes) { - return new Bytes(bytes.execute(rope.getChild()), rope.getByteOffset() + offset, length); - } - - @Specialization(guards = { "rope.getRawBytes() == null", "!isSubstringRope(rope)" }) - protected Bytes getBytesObject(ManagedRope rope, int offset, int length, - @Cached @Shared("bytes") BytesNode bytes) { - return new Bytes(bytes.execute(rope), offset, length); - } - - @Specialization(guards = "rope.getRawBytes() == null") - protected Bytes getBytesObject(NativeRope rope, int offset, int length) { - return new Bytes(rope.getBytes(offset, length)); - } - - protected static boolean isSubstringRope(ManagedRope rope) { - return rope instanceof SubstringRope; - } - } -} diff --git 
a/src/main/java/org/truffleruby/core/rope/RopeOperations.java b/src/main/java/org/truffleruby/core/rope/RopeOperations.java deleted file mode 100644 index 60d0a39c4f65..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeOperations.java +++ /dev/null @@ -1,692 +0,0 @@ -/* - * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - * - * - * Some of the code in this class is modified from org.jruby.runtime.Helpers and org.jruby.util.StringSupport, - * licensed under the same EPL 2.0/GPL 2.0/LGPL 2.1 used throughout. - * - * Contains code modified from ByteList's ByteList.java - * - * Copyright (C) 2007-2010 JRuby Community - * Copyright (C) 2007 Charles O Nutter - * Copyright (C) 2007 Nick Sieger - * Copyright (C) 2007 Ola Bini - * Copyright (C) 2007 William N Dortch - */ -package org.truffleruby.core.rope; - -import static org.truffleruby.core.rope.CodeRange.CR_7BIT; -import static org.truffleruby.core.rope.CodeRange.CR_BROKEN; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; -import static org.truffleruby.core.rope.CodeRange.CR_VALID; - -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.util.ArrayDeque; -import java.util.Deque; - -import org.jcodings.Encoding; -import org.jcodings.ascii.AsciiTables; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; -import org.truffleruby.collections.IntStack; -import org.truffleruby.core.Hashing; -import org.truffleruby.core.encoding.EncodingManager; -import org.truffleruby.core.rope.ConcatRope.ConcatState; -import org.truffleruby.core.rope.RopeNodesFactory.WithEncodingNodeGen; -import 
org.truffleruby.core.string.StringAttributes; -import org.truffleruby.core.string.StringOperations; -import org.truffleruby.core.string.StringSupport; -import org.truffleruby.core.string.StringUtils; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; - -public class RopeOperations { - - @TruffleBoundary - public static LeafRope create(byte[] bytes, Encoding encoding, CodeRange codeRange) { - if (bytes.length == 1) { - final int index = bytes[0] & 0xff; - - if (encoding == UTF8Encoding.INSTANCE) { - return RopeConstants.UTF8_SINGLE_BYTE_ROPES[index]; - } - - if (encoding == USASCIIEncoding.INSTANCE) { - return RopeConstants.US_ASCII_SINGLE_BYTE_ROPES[index]; - } - - if (encoding == ASCIIEncoding.INSTANCE) { - return RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES[index]; - } - } - - int characterLength = -1; - - if (codeRange == CR_UNKNOWN) { - final StringAttributes attributes = calculateCodeRangeAndLength(encoding, bytes, 0, bytes.length); - - codeRange = attributes.getCodeRange(); - characterLength = attributes.getCharacterLength(); - } else if (codeRange == CR_VALID || codeRange == CR_BROKEN) { - characterLength = strLength(encoding, bytes, 0, bytes.length); - } - - switch (codeRange) { - case CR_7BIT: - return new AsciiOnlyLeafRope(bytes, encoding); - case CR_VALID: - return new ValidLeafRope(bytes, encoding, characterLength); - case CR_BROKEN: - return new InvalidLeafRope(bytes, encoding, characterLength); - default: { - throw new RuntimeException(StringUtils.format("Unknown code range type: %d", codeRange)); - } - } - } - - @TruffleBoundary - public static LeafRope create(byte b, Encoding encoding, CodeRange codeRange) { - final int index = b & 0xff; - - if (encoding == UTF8Encoding.INSTANCE) { - return RopeConstants.UTF8_SINGLE_BYTE_ROPES[index]; - } - - if (encoding == USASCIIEncoding.INSTANCE) { - return RopeConstants.US_ASCII_SINGLE_BYTE_ROPES[index]; - } - - if (encoding == ASCIIEncoding.INSTANCE) { - return 
RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES[index]; - } - - return create(new byte[]{ b }, encoding, codeRange); - } - - public static Rope emptyRope(Encoding encoding) { - if (encoding == UTF8Encoding.INSTANCE) { - return RopeConstants.EMPTY_UTF8_ROPE; - } - - if (encoding == USASCIIEncoding.INSTANCE) { - return RopeConstants.EMPTY_US_ASCII_ROPE; - } - - if (encoding == ASCIIEncoding.INSTANCE) { - return RopeConstants.EMPTY_ASCII_8BIT_ROPE; - } - - final CodeRange codeRange = encoding.isAsciiCompatible() ? CR_7BIT : CR_VALID; - if (codeRange == CR_7BIT) { - return new AsciiOnlyLeafRope(RopeConstants.EMPTY_BYTES, encoding); - } else { - return new ValidLeafRope(RopeConstants.EMPTY_BYTES, encoding, 0); - } - } - - public static Rope withEncoding(Rope originalRope, Encoding newEncoding) { - return WithEncodingNodeGen.getUncached().executeWithEncoding(originalRope, newEncoding); - } - - public static LeafRope encodeAscii(String value, Encoding encoding) { - return create(encodeAsciiBytes(value), encoding, CR_7BIT); - } - - /** Prefer this to {@code getBytes(StandardCharsets.US_ASCII)} */ - public static byte[] encodeAsciiBytes(String value) { - assert StringOperations.isAsciiOnly(value) : "String contained non ascii characters \"" + value + "\""; - - final byte[] bytes = new byte[value.length()]; - - for (int i = 0; i < bytes.length; i++) { - bytes[i] = (byte) value.charAt(i); - } - - return bytes; - } - - public static String decodeAscii(byte[] bytes) { - return decodeAscii(bytes, 0, bytes.length); - } - - public static String decodeAscii(byte[] bytes, int byteOffset, int byteLength) { - final char[] buffer = new char[byteLength]; - - for (int i = 0; i < byteLength; i++) { - byte b = bytes[byteOffset + i]; - assert b >= 0; - buffer[i] = (char) b; - } - - return newString(buffer); - } - - @TruffleBoundary - private static String newString(char[] buffer) { - return new String(buffer); - } - - @TruffleBoundary - public static String decodeNonAscii(Encoding encoding, 
byte[] bytes, int byteOffset, int byteLength) { - final Charset charset; - - if (encoding == ASCIIEncoding.INSTANCE) { - for (int i = 0; i < byteLength; i++) { - if (bytes[byteOffset + i] < 0) { - throw new CannotConvertBinaryRubyStringToJavaString(bytes[byteOffset + i] & 0xFF); - } - } - - // Don't misinterpret non-ASCII bytes, use the replacement character to show the loss - charset = StandardCharsets.US_ASCII; - } else { - charset = EncodingManager.charsetForEncoding(encoding); - } - - - return decode(charset, bytes, byteOffset, byteLength); - } - - public static String decodeOrEscapeBinaryRope(Rope rope) { - return decodeOrEscapeBinaryRope(rope, rope.getBytes()); - } - - /** Overload to avoid calling getBytes() and mutate the Rope "bytes" field. */ - @TruffleBoundary - public static String decodeOrEscapeBinaryRope(Rope rope, byte[] bytes) { - if (rope.isAsciiOnly() || rope.getEncoding() != ASCIIEncoding.INSTANCE) { - return decodeRopeSegment(rope, bytes, 0, bytes.length); - } else { - return escapeBinaryRope(bytes); - } - } - - private static String escapeBinaryRope(byte[] bytes) { - // A Rope with BINARY encoding cannot be converted faithfully to a Java String. - // (ISO_8859_1 would just show random characters for bytes above 128) - // Therefore we convert non-US-ASCII characters to "\xNN". 
- // MRI Symbol#inspect for binary symbols is similar: "\xff".b.to_sym => :"\xFF" - final StringBuilder builder = new StringBuilder(bytes.length); - for (final byte c : bytes) { - if (c >= 0) { // US-ASCII character - builder.append((char) (c & 0xFF)); - } else { - builder.append("\\x").append(String.format("%02X", c & 0xFF)); - } - } - return builder.toString(); - } - - public static String decodeRope(Rope value) { - return decodeRopeSegment(value, 0, value.byteLength()); - } - - @TruffleBoundary - public static String decodeRopeSegment(Rope value, int byteOffset, int byteLength) { - return decodeRopeSegment(value, value.getBytes(), byteOffset, byteLength); - } - - private static String decodeRopeSegment(Rope value, byte[] bytes, int byteOffset, int byteLength) { - if (value.isAsciiOnly()) { - return decodeAscii(bytes, byteOffset, byteLength); - } else { - return decodeNonAscii(value.getEncoding(), bytes, byteOffset, byteLength); - } - } - - /** This method has no side effects, because it does not even have access to the Rope - for debugging only. */ - @TruffleBoundary - public static String decode(Encoding encoding, byte[] bytes) { - if (encoding == ASCIIEncoding.INSTANCE) { - return escapeBinaryRope(bytes); - } else { - final Charset charset = EncodingManager.charsetForEncoding(encoding); - return decode(charset, bytes, 0, bytes.length); - } - } - - private static String decode(Charset charset, byte[] bytes, int byteOffset, int byteLength) { - return new String(bytes, byteOffset, byteLength, charset); - } - - /** See {@link RopeNodes.CalculateAttributesNode} */ - // MRI: rb_enc_strlen_cr - @TruffleBoundary - public static StringAttributes calculateCodeRangeAndLength(Encoding encoding, byte[] bytes, int start, int end) { - if (bytes.length == 0) { - return new StringAttributes(0, encoding.isAsciiCompatible() ? 
CR_7BIT : CR_VALID); - } else if (encoding == ASCIIEncoding.INSTANCE) { - return strLengthWithCodeRangeBinaryString(bytes, start, end); - } else if (encoding.isAsciiCompatible()) { - return StringSupport.strLengthWithCodeRangeAsciiCompatible(encoding, bytes, start, end); - } else { - return StringSupport.strLengthWithCodeRangeNonAsciiCompatible(encoding, bytes, start, end); - } - } - - @TruffleBoundary - public static int strLength(Encoding enc, byte[] bytes, int p, int end) { - return StringSupport.strLength(enc, bytes, p, end); - } - - private static StringAttributes strLengthWithCodeRangeBinaryString(byte[] bytes, int start, int end) { - CodeRange codeRange = CR_7BIT; - - for (int i = start; i < end; i++) { - if (bytes[i] < 0) { - codeRange = CR_VALID; - break; - } - } - - return new StringAttributes(end - start, codeRange); - } - - /** This method should not be used directly, because it does not cache the result in the Rope. Use - * {@link RopeNodes.BytesNode} or {@link Rope#getBytes()} instead. - * - * Performs an iterative depth first search of the Rope tree to calculate its byte[] without needing to populate the - * byte[] for each level beneath. Every LeafRope has its byte[] populated by definition. The goal is to determine - * which descendant LeafRopes contribute bytes to the top-most Rope's logical byte[] and how many bytes they should - * contribute. Then each such LeafRope copies the appropriate range of bytes to a shared byte[]. - * - * Rope trees can be very deep. An iterative algorithm is preferable to recursion because it removes the overhead of - * stack frame management. Additionally, a recursive algorithm will eventually overflow the stack if the Rope tree - * is too deep. 
*/ - @TruffleBoundary - public static byte[] flattenBytes(Rope rope) { - if (rope.getRawBytes() != null) { - return rope.getRawBytes(); - } - - if (rope instanceof NativeRope) { - return rope.getBytes(); - } - - int bufferPosition = 0; - int byteOffset = 0; - - final byte[] buffer = new byte[rope.byteLength()]; - - // As we traverse the rope tree, we need to keep track of any bounded lengths of SubstringRopes. LeafRopes always - // provide their full byte[]. ConcatRope always provides the full byte[] of each of its children. SubstringRopes, - // in contrast, may bound the length of their children. Since we may have SubstringRopes of SubstringRopes, we - // need to track each SubstringRope's bounded length and how much that bounded length contributes to the total - // byte[] for any ancestor (e.g., a SubstringRope of a ConcatRope with SubstringRopes for each of its children). - // Because we need to track multiple levels, we can't use a single updated int. - final IntStack substringLengths = new IntStack(); - - final Deque workStack = new ArrayDeque<>(); - workStack.push(rope); - - while (!workStack.isEmpty()) { - final Rope current = workStack.pop(); - - // An empty rope trivially cannot contribute to filling the output buffer. - if (current.isEmpty()) { - continue; - } - - final byte[] rawBytes; - if (current instanceof LazyIntRope) { - rawBytes = current.getBytesSlow(); - } else { - rawBytes = current.getRawBytes(); - } - - if (rawBytes != null) { - // In the absence of any SubstringRopes, we always take the full contents of the current rope. - if (substringLengths.isEmpty()) { - System.arraycopy(rawBytes, byteOffset, buffer, bufferPosition, current.byteLength()); - bufferPosition += current.byteLength(); - } else { - int bytesToCopy = substringLengths.pop(); - final int currentBytesToCopy; - - // If we reach here, this rope is a descendant of a SubstringRope at some level. 
Based on - // the currently calculated byte[] offset and the number of bytes to extract, determine how many - // bytes we can copy to the buffer. - if (bytesToCopy > (current.byteLength() - byteOffset)) { - currentBytesToCopy = current.byteLength() - byteOffset; - } else { - currentBytesToCopy = bytesToCopy; - } - - System.arraycopy(rawBytes, byteOffset, buffer, bufferPosition, currentBytesToCopy); - bufferPosition += currentBytesToCopy; - bytesToCopy -= currentBytesToCopy; - - // If this rope wasn't able to satisfy the remaining byte count from the ancestor SubstringRope, - // update the byte count for the next item in the work queue. - if (bytesToCopy > 0) { - substringLengths.push(bytesToCopy); - } - } - - // By definition, offsets only affect the start of the rope. Once we've copied bytes out of a rope, - // we need to reset the offset or subsequent items in the work queue will copy from the wrong location. - // - // NB: In contrast to the number of bytes to extract, the offset can be shared and updated by multiple - // levels of SubstringRopes. Thus, we do not need to maintain offsets in a stack and it is appropriate - // to clear the offset after the first time we use it, since it will have been updated accordingly at - // each SubstringRope encountered for this SubstringRope ancestry chain. - byteOffset = 0; - - continue; - } - - if (current instanceof ConcatRope) { - final ConcatRope concatRope = (ConcatRope) current; - - final ConcatState state = concatRope.getState(); - if (state.isFlattened()) { - // The rope got concurrently flattened between entering the iteration and reaching here, - // restart the iteration from the top. - workStack.push(concatRope); - continue; - } - - // In the absence of any SubstringRopes, we always take the full contents of the ConcatRope. 
- if (substringLengths.isEmpty()) { - workStack.push(state.right); - workStack.push(state.left); - } else { - final int leftLength = state.left.byteLength(); - - // If we reach here, this ConcatRope is a descendant of a SubstringRope at some level. Based on - // the currently calculated byte[] offset and the number of bytes to extract, determine which of - // the ConcatRope's children we need to visit. - if (byteOffset < leftLength) { - if ((byteOffset + substringLengths.peek()) > leftLength) { - workStack.push(state.right); - workStack.push(state.left); - } else { - workStack.push(state.left); - } - } else { - // If we can skip the left child entirely, we need to update the offset so it's accurate for - // the right child as each child's starting point is 0. - byteOffset -= leftLength; - workStack.push(state.right); - } - } - } else if (current instanceof SubstringRope) { - final SubstringRope substringRope = (SubstringRope) current; - - workStack.push(substringRope.getChild()); - - // Either we haven't seen another SubstringRope or it's been cleared off the work queue. In either case, - // we can start fresh. - if (substringLengths.isEmpty()) { - substringLengths.push(substringRope.byteLength()); - } else { - // Since we may be taking a substring of a substring, we need to note that we're not extracting the - // entirety of the current SubstringRope. - final int adjustedByteLength = substringRope.byteLength() - byteOffset; - - // We have to do some bookkeeping once we encounter multiple SubstringRopes along the same ancestry - // chain. The top of the stack always indicates the number of bytes to extract from any descendants. - // Any bytes extracted from this SubstringRope must contribute to the total of the parent SubstringRope - // and are thus deducted. We can't simply update a total byte count, however, because we need distinct - // counts for each level. 
- // - // For example: SubstringRope (byteLength = 6) - // | - // ConcatRope (byteLength = 20) - // / \ - // SubstringRope (byteLength = 4) LeafRope (byteLength = 16) - // | - // LeafRope (byteLength = 50) - // - // In this case we need to know that we're only extracting 4 bytes from descendants of the second - // SubstringRope. And those 4 bytes contribute to the total 6 bytes from the ancestor SubstringRope. - // The top of stack manipulation performed here maintains that invariant. - - if (substringLengths.peek() > adjustedByteLength) { - final int bytesToCopy = substringLengths.pop(); - substringLengths.push(bytesToCopy - adjustedByteLength); - substringLengths.push(adjustedByteLength); - } - } - - // If this SubstringRope is a descendant of another SubstringRope, we need to increment the offset - // so that when we finally reach a rope with its byte[] filled, we're extracting bytes from the correct - // location. - byteOffset += substringRope.getByteOffset(); - } else if (current instanceof RepeatingRope) { - final RepeatingRope repeatingRope = (RepeatingRope) current; - final Rope child = repeatingRope.getChild(); - - // In the absence of any SubstringRopes, we always take the full contents of the RepeatingRope. - if (substringLengths.isEmpty()) { - // TODO (nirvdrum 06-Apr-16) Rather than process the same child over and over, there may be opportunity to re-use the results from a single pass. - for (int i = 0; i < repeatingRope.getTimes(); i++) { - workStack.push(child); - } - } else { - final int bytesToCopy = substringLengths.peek(); - final int patternLength = child.byteLength(); - - // Fix the offset to be appropriate for a given child. The offset is reset the first time it is - // consumed, so there's no need to worry about adversely affecting anything by adjusting it here. 
- byteOffset %= child.byteLength(); - - final int loopCount = computeLoopCount( - byteOffset, - repeatingRope.getTimes(), - bytesToCopy, - patternLength); - - // TODO (nirvdrum 25-Aug-2016): Flattening the rope with CR_VALID will cause a character length recalculation, even though we already know what it is. That operation should be made more optimal. - final LeafRope flattenedChild; - if (child instanceof LeafRope) { - flattenedChild = (LeafRope) child; - } else { - flattenedChild = create(flattenBytes(child), child.getEncoding(), child.getCodeRange()); - } - for (int i = 0; i < loopCount; i++) { - workStack.push(flattenedChild); - } - } - } else { - throw new UnsupportedOperationException( - "Don't know how to flatten rope of type: " + current.getClass().getName()); - } - } - - return buffer; - } - - private static int computeLoopCount(int offset, int times, int length, int patternLength) { - // The loopCount has to be precisely determined so every repetition has at least some parts used. - // It has to account for the beginning we don't need (offset), has to reach the end but, and must not - // have extra repetitions. However it cannot ever be longer than repeatingRope.getTimes(). 
- return Integer.min( - times, - (offset + patternLength * length / patternLength + patternLength - 1) / patternLength); - } - - @TruffleBoundary - public static int hashForRange(Rope rope, int startingHashCode, int offset, int length) { - class Params { - final Rope rope; - final int startingHashCode; - final int offset; - final int length; - final boolean readResult; - - Params(Rope rope, int startingHashCode, int offset, int length, boolean readResult) { - this.rope = rope; - this.startingHashCode = startingHashCode; - this.offset = offset; - this.length = length; - this.readResult = readResult; - } - } - - final Deque workStack = new ArrayDeque<>(); - workStack.push(new Params(rope, startingHashCode, offset, length, false)); - int resultHash = 0; - - while (!workStack.isEmpty()) { - final Params params = workStack.pop(); - rope = params.rope; - startingHashCode = params.readResult ? resultHash : params.startingHashCode; - offset = params.offset; - length = params.length; - final byte[] bytes = rope.getRawBytes(); - - if (bytes != null) { - resultHash = Hashing.stringHash(bytes, startingHashCode, offset, length); - } else if (rope instanceof SubstringRope) { - final SubstringRope substringRope = (SubstringRope) rope; - final Rope child = substringRope.getChild(); - final int newOffset = offset + substringRope.getByteOffset(); - workStack.push(new Params(child, startingHashCode, newOffset, length, false)); - } else if (rope instanceof ConcatRope) { - final ConcatRope concatRope = (ConcatRope) rope; - final ConcatState state = concatRope.getState(); - if (state.isFlattened()) { - // Rope got concurrently flattened. 
- resultHash = Hashing.stringHash(state.bytes, startingHashCode, offset, length); - } else { - final Rope left = state.left; - final Rope right = state.right; - final int leftLength = left.byteLength(); - - if (offset >= leftLength) { - // range fully contained in right child - workStack.push(new Params(right, startingHashCode, offset - leftLength, length, false)); - } else if (offset + length <= leftLength) { - // range fully contained in left child - workStack.push(new Params(left, startingHashCode, offset, length, false)); - } else { - final int coveredByLeft = leftLength - offset; - // push right node first, starting hash is the result from the left node - workStack.push(new Params(right, 0, 0, length - coveredByLeft, true)); - workStack.push(new Params(left, startingHashCode, offset, coveredByLeft, false)); - } - } - } else if (rope instanceof RepeatingRope) { - final RepeatingRope repeatingRope = (RepeatingRope) rope; - final Rope child = repeatingRope.getChild(); - final int patternLength = child.byteLength(); - - offset %= patternLength; - if (length > patternLength - offset) { // bytes to hash > bytes available in current repetition of child - // loop - 1 iteration, reset offset to 0, starting hash is the result from previous iteration - workStack.push(new Params(rope, 0, 0, length - (patternLength - offset), true)); - length = patternLength - offset; - } - - // one iteration - workStack.push(new Params(child, startingHashCode, offset, length, false)); - } else { - resultHash = Hashing.stringHash(rope.getBytes(), startingHashCode, offset, length); - } - } - - return resultHash; - } - - public static RopeBuilder toRopeBuilderCopy(Rope rope) { - return RopeBuilder.createRopeBuilder(rope.getBytes(), rope.getEncoding()); - } - - @TruffleBoundary - public static int caseInsensitiveCmp(Rope value, Rope other) { - // Taken from org.jruby.util.ByteList#caseInsensitiveCmp. 
- - if (other == value) { - return 0; - } - - final int size = value.byteLength(); - final int len = Math.min(size, other.byteLength()); - final byte[] other_bytes = other.getBytes(); - - for (int offset = -1; ++offset < len;) { - int myCharIgnoreCase = AsciiTables.ToLowerCaseTable[value.getBytes()[offset] & 0xff] & 0xff; - int otherCharIgnoreCase = AsciiTables.ToLowerCaseTable[other_bytes[offset] & 0xff] & 0xff; - if (myCharIgnoreCase < otherCharIgnoreCase) { - return -1; - } else if (myCharIgnoreCase > otherCharIgnoreCase) { - return 1; - } - } - - return size == other.byteLength() ? 0 : size == len ? -1 : 1; - } - - public static Rope ropeFromRopeBuilder(RopeBuilder builder) { - return create(builder.getBytes(), builder.getEncoding(), CR_UNKNOWN); - } - - public static boolean isAsciiOnly(byte[] bytes, Encoding encoding) { - if (!encoding.isAsciiCompatible()) { - return false; - } - - for (int i = 0; i < bytes.length; i++) { - if (bytes[i] < 0) { - return false; - } - } - - return true; - } - - public static boolean isInvalid(byte[] bytes, Encoding encoding) { - final StringAttributes attributes = calculateCodeRangeAndLength(encoding, bytes, 0, bytes.length); - - return attributes.getCodeRange() == CR_BROKEN; - } - - public static boolean anyChildContains(Rope rope, String value) { - if (rope instanceof SubstringRope) { - return anyChildContains(((SubstringRope) rope).getChild(), value); - } - // NOTE(norswap, 18 Dec 2020): We do not treat ConcatRopes specially: `decodeRope` will flatten them - // If we just search left and right, we risk missing the case where the value straddles the two children. - // - // Because of the flattening, the references to the children ropes will be nulled, so we do not need - // to worry about the risk of retaining a substring rope whose child contains the value. 
- return rope.byteLength() >= value.length() && RopeOperations.decodeRope(rope).contains(value); - } - - public static String escape(Rope rope) { - final StringBuilder builder = new StringBuilder(); - builder.append('"'); - - for (int i = 0; i < rope.byteLength(); i++) { - final byte character = rope.get(i); - switch (character) { - case '\\': - builder.append("\\"); - break; - case '"': - builder.append("\\\""); - break; - default: - if (character >= 32 && character <= 126) { - builder.append((char) character); - } else { - builder.append(StringUtils.format("\\x%02x", character)); - } - break; - } - } - - builder.append('"'); - return builder.toString(); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/RopeWithEncoding.java b/src/main/java/org/truffleruby/core/rope/RopeWithEncoding.java deleted file mode 100644 index bfc1c62d341e..000000000000 --- a/src/main/java/org/truffleruby/core/rope/RopeWithEncoding.java +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2021, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import org.truffleruby.core.encoding.RubyEncoding; - -import java.util.Objects; - -public final class RopeWithEncoding { - - private final Rope rope; - private final RubyEncoding encoding; - - public RopeWithEncoding(Rope rope, RubyEncoding encoding) { - assert rope.encoding == encoding.jcoding; - this.rope = rope; - this.encoding = encoding; - } - - public Rope getRope() { - return rope; - } - - public RubyEncoding getEncoding() { - return encoding; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (!(o instanceof RopeWithEncoding)) { - return false; - } - RopeWithEncoding that = (RopeWithEncoding) o; - return rope.equals(that.rope) && encoding == that.encoding; - } - - @Override - public int hashCode() { - return Objects.hash(rope, encoding); - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/SubstringRope.java b/src/main/java/org/truffleruby/core/rope/SubstringRope.java deleted file mode 100644 index b8bca7731c02..000000000000 --- a/src/main/java/org/truffleruby/core/rope/SubstringRope.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ - -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; - -public final class SubstringRope extends ManagedRope { - - private final ManagedRope child; - private final int byteOffset; - - public SubstringRope( - Encoding encoding, - ManagedRope child, - int offset, - int byteLength, - int characterLength, - CodeRange codeRange) { - // TODO (nirvdrum 07-Jan-16) Verify that this rope is only used for character substrings and not arbitrary byte slices. The former should always have the child's code range while the latter may not. - this(child, encoding, offset, byteLength, characterLength, codeRange); - } - - private SubstringRope( - ManagedRope child, - Encoding encoding, - int byteOffset, - int byteLength, - int characterLength, - CodeRange codeRange) { - // TODO (nirvdrum 07-Jan-16) Verify that this rope is only used for character substrings and not arbitrary byte slices. The former should always have the child's code range while the latter may not. 
- super(encoding, codeRange, byteLength, characterLength, null); - assert !(child instanceof SubstringRope) : child.getClass(); - this.child = child; - this.byteOffset = byteOffset; - - assert byteLength <= child.byteLength(); - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_7BIT; - return new SubstringRope( - getChild(), - newEncoding, - getByteOffset(), - byteLength(), - characterLength(), - CodeRange.CR_7BIT); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - assert getCodeRange() == CodeRange.CR_VALID; - return new SubstringRope( - getChild(), - ASCIIEncoding.INSTANCE, - getByteOffset(), - byteLength(), - byteLength(), - CodeRange.CR_VALID); - } - - @Override - protected byte getByteSlow(int index) { - return child.get(byteOffset + index); - } - - @Override - protected byte[] getBytesSlow() { - if (child.getRawBytes() != null) { - final byte[] bytes = new byte[byteLength()]; - System.arraycopy(child.getRawBytes(), byteOffset, bytes, 0, byteLength()); - return bytes; - } - - return super.getBytesSlow(); - } - - public ManagedRope getChild() { - return child; - } - - public int getByteOffset() { - return byteOffset; - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/TruffleRopesNodes.java b/src/main/java/org/truffleruby/core/rope/TruffleRopesNodes.java deleted file mode 100644 index 48fe2d294cc7..000000000000 --- a/src/main/java/org/truffleruby/core/rope/TruffleRopesNodes.java +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (c) 2013, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.library.CachedLibrary; -import org.jcodings.specific.UTF8Encoding; -import org.truffleruby.builtins.CoreMethod; -import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; -import org.truffleruby.builtins.CoreModule; -import org.truffleruby.cext.CExtNodes; -import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.ConcatRope.ConcatState; -import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; -import org.truffleruby.core.string.StringOperations; -import org.truffleruby.core.string.StringUtils; -import org.truffleruby.language.NotProvided; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.dsl.Specialization; -import org.truffleruby.language.library.RubyStringLibrary; - -@CoreModule("Truffle::Ropes") -public abstract class TruffleRopesNodes { - - @CoreMethod(names = "dump_string", onSingleton = true, required = 1) - public abstract static class DumpStringNode extends CoreMethodArrayArgumentsNode { - - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - - @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") - protected RubyString dumpString(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final StringBuilder builder = new StringBuilder(); - - final Rope rope = strings.getRope(string); - - for (int i = 0; i < rope.byteLength(); i++) { - builder.append(StringUtils.format("\\x%02x", rope.get(i))); - } - - return makeStringNode.executeMake(builder.toString(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); - } - - } - - @CoreMethod(names = "debug_print_rope", onSingleton = true, required = 1, optional = 1) - public abstract static class DebugPrintRopeNode extends 
CoreMethodArrayArgumentsNode { - - @Child private RopeNodes.DebugPrintRopeNode debugPrintRopeNode = RopeNodesFactory.DebugPrintRopeNodeGen - .create(); - - @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") - protected Object debugPrintDefault(Object string, NotProvided printString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return debugPrint(string, true, strings); - } - - @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") - protected Object debugPrint(Object string, boolean printString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - System.err.println("Legend: "); - System.err.println("BN = Bytes Null? (byte[] not yet populated)"); - System.err.println("BL = Byte Length"); - System.err.println("CL = Character Length"); - System.err.println("CR = Code Range"); - System.err.println("O = Byte Offset (SubstringRope only)"); - System.err.println("T = Times (RepeatingRope only)"); - System.err.println("V = Value (LazyIntRope only)"); - System.err.println("E = Encoding"); - System.err.println("P = Native Pointer (NativeRope only)"); - System.err.println("S = Native Size (NativeRope only)"); - - return debugPrintRopeNode.executeDebugPrint(strings.getRope(string), 0, printString); - } - } - - /** The returned string (when evaluated) will create a string with the same Rope structure as the string which is - * passed as argument. 
*/ - @CoreMethod(names = "debug_get_structure_creation", onSingleton = true, required = 1) - public abstract static class DebugGetStructureCreationNode extends CoreMethodArrayArgumentsNode { - - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - - @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") - protected RubyString getStructure(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - Rope rope = strings.getRope(string); - String result = getStructure(rope); - byte[] bytes = StringOperations.encodeBytes(result, UTF8Encoding.INSTANCE); - return makeStringNode.executeMake( - bytes, - strings.getEncoding(string), - CodeRange.CR_7BIT); - } - - protected static String getStructure(Rope rope) { - if (rope instanceof LeafRope) { - return getStructure((LeafRope) rope); - } else if (rope instanceof ConcatRope) { - return getStructure((ConcatRope) rope); - } else if (rope instanceof SubstringRope) { - return getStructure((SubstringRope) rope); - } else if (rope instanceof RepeatingRope) { - return getStructure((RepeatingRope) rope); - } else { - return "(unknown rope class: " + rope.getClass() + ")"; - } - } - - private static String getStructure(LeafRope rope) { - return RopeOperations.escape(rope); - } - - private static String getStructure(ConcatRope rope) { - final ConcatState state = rope.getState(); - return state.isFlattened() - ? 
"(\"flat concat rope\"; " + RopeOperations.escape(rope) + ")" - : "(" + getStructure(state.left) + " + " + getStructure(state.right) + ")"; - } - - private static String getStructure(SubstringRope rope) { - final Rope child = rope.getChild(); - final int characterOffset = RopeOperations - .strLength(child.getEncoding(), child.getBytes(), 0, rope.getByteOffset()); - return getStructure(child) + "[" + characterOffset + ", " + rope.characterLength() + "]"; - } - - private static String getStructure(RepeatingRope rope) { - return "(" + getStructure(rope.getChild()) + "*" + rope.getTimes() + ")"; - } - - } - - @CoreMethod(names = "bytes?", onSingleton = true, required = 1) - public abstract static class HasBytesNode extends CoreMethodArrayArgumentsNode { - - @Specialization(guards = "strings.isRubyString(string)") - protected boolean hasBytes(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return strings.getRope(string).getRawBytes() != null; - } - - } - - @CoreMethod(names = "flatten_rope", onSingleton = true, required = 1) - public abstract static class FlattenRopeNode extends CoreMethodArrayArgumentsNode { - - @Specialization(guards = "libString.isRubyString(string)") - protected RubyString flattenRope(Object string, - @Cached RopeNodes.FlattenNode flattenNode, - @Cached StringNodes.MakeStringNode makeStringNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final LeafRope flattened = flattenNode.executeFlatten(libString.getRope(string)); - final RubyEncoding rubyEncoding = libString.getEncoding(string); - return makeStringNode.fromRope(flattened, rubyEncoding); - } - - } - - @CoreMethod(names = "convert_to_native", onSingleton = true, required = 1) - public abstract static class NativeRopeNode extends CoreMethodArrayArgumentsNode { - - @Specialization(guards = "strings.isRubyString(string)") - protected Object nativeRope(Object string, - @Cached CExtNodes.StringToNativeNode toNativeNode, - 
@CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - toNativeNode.executeToNative(string); - return string; - } - - } - - /* Truffle.create_simple_string creates a string 'test' without any part of the string escaping. Useful for testing - * compilation of String because most other ways to construct a string can currently escape. */ - - @CoreMethod(names = "create_simple_string", onSingleton = true) - public abstract static class CreateSimpleStringNode extends CoreMethodArrayArgumentsNode { - - @Specialization - protected RubyString createSimpleString( - @Cached StringNodes.MakeStringNode makeStringNode) { - return makeStringNode - .fromRope( - new AsciiOnlyLeafRope(new byte[]{ 't', 'e', 's', 't' }, UTF8Encoding.INSTANCE), - Encodings.UTF_8); - } - - } - -} diff --git a/src/main/java/org/truffleruby/core/rope/ValidLeafRope.java b/src/main/java/org/truffleruby/core/rope/ValidLeafRope.java deleted file mode 100644 index 7447c630aa90..000000000000 --- a/src/main/java/org/truffleruby/core/rope/ValidLeafRope.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. 
- */ - -package org.truffleruby.core.rope; - -import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; - -import com.oracle.truffle.api.CompilerDirectives; - -public class ValidLeafRope extends LeafRope { - - public ValidLeafRope(byte[] bytes, Encoding encoding, int characterLength) { - super(bytes, encoding, CodeRange.CR_VALID, characterLength); - - assert !RopeOperations.isAsciiOnly(bytes, encoding) : "ASCII-only string incorrectly marked as CR_VALID"; - assert !RopeOperations.isInvalid(bytes, encoding) : "Broken string incorrectly marked as CR_VALID"; - } - - @Override - Rope withEncoding7bit(Encoding newEncoding, ConditionProfile bytesNotNull) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - throw new UnsupportedOperationException("Must only be called for ASCII-only Strings"); - } - - @Override - Rope withBinaryEncoding(ConditionProfile bytesNotNull) { - return new ValidLeafRope(getRawBytes(), ASCIIEncoding.INSTANCE, byteLength()); - } -} diff --git a/src/main/java/org/truffleruby/core/string/ATStringWithEncoding.java b/src/main/java/org/truffleruby/core/string/ATStringWithEncoding.java new file mode 100644 index 000000000000..ac9b1d44f226 --- /dev/null +++ b/src/main/java/org/truffleruby/core/string/ATStringWithEncoding.java @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2022 Oracle and/or its affiliates. All rights reserved. This + * code is released under a tri EPL/GPL/LGPL license. You can use it, + * redistribute it and/or modify it under the terms of the: + * + * Eclipse Public License version 2.0, or + * GNU General Public License version 2, or + * GNU Lesser General Public License version 2.1. 
+ */ +package org.truffleruby.core.string; + +import com.oracle.truffle.api.CompilerAsserts; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.language.library.RubyStringLibrary; + +import com.oracle.truffle.api.strings.AbstractTruffleString; + +/** AbstractTruffleString with RubyEncoding */ +public final class ATStringWithEncoding extends TStringWithEncodingBase { + + public ATStringWithEncoding(AbstractTruffleString tstring, RubyEncoding encoding) { + super(tstring, encoding); + } + + public ATStringWithEncoding(RubyStringLibrary stringLib, Object string) { + super(stringLib.getTString(string), stringLib.getEncoding(string)); + } + + @Override + public TStringWithEncoding asImmutable() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return new TStringWithEncoding(tstring.asTruffleStringUncached(encoding.tencoding), encoding); + } + +} diff --git a/src/main/java/org/truffleruby/core/rope/BytesKey.java b/src/main/java/org/truffleruby/core/string/BytesKey.java similarity index 82% rename from src/main/java/org/truffleruby/core/rope/BytesKey.java rename to src/main/java/org/truffleruby/core/string/BytesKey.java index 393aedc514b0..47f7c6b099da 100644 --- a/src/main/java/org/truffleruby/core/rope/BytesKey.java +++ b/src/main/java/org/truffleruby/core/string/BytesKey.java @@ -7,11 +7,13 @@ * GNU General Public License version 2, or * GNU Lesser General Public License version 2.1. 
*/ -package org.truffleruby.core.rope; +package org.truffleruby.core.string; import java.util.Arrays; import org.jcodings.Encoding; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; public class BytesKey { @@ -42,7 +44,7 @@ public boolean equals(Object o) { @Override public String toString() { - return RopeOperations.decode(encoding, bytes); + return TStringUtils.fromByteArray(bytes, Encodings.getBuiltInEncoding(encoding)).toString(); } } diff --git a/src/main/java/org/truffleruby/core/rope/CannotConvertBinaryRubyStringToJavaString.java b/src/main/java/org/truffleruby/core/string/CannotConvertBinaryRubyStringToJavaString.java similarity index 96% rename from src/main/java/org/truffleruby/core/rope/CannotConvertBinaryRubyStringToJavaString.java rename to src/main/java/org/truffleruby/core/string/CannotConvertBinaryRubyStringToJavaString.java index d5ea97b89bbc..a2bd4ff30f11 100644 --- a/src/main/java/org/truffleruby/core/rope/CannotConvertBinaryRubyStringToJavaString.java +++ b/src/main/java/org/truffleruby/core/string/CannotConvertBinaryRubyStringToJavaString.java @@ -7,7 +7,7 @@ * GNU General Public License version 2, or * GNU Lesser General Public License version 2.1. 
*/ -package org.truffleruby.core.rope; +package org.truffleruby.core.string; public class CannotConvertBinaryRubyStringToJavaString extends RuntimeException { diff --git a/src/main/java/org/truffleruby/core/string/ConvertBytes.java b/src/main/java/org/truffleruby/core/string/ConvertBytes.java index 7135563acce4..44abd91508a2 100644 --- a/src/main/java/org/truffleruby/core/string/ConvertBytes.java +++ b/src/main/java/org/truffleruby/core/string/ConvertBytes.java @@ -17,13 +17,11 @@ import java.util.Arrays; import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.strings.AbstractTruffleString; import org.truffleruby.RubyContext; import org.truffleruby.core.CoreLibrary; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.numeric.FixnumOrBignumNode; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.language.control.RaiseException; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -33,7 +31,7 @@ public class ConvertBytes { private final RubyContext context; private final Node caller; private final FixnumOrBignumNode fixnumOrBignumNode; - private final Rope rope; + private final AbstractTruffleString rope; private int p; private int end; private byte[] data; @@ -44,19 +42,21 @@ public ConvertBytes( RubyContext context, Node caller, FixnumOrBignumNode fixnumOrBignumNode, - RopeNodes.BytesNode bytesNode, - Rope rope, + AbstractTruffleString rope, + RubyEncoding encoding, int base, boolean badcheck) { assert rope != null; + var byteArray = rope.getInternalByteArrayUncached(encoding.tencoding); + this.context = context; this.caller = caller; this.fixnumOrBignumNode = fixnumOrBignumNode; this.rope = rope; - this.p = 0; - this.data = bytesNode.execute(rope); - this.end = data.length; + this.p = byteArray.getOffset(); + this.data = byteArray.getArray(); + 
this.end = byteArray.getEnd(); this.badcheck = badcheck; this.base = base; } @@ -65,15 +65,14 @@ public ConvertBytes( static { MIN_VALUE_BYTES = new byte[37][]; for (int i = 2; i <= 36; i++) { - MIN_VALUE_BYTES[i] = RopeOperations.encodeAsciiBytes(Long.toString(Long.MIN_VALUE, i)); + MIN_VALUE_BYTES[i] = StringOperations.encodeAsciiBytes(Long.toString(Long.MIN_VALUE, i)); } } /** rb_cstr_to_inum */ public static Object bytesToInum(RubyContext context, Node caller, FixnumOrBignumNode fixnumOrBignumNode, - RopeNodes.BytesNode bytesNode, - Rope rope, int base, boolean badcheck) { - return new ConvertBytes(context, caller, fixnumOrBignumNode, bytesNode, rope, base, badcheck).bytesToInum(); + AbstractTruffleString rope, RubyEncoding encoding, int base, boolean badcheck) { + return new ConvertBytes(context, caller, fixnumOrBignumNode, rope, encoding, base, badcheck).bytesToInum(); } /** conv_digit */ @@ -536,7 +535,7 @@ public Kind getKind() { private void invalidString() { throw new RaiseException( context, - context.getCoreExceptions().argumentErrorInvalidStringToInteger(rope, caller)); + context.getCoreExceptions().argumentErrorInvalidStringToInteger(rope.toJavaStringUncached(), caller)); } public static final byte[] intToBinaryBytes(int i) { @@ -548,8 +547,8 @@ public static final byte[] intToOctalBytes(int i) { } public static final byte[] intToHexBytes(int i, boolean upper) { - RopeBuilder ropeBuilder = intToUnsignedBytes(i, 4, upper ? UPPER_DIGITS : LOWER_DIGITS); - return ropeBuilder.getBytes(); + TStringBuilder tstringBuilder = intToUnsignedBytes(i, 4, upper ? UPPER_DIGITS : LOWER_DIGITS); + return tstringBuilder.getBytes(); } public static final byte[] intToByteArray(int i, int radix, boolean upper) { @@ -569,26 +568,26 @@ public static final byte[] longToOctalBytes(long i) { } public static final byte[] longToHexBytes(long i, boolean upper) { - RopeBuilder ropeBuilder = longToUnsignedBytes(i, 4, upper ? 
UPPER_DIGITS : LOWER_DIGITS); - return ropeBuilder.getBytes(); + TStringBuilder tstringBuilder = longToUnsignedBytes(i, 4, upper ? UPPER_DIGITS : LOWER_DIGITS); + return tstringBuilder.getBytes(); } public static final byte[] longToByteArray(long i, int radix, boolean upper) { - RopeBuilder ropeBuilder = longToBytes(i, radix, upper ? UPPER_DIGITS : LOWER_DIGITS); - return ropeBuilder.getBytes(); + TStringBuilder tstringBuilder = longToBytes(i, radix, upper ? UPPER_DIGITS : LOWER_DIGITS); + return tstringBuilder.getBytes(); } public static final byte[] longToCharBytes(long i) { return longToBytes(i, 10, LOWER_DIGITS).getBytes(); } - public static final RopeBuilder longToBytes(long i, int radix, byte[] digitmap) { + public static final TStringBuilder longToBytes(long i, int radix, byte[] digitmap) { if (i == 0) { - return RopeBuilder.createRopeBuilder(ZERO_BYTES); + return TStringBuilder.create(ZERO_BYTES); } if (i == Long.MIN_VALUE) { - return RopeBuilder.createRopeBuilder(MIN_VALUE_BYTES[radix]); + return TStringBuilder.create(MIN_VALUE_BYTES[radix]); } boolean neg = false; @@ -609,10 +608,10 @@ public static final RopeBuilder longToBytes(long i, int radix, byte[] digitmap) buf[--pos] = (byte) '-'; } - return RopeBuilder.createRopeBuilder(buf, pos, len - pos); + return TStringBuilder.create(buf, pos, len - pos); } - private static final RopeBuilder intToUnsignedBytes(int i, int shift, byte[] digitmap) { + private static final TStringBuilder intToUnsignedBytes(int i, int shift, byte[] digitmap) { byte[] buf = new byte[32]; int charPos = 32; int radix = 1 << shift; @@ -621,10 +620,10 @@ private static final RopeBuilder intToUnsignedBytes(int i, int shift, byte[] dig buf[--charPos] = digitmap[(int) (i & mask)]; i >>>= shift; } while (i != 0); - return RopeBuilder.createRopeBuilder(buf, charPos, (32 - charPos)); + return TStringBuilder.create(buf, charPos, (32 - charPos)); } - private static final RopeBuilder longToUnsignedBytes(long i, int shift, byte[] digitmap) { + 
private static final TStringBuilder longToUnsignedBytes(long i, int shift, byte[] digitmap) { byte[] buf = new byte[64]; int charPos = 64; int radix = 1 << shift; @@ -633,7 +632,7 @@ private static final RopeBuilder longToUnsignedBytes(long i, int shift, byte[] d buf[--charPos] = digitmap[(int) (i & mask)]; i >>>= shift; } while (i != 0); - return RopeBuilder.createRopeBuilder(buf, charPos, (64 - charPos)); + return TStringBuilder.create(buf, charPos, (64 - charPos)); } public static final byte[] twosComplementToBinaryBytes(byte[] in) { diff --git a/src/main/java/org/truffleruby/core/string/CoreString.java b/src/main/java/org/truffleruby/core/string/CoreString.java index 007bdbc6b2b0..e8c72a0c5e4b 100644 --- a/src/main/java/org/truffleruby/core/string/CoreString.java +++ b/src/main/java/org/truffleruby/core/string/CoreString.java @@ -9,23 +9,20 @@ */ package org.truffleruby.core.string; -import org.jcodings.specific.ASCIIEncoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; -import org.truffleruby.core.rope.RopeOperations; public class CoreString { private final RubyLanguage language; private final String literal; - @CompilationFinal private volatile Rope rope; + @CompilationFinal private volatile TruffleString tstring; public CoreString(RubyLanguage language, String literal) { assert language != null; @@ -34,23 +31,25 @@ public CoreString(RubyLanguage language, String literal) { this.literal = literal; } - public Rope getRope() { - if (rope == null) { + public TruffleString getTruffleString() { + if (tstring == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - rope = language.ropeCache.getRope( - 
RopeOperations.encodeAsciiBytes(literal), - // Binary because error message Strings have a ASCII-8BIT encoding on MRI. - // When used for creating a Symbol, the encoding is adapted as needed. - ASCIIEncoding.INSTANCE, - CodeRange.CR_7BIT); + // Binary because error message Strings have a ASCII-8BIT encoding on MRI. + // When used for creating a Symbol, the encoding is adapted as needed. + tstring = language.tstringCache.getTString(StringOperations.encodeAsciiBytes(literal), Encodings.BINARY); } - return rope; + return tstring; } public RubyString createInstance(RubyContext context) { - return StringOperations.createString(context, language, getRope(), Encodings.BINARY); + return new RubyString( + context.getCoreLibrary().stringClass, + language.stringShape, + false, + getTruffleString(), + Encodings.BINARY); } private static boolean is7Bit(String literal) { diff --git a/src/main/java/org/truffleruby/core/string/DoubleConverter.java b/src/main/java/org/truffleruby/core/string/DoubleConverter.java index 8a72a168255a..0d2ab6277dac 100644 --- a/src/main/java/org/truffleruby/core/string/DoubleConverter.java +++ b/src/main/java/org/truffleruby/core/string/DoubleConverter.java @@ -28,7 +28,8 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.core.string; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.parser.SafeDoubleParser; public class DoubleConverter { @@ -58,16 +59,17 @@ public class DoubleConverter { public DoubleConverter() { } - public void init(Rope rope, boolean isStrict) { - bytes = rope.getBytes(); - index = 0; - endIndex = index + rope.byteLength(); + public void init(AbstractTruffleString rope, RubyEncoding encoding, boolean isStrict) { + var byteArray = rope.getInternalByteArrayUncached(encoding.tencoding); + bytes = byteArray.getArray(); + index = byteArray.getOffset(); + endIndex = byteArray.getEnd(); this.isStrict = 
isStrict; // +2 for added exponent: E... // The algorithm trades digits for inc/dec exponent. // Worse case is adding E-1 when no exponent, // it trades one digit for 3 chars. - chars = new char[Math.min(rope.byteLength() + 2, MAX_LENGTH)]; + chars = new char[Math.min(byteArray.getLength() + 2, MAX_LENGTH)]; charsIndex = 0; significantDigitsProcessed = 0; adjustExponent = 0; @@ -175,8 +177,8 @@ private boolean strictError() { /** Everything runs in 1.9+ mode now, so the `is19` parameter is vestigial. However, in order to maintain binary * compatibility with extensions we can't just change the signature either. */ - public double parse(Rope rope, boolean strict, boolean is19) { - init(rope, strict); + public double parse(AbstractTruffleString rope, RubyEncoding encoding, boolean strict, boolean is19) { + init(rope, encoding, strict); if (skipWhitespace()) { return completeCalculation(); diff --git a/src/main/java/org/truffleruby/core/string/EncodingUtils.java b/src/main/java/org/truffleruby/core/string/EncodingUtils.java index 99a0db51118a..97de6a710c44 100644 --- a/src/main/java/org/truffleruby/core/string/EncodingUtils.java +++ b/src/main/java/org/truffleruby/core/string/EncodingUtils.java @@ -26,34 +26,19 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.core.string; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.nodes.Node; import org.jcodings.Encoding; import org.jcodings.ascii.AsciiTables; import org.jcodings.specific.ASCIIEncoding; -import org.truffleruby.RubyContext; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.language.control.RaiseException; public class EncodingUtils { // rb_enc_asciicompat public static boolean encAsciicompat(Encoding enc) { - return encMbminlen(enc) == 1 && !encDummy(enc); - } - - // rb_enc_mbminlen - public 
static int encMbminlen(Encoding encoding) { - return encoding.minLength(); - } - - // rb_enc_dummy_p - public static boolean encDummy(Encoding enc) { - return enc.isDummy(); + return enc.minLength() == 1 && !enc.isDummy(); } public static boolean DECORATOR_P(byte[] sname, byte[] dname) { @@ -85,7 +70,7 @@ public static List encodingNames(byte[] name, int p, int end) { boolean isValid = false; if (s >= end) { isValid = true; - names.add(RopeOperations.decodeAscii(name, p, end)); + names.add(new String(name, p, end - p, StandardCharsets.US_ASCII)); } if (!isValid || hasLower) { @@ -116,7 +101,7 @@ public static List encodingNames(byte[] name, int p, int end) { } } if (hasUpper) { - names.add(RopeOperations.decodeAscii(constName)); + names.add(new String(constName, StandardCharsets.US_ASCII)); } } if (hasLower) { @@ -126,7 +111,7 @@ public static List encodingNames(byte[] name, int p, int end) { constName[s] = AsciiTables.ToUpperCaseTable[code]; } } - names.add(RopeOperations.decodeAscii(constName)); + names.add(new String(constName, StandardCharsets.US_ASCII)); } } @@ -134,64 +119,4 @@ public static List encodingNames(byte[] name, int p, int end) { } - // rb_enc_ascget - public static int encAscget(byte[] pBytes, int p, int e, int[] len, Encoding enc, CodeRange codeRange) { - int c; - int l; - - if (e <= p) { - return -1; - } - - if (encAsciicompat(enc)) { - c = pBytes[p] & 0xFF; - if (!Encoding.isAscii((byte) c)) { - return -1; - } - if (len != null) { - len[0] = 1; - } - return c; - } - l = StringSupport.characterLength(enc, codeRange, pBytes, p, e); - if (!StringSupport.MBCLEN_CHARFOUND_P(l)) { - return -1; - } - c = enc.mbcToCode(pBytes, p, e); - if (!Encoding.isAscii(c)) { - return -1; - } - if (len != null) { - len[0] = l; - } - return c; - } - - // rb_enc_codepoint_len - @TruffleBoundary - public static int encCodepointLength(byte[] pBytes, int p, int e, int[] len_p, Encoding enc, CodeRange codeRange, - Node node) { - int r; - if (e <= p) { - final RubyContext 
context = RubyContext.get(node); - throw new RaiseException(context, context.getCoreExceptions().argumentError("empty string", node)); - } - r = StringSupport.characterLength(enc, codeRange, pBytes, p, e); - if (!StringSupport.MBCLEN_CHARFOUND_P(r)) { - final RubyContext context = RubyContext.get(node); - throw new RaiseException( - context, - context.getCoreExceptions().argumentError("invalid byte sequence in " + enc, node)); - } - if (len_p != null) { - len_p[0] = StringSupport.MBCLEN_CHARFOUND_LEN(r); - } - return StringSupport.codePoint(enc, codeRange, pBytes, p, e, node); - } - - // rb_enc_mbcput - public static int encMbcput(int c, byte[] buf, int p, Encoding enc) { - return enc.codeToMbc(c, buf, p); - } - } diff --git a/src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java b/src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java index cbd097427d8b..1363153d2c05 100644 --- a/src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java +++ b/src/main/java/org/truffleruby/core/string/FrozenStringLiterals.java @@ -11,53 +11,56 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import org.jcodings.Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.collections.WeakValueCache; -import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeCache; +import org.truffleruby.core.encoding.TStringUtils; import java.util.ArrayList; import java.util.Collection; import java.util.List; +// TODO: should rename to ImmutableStrings public class FrozenStringLiterals { private static final List STRINGS_TO_CACHE = new ArrayList<>(); - private final RopeCache ropeCache; - private final WeakValueCache values = new WeakValueCache<>(); + private 
final TStringCache tstringCache; + private final WeakValueCache values = new WeakValueCache<>(); - public FrozenStringLiterals(RopeCache ropeCache) { - this.ropeCache = ropeCache; + public FrozenStringLiterals(TStringCache tStringCache) { + this.tstringCache = tStringCache; for (ImmutableRubyString name : STRINGS_TO_CACHE) { addFrozenStringToCache(name); } } @TruffleBoundary - public ImmutableRubyString getFrozenStringLiteral(Rope rope) { - return getFrozenStringLiteral(rope.getBytes(), rope.getEncoding(), rope.getCodeRange()); + public ImmutableRubyString getFrozenStringLiteral(TruffleString tstring, RubyEncoding encoding) { + if (tstring.isNative()) { + throw CompilerDirectives.shouldNotReachHere(); + } + + return getFrozenStringLiteral(TStringUtils.getBytesOrCopy(tstring, encoding), encoding); } @TruffleBoundary - public ImmutableRubyString getFrozenStringLiteral(byte[] bytes, Encoding encoding, CodeRange codeRange) { - // Ensure all ImmutableRubyString have a Rope from the RopeCache - final LeafRope cachedRope = ropeCache.getRope(bytes, encoding, codeRange); + public ImmutableRubyString getFrozenStringLiteral(byte[] bytes, RubyEncoding encoding) { + // Ensure all ImmutableRubyString have a TruffleString from the TStringCache + var cachedTString = tstringCache.getTString(bytes, encoding); + var tstringWithEncoding = new TStringWithEncoding(cachedTString, encoding); - final ImmutableRubyString string = values.get(cachedRope); + final ImmutableRubyString string = values.get(tstringWithEncoding); if (string != null) { return string; } else { - final RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encoding.getIndex()); - return values.addInCacheIfAbsent(cachedRope, new ImmutableRubyString(cachedRope, rubyEncoding)); + return values.addInCacheIfAbsent(tstringWithEncoding, + new ImmutableRubyString(cachedTString, encoding)); } } - public static ImmutableRubyString createStringAndCacheLater(LeafRope name, RubyEncoding encoding) { + public static 
ImmutableRubyString createStringAndCacheLater(TruffleString name, + RubyEncoding encoding) { final ImmutableRubyString string = new ImmutableRubyString(name, encoding); assert !STRINGS_TO_CACHE.contains(string); STRINGS_TO_CACHE.add(string); @@ -65,9 +68,11 @@ public static ImmutableRubyString createStringAndCacheLater(LeafRope name, RubyE } private void addFrozenStringToCache(ImmutableRubyString string) { - final LeafRope cachedRope = ropeCache.getRope(string.rope); - assert cachedRope == string.rope; - final ImmutableRubyString existing = values.addInCacheIfAbsent(string.rope, string); + var encoding = string.getEncodingUncached(); + var cachedTString = tstringCache.getTString(string.tstring, encoding); + assert cachedTString == string.tstring; + var tstringWithEncoding = new TStringWithEncoding(cachedTString, encoding); + final ImmutableRubyString existing = values.addInCacheIfAbsent(tstringWithEncoding, string); if (existing != string) { throw CompilerDirectives .shouldNotReachHere("Duplicate ImmutableRubyString in FrozenStringLiterals: " + existing); diff --git a/src/main/java/org/truffleruby/core/string/FrozenStrings.java b/src/main/java/org/truffleruby/core/string/FrozenStrings.java index 07858c1ee0c8..9a2f962dfb73 100644 --- a/src/main/java/org/truffleruby/core/string/FrozenStrings.java +++ b/src/main/java/org/truffleruby/core/string/FrozenStrings.java @@ -9,18 +9,22 @@ */ package org.truffleruby.core.string; -import org.jcodings.specific.ASCIIEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LeafRope; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import java.util.ArrayList; import java.util.List; -import static org.truffleruby.core.encoding.Encodings.BINARY; - public class FrozenStrings { - public static final List ROPES = new ArrayList<>(); + public static 
final List TSTRINGS = new ArrayList<>(); + + public static final ImmutableRubyString EMPTY_US_ASCII = FrozenStringLiterals.createStringAndCacheLater( + TStringConstants.EMPTY_US_ASCII, + Encodings.US_ASCII); + public static final ImmutableRubyString YIELD = createFrozenStaticBinaryString("yield"); public static final ImmutableRubyString ASSIGNMENT = createFrozenStaticBinaryString("assignment"); public static final ImmutableRubyString CLASS_VARIABLE = createFrozenStaticBinaryString("class variable"); @@ -38,10 +42,14 @@ public class FrozenStrings { private static ImmutableRubyString createFrozenStaticBinaryString(String string) { // defined?(...) returns frozen strings with a binary encoding - final LeafRope rope = StringOperations.encodeRope(string, ASCIIEncoding.INSTANCE, CodeRange.CR_7BIT); - ROPES.add(rope); - return FrozenStringLiterals.createStringAndCacheLater(rope, BINARY); + return createFrozenStaticString(string, Encodings.BINARY); } + private static ImmutableRubyString createFrozenStaticString(String string, RubyEncoding encoding) { + // defined?(...) 
returns frozen strings with a binary encoding + var tstring = TStringUtils.fromJavaString(string, encoding); + TSTRINGS.add(tstring); + return FrozenStringLiterals.createStringAndCacheLater(tstring, encoding); + } } diff --git a/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java b/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java index c4b734074349..8d1378d18fde 100644 --- a/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java +++ b/src/main/java/org/truffleruby/core/string/ImmutableRubyString.java @@ -9,8 +9,10 @@ */ package org.truffleruby.core.string; +import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.interop.TruffleObject; @@ -18,83 +20,84 @@ import com.oracle.truffle.api.library.ExportLibrary; import com.oracle.truffle.api.library.ExportMessage; import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; +import org.truffleruby.cext.CExtNodes; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.kernel.KernelNodes; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.NativeRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.language.ImmutableRubyObjectCopyable; +import org.truffleruby.extra.ffi.Pointer; +import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.dispatch.DispatchNode; 
import org.truffleruby.language.library.RubyStringLibrary; /** All ImmutableRubyString are interned and must be created through - * {@link FrozenStringLiterals#getFrozenStringLiteral(Rope)}. */ + * {@link FrozenStringLiterals#getFrozenStringLiteral}. */ @ExportLibrary(InteropLibrary.class) -@ExportLibrary(RubyStringLibrary.class) -public class ImmutableRubyString extends ImmutableRubyObjectCopyable implements TruffleObject { +public final class ImmutableRubyString extends ImmutableRubyObjectCopyable implements TruffleObject { - public final LeafRope rope; - public final RubyEncoding encoding; - private NativeRope nativeRope = null; + public final TruffleString tstring; + private final RubyEncoding encoding; + private Pointer nativeString = null; - ImmutableRubyString(LeafRope rope, RubyEncoding encoding) { - assert rope.encoding == encoding.jcoding; - this.rope = rope; + ImmutableRubyString(TruffleString tstring, RubyEncoding encoding) { + assert tstring.isCompatibleTo(encoding.tencoding); + assert tstring.isManaged(); + this.tstring = tstring; this.encoding = encoding; } /** should only be used for debugging */ @Override public String toString() { - return rope.toString(); + return tstring.toString(); + } + + public TruffleString asTruffleStringUncached() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + assert !tstring.isNative(); + return tstring; + } + + public String getJavaString() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return TStringUtils.toJavaStringOrThrow(tstring, getEncodingUncached()); } public boolean isNative() { - return nativeRope != null; + return nativeString != null; } - public NativeRope getNativeRope(RubyLanguage language) { - if (nativeRope == null) { - return createNativeRope(language); + public Pointer getNativeString(RubyLanguage language) { + if (nativeString == null) { + return createNativeString(language); } - return nativeRope; + return nativeString; } @TruffleBoundary - 
private synchronized NativeRope createNativeRope(RubyLanguage language) { - if (nativeRope == null) { - nativeRope = new NativeRope(language, rope.getBytes(), rope.getEncoding(), rope.characterLength(), - rope.getCodeRange()); + private synchronized Pointer createNativeString(RubyLanguage language) { + if (nativeString == null) { + var tencoding = getEncodingUncached().tencoding; + int byteLength = tstring.byteLength(tencoding); + nativeString = CExtNodes.StringToNativeNode.allocateAndCopyToNative(tstring, tencoding, byteLength, + TruffleString.CopyToNativeMemoryNode.getUncached(), language); } - return nativeRope; + return nativeString; } - // region RubyStringLibrary messages - @ExportMessage - public RubyEncoding getEncoding() { + public RubyEncoding getEncodingUncached() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); return encoding; } - @ExportMessage - protected boolean isRubyString() { - return true; - } - - @ExportMessage - protected Rope getRope() { - return rope; - } - - @ExportMessage - protected String getJavaString() { - return RopeOperations.decodeRope(rope); + public RubyEncoding getEncodingUnprofiled() { + return encoding; } - // endregion // region InteropLibrary messages @ExportMessage @@ -126,33 +129,47 @@ protected boolean isString() { return true; } + @ExportMessage + protected TruffleString asTruffleString() { + assert !tstring.isNative(); + return tstring; + } + + @ImportStatic(RubyBaseNode.class) @ExportMessage public static class AsString { @Specialization( - guards = "equalsNode.execute(string.rope, cachedRope)", + guards = "equalNode.execute(string.tstring, libString.getEncoding(string), cachedTString, cachedEncoding)", limit = "getLimit()") protected static String asStringCached(ImmutableRubyString string, - @Cached("string.rope") Rope cachedRope, + @Cached RubyStringLibrary libString, + @Cached("string.asTruffleStringUncached()") TruffleString cachedTString, + @Cached("string.getEncodingUncached()") 
RubyEncoding cachedEncoding, @Cached("string.getJavaString()") String javaString, - @Cached RopeNodes.EqualNode equalsNode) { + @Cached StringHelperNodes.EqualNode equalNode) { return javaString; } @Specialization(replaces = "asStringCached") protected static String asStringUncached(ImmutableRubyString string, - @Cached ConditionProfile asciiOnlyProfile, - @Cached RopeNodes.AsciiOnlyNode asciiOnlyNode, - @Cached RopeNodes.BytesNode bytesNode) { - final Rope rope = string.rope; - final byte[] bytes = bytesNode.execute(rope); - - if (asciiOnlyProfile.profile(asciiOnlyNode.execute(rope))) { - return RopeOperations.decodeAscii(bytes); + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetByteCodeRangeNode codeRangeNode, + @Cached TruffleString.ToJavaStringNode toJavaStringNode, + @Cached ConditionProfile binaryNonAsciiProfile) { + var encoding = libString.getEncoding(string); + if (binaryNonAsciiProfile.profile(encoding == Encodings.BINARY && + !StringGuards.is7Bit(string.tstring, encoding, codeRangeNode))) { + return getJavaStringBoundary(string); } else { - return RopeOperations.decodeNonAscii(rope.getEncoding(), bytes, 0, bytes.length); + return toJavaStringNode.execute(string.tstring); } } + @TruffleBoundary + private static String getJavaStringBoundary(ImmutableRubyString string) { + return string.getJavaString(); + } + protected static int getLimit() { return RubyLanguage.getCurrentLanguage().options.INTEROP_CONVERT_CACHE; } diff --git a/src/main/java/org/truffleruby/core/string/InterpolatedStringNode.java b/src/main/java/org/truffleruby/core/string/InterpolatedStringNode.java index be5f65e87270..a7532eea8416 100644 --- a/src/main/java/org/truffleruby/core/string/InterpolatedStringNode.java +++ b/src/main/java/org/truffleruby/core/string/InterpolatedStringNode.java @@ -9,12 +9,11 @@ */ package org.truffleruby.core.string; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; import org.truffleruby.core.cast.ToSNode; 
import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.language.RubyContextSourceNode; import com.oracle.truffle.api.CompilerDirectives; @@ -28,14 +27,14 @@ public final class InterpolatedStringNode extends RubyContextSourceNode { @Child private StringNodes.StringAppendPrimitiveNode appendNode; - private final Rope emptyRope; private final RubyEncoding encoding; + private final TruffleString emptyTString; public InterpolatedStringNode(ToSNode[] children, Encoding encoding) { assert children.length > 0; this.children = children; - this.emptyRope = RopeOperations.emptyRope(encoding); - this.encoding = Encodings.getBuiltInEncoding(encoding.getIndex()); + this.encoding = Encodings.getBuiltInEncoding(encoding); + this.emptyTString = this.encoding.tencoding.getEmpty(); } @ExplodeLoop @@ -43,7 +42,7 @@ public InterpolatedStringNode(ToSNode[] children, Encoding encoding) { public Object execute(VirtualFrame frame) { // Start with an empty string to ensure the result has class String and the proper encoding. - RubyString builder = StringOperations.createString(this, emptyRope, encoding); + RubyString builder = createString(emptyTString, encoding); // TODO (nirvdrum 11-Jan-16) Rewrite to avoid massively unbalanced trees. 
for (ToSNode child : children) { diff --git a/src/main/java/org/truffleruby/core/rope/PathToRopeCache.java b/src/main/java/org/truffleruby/core/string/PathToTStringCache.java similarity index 59% rename from src/main/java/org/truffleruby/core/rope/PathToRopeCache.java rename to src/main/java/org/truffleruby/core/string/PathToTStringCache.java index ca4ea62cf5d3..1ec9eb9dfb6d 100644 --- a/src/main/java/org/truffleruby/core/rope/PathToRopeCache.java +++ b/src/main/java/org/truffleruby/core/string/PathToTStringCache.java @@ -7,59 +7,60 @@ * GNU General Public License version 2, or * GNU Lesser General Public License version 2.1. */ -package org.truffleruby.core.rope; +package org.truffleruby.core.string; import java.util.WeakHashMap; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.source.Source; -/** A cache from {@link RubyLanguage#getPath(Source) the Source path} to a Rope. The Rope is kept alive as long as the - * Source is reachable. */ -public class PathToRopeCache { +/** A cache from {@link RubyLanguage#getPath(Source) the Source path} to a TruffleString. The TruffleString is kept + * alive as long as the Source is reachable. 
*/ +public class PathToTStringCache { private final RubyLanguage language; private final ReadWriteLock lock = new ReentrantReadWriteLock(); - private final WeakHashMap javaStringToRope = new WeakHashMap<>(); + private final WeakHashMap javaStringToTString = new WeakHashMap<>(); - public PathToRopeCache(RubyLanguage language) { + public PathToTStringCache(RubyLanguage language) { this.language = language; } @TruffleBoundary - public Rope getCachedPath(Source source) { + public TruffleString getCachedPath(Source source) { final String path = language.getSourcePath(source); final Lock readLock = lock.readLock(); readLock.lock(); try { - final Rope rope = javaStringToRope.get(path); - if (rope != null) { - return rope; + var tstring = javaStringToTString.get(path); + if (tstring != null) { + return tstring; } } finally { readLock.unlock(); } - final Rope cachedRope = language.ropeCache.getRope( - StringOperations.encodeRope(path, UTF8Encoding.INSTANCE)); + final TruffleString cachedString = language.tstringCache.getTString(TStringUtils.utf8TString(path), + Encodings.UTF_8); final Lock writeLock = lock.writeLock(); writeLock.lock(); try { - javaStringToRope.putIfAbsent(path, cachedRope); + javaStringToTString.putIfAbsent(path, cachedString); } finally { writeLock.unlock(); } - return cachedRope; + return cachedString; } } diff --git a/src/main/java/org/truffleruby/core/string/RubyString.java b/src/main/java/org/truffleruby/core/string/RubyString.java index c5f791533adf..94807059db29 100644 --- a/src/main/java/org/truffleruby/core/string/RubyString.java +++ b/src/main/java/org/truffleruby/core/string/RubyString.java @@ -9,17 +9,23 @@ */ package org.truffleruby.core.string; +import com.oracle.truffle.api.CompilerAsserts; +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.dsl.Cached.Shared; +import com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.Specialization; import 
com.oracle.truffle.api.interop.InteropLibrary; import com.oracle.truffle.api.library.ExportLibrary; import com.oracle.truffle.api.profiles.ConditionProfile; -import org.jcodings.Encoding; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.MutableTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyLanguage; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyDynamicObject; import org.truffleruby.language.library.RubyLibrary; @@ -30,66 +36,72 @@ @ExportLibrary(RubyLibrary.class) @ExportLibrary(InteropLibrary.class) -@ExportLibrary(RubyStringLibrary.class) -public class RubyString extends RubyDynamicObject { +@ImportStatic(RubyBaseNode.class) +public final class RubyString extends RubyDynamicObject { public boolean frozen; public boolean locked = false; - public Rope rope; - public RubyEncoding encoding; - - public RubyString(RubyClass rubyClass, Shape shape, boolean frozen, Rope rope, RubyEncoding rubyEncoding) { + public AbstractTruffleString tstring; + private RubyEncoding encoding; + + public RubyString( + RubyClass rubyClass, + Shape shape, + boolean frozen, + AbstractTruffleString tstring, + RubyEncoding rubyEncoding) { super(rubyClass, shape); - assert rope.encoding == rubyEncoding.jcoding; + assert tstring.isCompatibleTo(rubyEncoding.tencoding); this.frozen = frozen; - this.rope = rope; + this.tstring = tstring; this.encoding = rubyEncoding; } - public void setRope(Rope rope) { - assert rope.encoding == encoding.jcoding : rope.encoding.toString() + " does not equal " + - encoding.jcoding.toString(); - this.rope 
= rope; + public void setTString(AbstractTruffleString tstring) { + assert tstring.isCompatibleTo(getEncodingUncached().tencoding); + this.tstring = tstring; } - public void setRope(Rope rope, RubyEncoding encoding) { - assert rope.encoding == encoding.jcoding; - this.rope = rope; + public void setTString(AbstractTruffleString tstring, RubyEncoding encoding) { + assert tstring.isCompatibleTo(encoding.tencoding); + this.tstring = tstring; this.encoding = encoding; } + public void clearCodeRange() { + assert tstring.isNative(); + ((MutableTruffleString) tstring).notifyExternalMutation(); + } + /** should only be used for debugging */ @Override public String toString() { - return rope.toString(); + return tstring.toString(); } - public Encoding getJCoding() { - assert encoding.jcoding == rope.encoding; - return encoding.jcoding; + public TruffleString asTruffleStringUncached() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return tstring.asTruffleStringUncached(getEncodingUncached().tencoding); } - // region RubyStringLibrary messages - @ExportMessage - public RubyEncoding getEncoding() { - return encoding; + public String getJavaString() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return TStringUtils.toJavaStringOrThrow(tstring, getEncodingUncached()); } - @ExportMessage - protected boolean isRubyString() { - return true; + public int byteLengthUncached() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return tstring.byteLength(getEncodingUncached().tencoding); } - @ExportMessage - protected Rope getRope() { - return rope; + public RubyEncoding getEncodingUncached() { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return encoding; } - @ExportMessage - protected String getJavaString() { - return RopeOperations.decodeRope(rope); + public RubyEncoding getEncodingUnprofiled() { + return encoding; } - // endregion // region RubyLibrary messages 
@ExportMessage @@ -109,33 +121,48 @@ protected boolean isString() { return true; } + @ExportMessage + protected TruffleString asTruffleString( + @Cached @Shared("libString") RubyStringLibrary libString, + @Cached TruffleString.AsTruffleStringNode asTruffleStringNode) { + return asTruffleStringNode.execute(tstring, libString.getTEncoding(this)); + } + + @ImportStatic(RubyBaseNode.class) @ExportMessage public static class AsString { @Specialization( - guards = "equalsNode.execute(string.rope, cachedRope)", + guards = "equalNode.execute(string.tstring, libString.getEncoding(string), cachedTString, cachedEncoding)", limit = "getLimit()") protected static String asStringCached(RubyString string, - @Cached("string.rope") Rope cachedRope, + @Cached RubyStringLibrary libString, + @Cached("string.asTruffleStringUncached()") TruffleString cachedTString, + @Cached("string.getEncodingUncached()") RubyEncoding cachedEncoding, @Cached("string.getJavaString()") String javaString, - @Cached RopeNodes.EqualNode equalsNode) { + @Cached StringHelperNodes.EqualNode equalNode) { return javaString; } @Specialization(replaces = "asStringCached") protected static String asStringUncached(RubyString string, - @Cached ConditionProfile asciiOnlyProfile, - @Cached RopeNodes.AsciiOnlyNode asciiOnlyNode, - @Cached RopeNodes.BytesNode bytesNode) { - final Rope rope = string.rope; - final byte[] bytes = bytesNode.execute(rope); - - if (asciiOnlyProfile.profile(asciiOnlyNode.execute(rope))) { - return RopeOperations.decodeAscii(bytes); + @Cached @Shared("libString") RubyStringLibrary libString, + @Cached TruffleString.GetByteCodeRangeNode codeRangeNode, + @Cached TruffleString.ToJavaStringNode toJavaStringNode, + @Cached ConditionProfile binaryNonAsciiProfile) { + var encoding = libString.getEncoding(string); + if (binaryNonAsciiProfile.profile(encoding == Encodings.BINARY && + !StringGuards.is7Bit(string.tstring, encoding, codeRangeNode))) { + return getJavaStringBoundary(string); } else { - 
return RopeOperations.decodeNonAscii(rope.getEncoding(), bytes, 0, bytes.length); + return toJavaStringNode.execute(string.tstring); } } + @TruffleBoundary + private static String getJavaStringBoundary(RubyString string) { + return string.getJavaString(); + } + protected static int getLimit() { return RubyLanguage.getCurrentLanguage().options.INTEROP_CONVERT_CACHE; } diff --git a/src/main/java/org/truffleruby/core/string/StringAttributes.java b/src/main/java/org/truffleruby/core/string/StringAttributes.java deleted file mode 100644 index ca22a2ba96ad..000000000000 --- a/src/main/java/org/truffleruby/core/string/StringAttributes.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2018, 2019 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.string; - -import org.truffleruby.core.rope.CodeRange; - -public final class StringAttributes { - private final int characterLength; - private final CodeRange codeRange; - - public StringAttributes(int characterLength, CodeRange codeRange) { - this.characterLength = characterLength; - this.codeRange = codeRange; - } - - public int getCharacterLength() { - return characterLength; - } - - public CodeRange getCodeRange() { - return codeRange; - } -} diff --git a/src/main/java/org/truffleruby/core/string/StringCachingGuards.java b/src/main/java/org/truffleruby/core/string/StringCachingGuards.java deleted file mode 100644 index efbc8010e5d6..000000000000 --- a/src/main/java/org/truffleruby/core/string/StringCachingGuards.java +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright (c) 2015, 2021 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. 
You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.string; - -import org.truffleruby.core.rope.Rope; - -public abstract class StringCachingGuards { - - public static Rope privatizeRope(RubyString string) { - // TODO (nirvdrum 25-Jan-16) Should we flatten the rope to avoid caching a potentially deep rope tree? - return string.rope; - } - -} diff --git a/src/main/java/org/truffleruby/core/string/StringGuards.java b/src/main/java/org/truffleruby/core/string/StringGuards.java index 0c85d6cff55e..36ddc48c52ab 100644 --- a/src/main/java/org/truffleruby/core/string/StringGuards.java +++ b/src/main/java/org/truffleruby/core/string/StringGuards.java @@ -10,90 +10,79 @@ package org.truffleruby.core.string; +import com.oracle.truffle.api.CompilerAsserts; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Config; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.string.StringHelperNodes.SingleByteOptimizableNode; + +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.ASCII; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.BROKEN; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.VALID; public class StringGuards { private static final int CASE_FULL_UNICODE = 0; - public static boolean isSingleByteOptimizable(Rope rope, - RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode) { - return singleByteOptimizableNode.execute(rope); + // Also known as isAsciiOnly() + public static boolean is7Bit(AbstractTruffleString tstring, RubyEncoding encoding, + 
TruffleString.GetByteCodeRangeNode codeRangeNode) { + return codeRangeNode.execute(tstring, encoding.tencoding) == ASCII; } - public static boolean isSingleByteOptimizable(RubyString string, - RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode) { - - final Rope rope = string.rope; - return singleByteOptimizableNode.execute(rope); + public static boolean is7BitUncached(AbstractTruffleString tstring, RubyEncoding encoding) { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + return tstring.getByteCodeRangeUncached(encoding.tencoding) == ASCII; } - public static boolean is7Bit(Rope rope, RopeNodes.CodeRangeNode codeRangeNode) { - return codeRangeNode.execute(rope) == CodeRange.CR_7BIT; + public static boolean isValid(AbstractTruffleString tstring, RubyEncoding encoding, + TruffleString.GetByteCodeRangeNode codeRangeNode) { + return codeRangeNode.execute(tstring, encoding.tencoding) == VALID; } - public static boolean isAsciiCompatible(Rope rope) { - return rope.getEncoding().isAsciiCompatible(); + public static boolean isBrokenCodeRange(AbstractTruffleString string, RubyEncoding encoding, + TruffleString.GetByteCodeRangeNode codeRangeNode) { + return codeRangeNode.execute(string, encoding.tencoding) == BROKEN; } - public static boolean isAsciiCompatible(RubyString string) { - return string.rope.getEncoding().isAsciiCompatible(); + public static boolean isBrokenCodeRange(AbstractTruffleString string, TruffleString.Encoding encoding, + TruffleString.GetByteCodeRangeNode codeRangeNode) { + return codeRangeNode.execute(string, encoding) == BROKEN; } - public static boolean isFixedWidthEncoding(Rope rope) { - return rope.getEncoding().isFixedWidth(); + public static boolean isSingleByteOptimizable(AbstractTruffleString tString, RubyEncoding encoding, + SingleByteOptimizableNode singleByteOptimizableNode) { + return singleByteOptimizableNode.execute(tString, encoding); } - public static boolean isValidUtf8(Rope rope, 
RopeNodes.CodeRangeNode codeRangeNode) { - return rope.getEncoding().isUTF8() && codeRangeNode.execute(rope) == CodeRange.CR_VALID; + public static boolean isAsciiCompatible(RubyEncoding encoding) { + return encoding.isAsciiCompatible; } - public static boolean isEmpty(Rope rope) { - return rope.isEmpty(); + public static boolean isFixedWidthEncoding(RubyEncoding encoding) { + return encoding.isFixedWidth; } - public static boolean isBrokenCodeRange(Rope rope, RopeNodes.CodeRangeNode codeRangeNode) { - return codeRangeNode.execute(rope) == CodeRange.CR_BROKEN; - } - - public static boolean isSingleByteString(Rope rope) { - return rope.byteLength() == 1; - } - - public static boolean canMemcmp(Rope sourceRope, Rope patternRope, - RopeNodes.SingleByteOptimizableNode singleByteNode) { - - return (singleByteNode.execute(sourceRope) || sourceRope.getEncoding().isUTF8()) && - (singleByteNode.execute(patternRope) || patternRope.getEncoding().isUTF8()); + public static boolean isEmpty(AbstractTruffleString string) { + return string.isEmpty(); } /** The case mapping is simple (ASCII-only or full Unicode): no complex option like Turkic, case-folding, etc. */ - public static boolean isAsciiCompatMapping(int caseMappingOptions) { + private static boolean isAsciiCompatMapping(int caseMappingOptions) { return caseMappingOptions == CASE_FULL_UNICODE || caseMappingOptions == Config.CASE_ASCII_ONLY; } - /** The string can be optimized to single-byte representation and is a simple case mapping (ASCII-only or full - * Unicode). */ - public static boolean isSingleByteCaseMapping(RubyString string, int caseMappingOptions, - RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode) { - return isSingleByteOptimizable(string, singleByteOptimizableNode) && isAsciiCompatMapping(caseMappingOptions); - } - - /** The string's encoding is ASCII-compatible, the mapping is ASCII-only and {@link #isSingleByteCaseMapping} is not - * applicable. 
*/ - public static boolean isSimpleAsciiCaseMapping(RubyString string, int caseMappingOptions, - RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode) { - return !isSingleByteOptimizable(string, singleByteOptimizableNode) && - caseMappingOptions == Config.CASE_ASCII_ONLY && isAsciiCompatible(string); + /** The mapping is ASCII-only or effectively ASCII-only based on the string properties. */ + private static boolean isAsciiCodePointsMapping(AbstractTruffleString tstring, RubyEncoding encoding, + int caseMappingOptions, SingleByteOptimizableNode singleByteOptimizableNode) { + return isSingleByteOptimizable(tstring, encoding, singleByteOptimizableNode) + ? isAsciiCompatMapping(caseMappingOptions) + : caseMappingOptions == Config.CASE_ASCII_ONLY && isAsciiCompatible(encoding); } - /** Both {@link #isSingleByteCaseMapping} and {@link #isSimpleAsciiCaseMapping} are not applicable. */ - public static boolean isComplexCaseMapping(RubyString string, int caseMappingOptions, - RopeNodes.SingleByteOptimizableNode singleByteOptimizableNode) { - return !isSingleByteCaseMapping(string, caseMappingOptions, singleByteOptimizableNode) && - !isSimpleAsciiCaseMapping(string, caseMappingOptions, singleByteOptimizableNode); + public static boolean isComplexCaseMapping(AbstractTruffleString tstring, RubyEncoding encoding, + int caseMappingOptions, SingleByteOptimizableNode singleByteOptimizableNode) { + return !isAsciiCodePointsMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode); } } diff --git a/src/main/java/org/truffleruby/core/string/StringHelperNodes.java b/src/main/java/org/truffleruby/core/string/StringHelperNodes.java new file mode 100644 index 000000000000..940d92676cbf --- /dev/null +++ b/src/main/java/org/truffleruby/core/string/StringHelperNodes.java @@ -0,0 +1,609 @@ +/* + * Copyright (c) 2022 Oracle and/or its affiliates. All rights reserved. This + * code is released under a tri EPL/GPL/LGPL license. 
You can use it, + * redistribute it and/or modify it under the terms of the: + * + * Eclipse Public License version 2.0, or + * GNU General Public License version 2, or + * GNU Lesser General Public License version 2.1. + */ +package org.truffleruby.core.string; + +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.dsl.Bind; +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.dsl.GenerateUncached; +import com.oracle.truffle.api.dsl.ImportStatic; +import com.oracle.truffle.api.dsl.Specialization; +import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.nodes.ExplodeLoop; +import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.object.DynamicObjectLibrary; +import com.oracle.truffle.api.profiles.BranchProfile; +import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; +import com.oracle.truffle.api.strings.TruffleStringIterator; +import org.truffleruby.Layouts; +import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; +import org.truffleruby.core.encoding.EncodingNodes; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.language.Nil; +import org.truffleruby.language.RubyBaseNode; +import org.truffleruby.language.control.RaiseException; +import org.truffleruby.language.library.RubyStringLibrary; + +public abstract class StringHelperNodes { + + @TruffleBoundary + static Object trTransHelper(EncodingNodes.CheckEncodingNode checkEncodingNode, RubyString self, + RubyStringLibrary libFromStr, Object fromStr, + RubyStringLibrary libToStr, Object toStr, boolean sFlag, Node node) { + final RubyEncoding e1 = checkEncodingNode.executeCheckEncoding(self, fromStr); + final RubyEncoding e2 = 
checkEncodingNode.executeCheckEncoding(self, toStr); + final RubyEncoding enc = e1 == e2 ? e1 : checkEncodingNode.executeCheckEncoding(fromStr, toStr); + + var selfTStringWithEnc = new ATStringWithEncoding(self.tstring, self.getEncodingUncached()); + var fromStrTStringWithEnc = new ATStringWithEncoding(libFromStr, fromStr); + var toStrTStringWithEnc = new ATStringWithEncoding(libToStr, toStr); + final TruffleString ret = StringSupport.trTransHelper(selfTStringWithEnc, fromStrTStringWithEnc, + toStrTStringWithEnc, e1.jcoding, enc, sFlag, node); + if (ret == null) { + return Nil.INSTANCE; + } + + self.setTString(ret, enc); + return self; + } + + public abstract static class SingleByteOptimizableNode extends RubyBaseNode { + public static SingleByteOptimizableNode create() { + return StringHelperNodesFactory.SingleByteOptimizableNodeGen.create(); + } + + public abstract boolean execute(AbstractTruffleString string, RubyEncoding encoding); + + @Specialization + protected boolean isSingleByteOptimizable(AbstractTruffleString string, RubyEncoding encoding, + @Cached ConditionProfile asciiOnlyProfile, + @Cached TruffleString.GetByteCodeRangeNode getByteCodeRangeNode) { + if (asciiOnlyProfile.profile(StringGuards.is7Bit(string, encoding, getByteCodeRangeNode))) { + return true; + } else { + return encoding.isSingleByte; + } + } + } + + /** The node to use for inline caches to compare if two TruffleString are equal. It behaves the same as String#==, + * without coercion. Note that the two encodings do no need to be the same for this node to return true. If you need + * to ensure the encoding is the same, use {@link EqualSameEncodingNode}. + * + * Two strings are considered equal if they are the same byte-by-byte and: + *

<ul> + *
<li>Both strings have the same encoding</li> + *
<li>Both strings are 7-bit (and so both have an ASCII-compatible encoding)</li> + *
<li>Both strings are empty (regardless of their encodings)</li> + *
</ul>
+ */ + public abstract static class EqualNode extends RubyBaseNode { + + public final boolean execute(RubyStringLibrary libString, Object rubyString, + TruffleString cachedString, RubyEncoding cachedEncoding) { + return execute(libString.getTString(rubyString), libString.getEncoding(rubyString), + cachedString, cachedEncoding); + } + + // cachedString is TruffleString to ensure correctness, caching on a MutableTruffleString is incorrect + public abstract boolean execute(AbstractTruffleString tstring, RubyEncoding encoding, + TruffleString cachedString, RubyEncoding cachedEncoding); + + @Specialization + protected boolean equal(AbstractTruffleString a, RubyEncoding encA, TruffleString b, RubyEncoding encB, + @Cached EncodingNodes.NegotiateCompatibleStringEncodingNode negotiateCompatibleStringEncodingNode, + @Cached StringEqualInternalNode stringEqualInternalNode) { + var compatibleEncoding = negotiateCompatibleStringEncodingNode.execute(a, encA, b, encB); + return stringEqualInternalNode.executeInternal(a, b, compatibleEncoding); + } + } + + @GenerateUncached + public abstract static class EqualSameEncodingNode extends RubyBaseNode { + + public final boolean execute(RubyStringLibrary libString, Object rubyString, + TruffleString cachedString, RubyEncoding cachedEncoding) { + return execute(libString.getTString(rubyString), libString.getEncoding(rubyString), + cachedString, cachedEncoding); + } + + // cachedString is TruffleString to ensure correctness, caching on a MutableTruffleString is incorrect + public abstract boolean execute(AbstractTruffleString tstring, RubyEncoding encoding, + TruffleString cachedString, RubyEncoding cachedEncoding); + + @Specialization(guards = "encA == encB") + protected boolean same(AbstractTruffleString a, RubyEncoding encA, TruffleString b, RubyEncoding encB, + @Cached StringEqualInternalNode stringEqualInternalNode) { + return stringEqualInternalNode.executeInternal(a, b, encA); + } + + @Specialization(guards = "encA != encB") + 
protected boolean diff(AbstractTruffleString a, RubyEncoding encA, TruffleString b, RubyEncoding encB) { + return false; + } + } + + @GenerateUncached + public abstract static class StringEqualInternalNode extends RubyBaseNode { + // compatibleEncoding is RubyEncoding or null + public abstract boolean executeInternal(AbstractTruffleString a, AbstractTruffleString b, + RubyEncoding compatibleEncoding); + + @Specialization(guards = "a.isEmpty() || b.isEmpty()") + protected boolean empty(AbstractTruffleString a, AbstractTruffleString b, RubyEncoding compatibleEncoding) { + assert compatibleEncoding != null; + return a.isEmpty() && b.isEmpty(); + } + + @Specialization(guards = { "compatibleEncoding != null", "!a.isEmpty()", "!b.isEmpty()" }) + protected boolean equalBytes(AbstractTruffleString a, AbstractTruffleString b, RubyEncoding compatibleEncoding, + @Cached TruffleString.EqualNode equalNode) { + return equalNode.execute(a, b, compatibleEncoding.tencoding); + } + + @Specialization(guards = "compatibleEncoding == null") + protected boolean notComparable( + AbstractTruffleString a, AbstractTruffleString b, RubyEncoding compatibleEncoding) { + return false; + } + } + + @ImportStatic(StringGuards.class) + public abstract static class CountRopesNode extends TrTableNode { + + public static CountRopesNode create() { + return StringHelperNodesFactory.CountRopesNodeFactory.create(null); + } + + public abstract int executeCount(Object string, TStringWithEncoding[] ropesWithEncs); + + @Specialization(guards = "isEmpty(strings.getTString(string))") + protected int count(Object string, Object[] args, + @Cached RubyStringLibrary strings) { + return 0; + } + + @Specialization( + guards = { + "cachedArgs.length > 0", + "!isEmpty(tstring)", + "cachedArgs.length == args.length", + "argsMatch(cachedArgs, args)", + "encoding == cachedEncoding" }) + protected int countFast(Object string, TStringWithEncoding[] args, + @Cached(value = "args", dimensions = 1) TStringWithEncoding[] 
cachedArgs, + @Cached RubyStringLibrary libString, + @Bind("libString.getTString(string)") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding, + @Cached("libString.getEncoding(string)") RubyEncoding cachedEncoding, + @Cached(value = "squeeze()", dimensions = 1) boolean[] squeeze, + @Cached("findEncoding(libString.getTString(string), libString.getEncoding(string), cachedArgs)") RubyEncoding compatEncoding, + @Cached("makeTables(cachedArgs, squeeze, compatEncoding)") StringSupport.TrTables tables, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Cached TruffleString.GetByteCodeRangeNode getByteCodeRangeNode) { + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + var codeRange = getByteCodeRangeNode.execute(tstring, encoding.tencoding); + return StringSupport.strCount(byteArray, codeRange, squeeze, tables, compatEncoding.jcoding, this); + } + + @Specialization(guards = "!isEmpty(libString.getTString(string))") + protected int count(Object string, TStringWithEncoding[] ropesWithEncs, + @Cached BranchProfile errorProfile, + @Cached RubyStringLibrary libString, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Cached TruffleString.GetByteCodeRangeNode getByteCodeRangeNode) { + if (ropesWithEncs.length == 0) { + errorProfile.enter(); + throw new RaiseException(getContext(), coreExceptions().argumentErrorEmptyVarargs(this)); + } + + var tstring = libString.getTString(string); + var encoding = libString.getEncoding(string); + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + var codeRange = getByteCodeRangeNode.execute(tstring, encoding.tencoding); + + RubyEncoding enc = findEncoding(tstring, encoding, ropesWithEncs); + return countSlow(byteArray, codeRange, ropesWithEncs, enc); + } + + @TruffleBoundary + private int countSlow(InternalByteArray byteArray, TruffleString.CodeRange codeRange, + TStringWithEncoding[] ropesWithEncs, RubyEncoding enc) { + final 
boolean[] table = squeeze(); + final StringSupport.TrTables tables = makeTables(ropesWithEncs, table, enc); + return StringSupport.strCount(byteArray, codeRange, table, tables, enc.jcoding, this); + } + } + + public abstract static class TrTableNode extends CoreMethodArrayArgumentsNode { + @Child protected EncodingNodes.CheckStringEncodingNode checkEncodingNode = EncodingNodes.CheckStringEncodingNode + .create(); + @Child protected TruffleString.EqualNode equalNode = TruffleString.EqualNode.create(); + + protected boolean[] squeeze() { + return new boolean[StringSupport.TRANS_SIZE + 1]; + } + + protected RubyEncoding findEncoding(AbstractTruffleString tstring, RubyEncoding encoding, + TStringWithEncoding[] ropes) { + RubyEncoding enc = checkEncodingNode.executeCheckEncoding(tstring, encoding, ropes[0].tstring, + ropes[0].encoding); + for (int i = 1; i < ropes.length; i++) { + enc = checkEncodingNode.executeCheckEncoding(tstring, encoding, ropes[i].tstring, ropes[i].encoding); + } + return enc; + } + + protected StringSupport.TrTables makeTables(TStringWithEncoding[] ropesWithEncs, boolean[] squeeze, + RubyEncoding enc) { + // The trSetupTable method will consume the bytes from the rope one encoded character at a time and + // build a TrTable from this. Previously we started with the encoding of rope zero, and at each + // stage found a compatible encoding to build that TrTable with. Although we now calculate a single + // encoding with which to build the tables it must be compatible with all ropes, so will not + // affect the consumption of characters from those ropes. 
+ StringSupport.TrTables tables = StringSupport.trSetupTable( + ropesWithEncs[0].tstring, + ropesWithEncs[0].encoding, + squeeze, + null, + true, + enc.jcoding, + this); + + for (int i = 1; i < ropesWithEncs.length; i++) { + tables = StringSupport + .trSetupTable(ropesWithEncs[i].tstring, ropesWithEncs[i].encoding, squeeze, tables, false, + enc.jcoding, this); + } + return tables; + } + + @ExplodeLoop + protected boolean argsMatch(TStringWithEncoding[] cachedRopes, TStringWithEncoding[] ropes) { + for (int i = 0; i < cachedRopes.length; i++) { + if (cachedRopes[i].encoding != ropes[i].encoding) { + return false; + } + if (!equalNode.execute(cachedRopes[i].tstring, ropes[i].tstring, cachedRopes[i].encoding.tencoding)) { + return false; + } + } + return true; + } + } + + @ImportStatic(StringGuards.class) + public abstract static class DeleteBangRopesNode extends TrTableNode { + + public static DeleteBangRopesNode create() { + return StringHelperNodesFactory.DeleteBangRopesNodeFactory.create(null); + } + + public abstract Object executeDeleteBang(RubyString string, TStringWithEncoding[] ropesWithEncs); + + @Specialization(guards = "isEmpty(string.tstring)") + protected Object deleteBangEmpty(RubyString string, Object[] args) { + return nil; + } + + @Specialization( + guards = { + "cachedArgs.length > 0", + "!isEmpty(string.tstring)", + "cachedArgs.length == args.length", + "argsMatch(cachedArgs, args)", + "libString.getEncoding(string) == cachedEncoding" }) + protected Object deleteBangFast(RubyString string, TStringWithEncoding[] args, + @Cached(value = "args", dimensions = 1) TStringWithEncoding[] cachedArgs, + @Cached RubyStringLibrary libString, + @Cached("libString.getEncoding(string)") RubyEncoding cachedEncoding, + @Cached(value = "squeeze()", dimensions = 1) boolean[] squeeze, + @Cached("findEncoding(libString.getTString(string), libString.getEncoding(string), cachedArgs)") RubyEncoding compatEncoding, + @Cached("makeTables(cachedArgs, squeeze, 
compatEncoding)") StringSupport.TrTables tables, + @Cached BranchProfile nullProfile) { + var processedRope = processStr(string, squeeze, compatEncoding, tables); + if (processedRope == null) { + nullProfile.enter(); + return nil; + } + + string.setTString(processedRope); + return string; + } + + @Specialization(guards = "!isEmpty(string.tstring)", replaces = "deleteBangFast") + protected Object deleteBangSlow(RubyString string, TStringWithEncoding[] args, + @Cached RubyStringLibrary libString, + @Cached BranchProfile errorProfile) { + if (args.length == 0) { + errorProfile.enter(); + throw new RaiseException(getContext(), coreExceptions().argumentErrorEmptyVarargs(this)); + } + + RubyEncoding enc = findEncoding(string.tstring, libString.getEncoding(string), args); + + return deleteBangSlow(string, args, enc); + } + + @TruffleBoundary + private Object deleteBangSlow(RubyString string, TStringWithEncoding[] ropesWithEncs, RubyEncoding enc) { + final boolean[] squeeze = new boolean[StringSupport.TRANS_SIZE + 1]; + + final StringSupport.TrTables tables = makeTables(ropesWithEncs, squeeze, enc); + + var processedRope = processStr(string, squeeze, enc, tables); + if (processedRope == null) { + return nil; + } + + string.setTString(processedRope); + // REVIEW encoding set + + return string; + } + + @TruffleBoundary + private TruffleString processStr(RubyString string, boolean[] squeeze, RubyEncoding enc, + StringSupport.TrTables tables) { + return StringSupport.delete_bangCommon19( + new ATStringWithEncoding(string.tstring, string.getEncodingUncached()), squeeze, tables, enc, this); + } + } + + @GenerateUncached + public abstract static class HashStringNode extends RubyBaseNode { + + protected static final int CLASS_SALT = 54008340; // random number, stops hashes for similar values but different classes being the same, static because we want deterministic hashes + + public static HashStringNode create() { + return StringHelperNodesFactory.HashStringNodeGen.create(); + } 
+ + public abstract long execute(Object string); + + @Specialization + protected long hash(Object string, + @Cached RubyStringLibrary strings, + @Cached TruffleString.HashCodeNode hashCodeNode) { + int hashCode = hashCodeNode.execute(strings.getTString(string), strings.getTEncoding(string)); + return getContext().getHashing(this).hash(CLASS_SALT, hashCode); + } + } + + public abstract static class StringGetAssociatedNode extends RubyBaseNode { + + public static StringGetAssociatedNode create() { + return StringHelperNodesFactory.StringGetAssociatedNodeGen.create(); + } + + public abstract Object execute(Object string); + + @Specialization(limit = "getDynamicObjectCacheLimit()") + protected Object getAssociated(RubyString string, + @CachedLibrary("string") DynamicObjectLibrary objectLibrary) { + return objectLibrary.getOrDefault(string, Layouts.ASSOCIATED_IDENTIFIER, null); + } + + @Specialization + protected Object getAssociatedImmutable(ImmutableRubyString string) { + return null; + } + + } + + public abstract static class CheckIndexNode extends RubyBaseNode { + + public abstract int executeCheck(int index, int length); + + @Specialization + protected int checkIndex(int index, int length, + @Cached ConditionProfile negativeIndexProfile, + @Cached BranchProfile errorProfile) { + if (index >= length) { + errorProfile.enter(); + throw new RaiseException( + getContext(), + getContext().getCoreExceptions().indexErrorOutOfString(index, this)); + } + + if (negativeIndexProfile.profile(index < 0)) { + index += length; + if (index < 0) { + errorProfile.enter(); + throw new RaiseException( + getContext(), + getContext().getCoreExceptions().indexErrorOutOfString(index, this)); + } + } + + return index; + } + + } + + public abstract static class NormalizeIndexNode extends RubyBaseNode { + + public abstract int executeNormalize(int index, int length); + + public static NormalizeIndexNode create() { + return StringHelperNodesFactory.NormalizeIndexNodeGen.create(); + } + + 
@Specialization + protected int normalizeIndex(int index, int length, + @Cached ConditionProfile negativeIndexProfile) { + if (negativeIndexProfile.profile(index < 0)) { + return index + length; + } + + return index; + } + + } + + public abstract static class InvertAsciiCaseHelperNode extends RubyBaseNode { + + private final boolean lowerToUpper; + private final boolean upperToLower; + + public static InvertAsciiCaseHelperNode createLowerToUpper() { + return StringHelperNodesFactory.InvertAsciiCaseHelperNodeGen.create(true, false); + } + + public static InvertAsciiCaseHelperNode createUpperToLower() { + return StringHelperNodesFactory.InvertAsciiCaseHelperNodeGen.create(false, true); + } + + public static InvertAsciiCaseHelperNode createSwapCase() { + return StringHelperNodesFactory.InvertAsciiCaseHelperNodeGen.create(true, true); + } + + protected InvertAsciiCaseHelperNode(boolean lowerToUpper, boolean upperToLower) { + this.lowerToUpper = lowerToUpper; + this.upperToLower = upperToLower; + } + + public abstract byte[] executeInvert(RubyString string, TruffleStringIterator iterator, byte[] initialBytes); + + @Specialization + protected byte[] invert(RubyString string, TruffleStringIterator iterator, byte[] initialBytes, + @Cached RubyStringLibrary libString, + @Cached TruffleStringIterator.NextNode nextNode, + @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode, + @Cached BranchProfile caseSwapProfile) { + var tstring = string.tstring; + var encoding = libString.getTEncoding(string); + + byte[] modified = initialBytes; + + while (iterator.hasNext()) { + int p = iterator.getByteIndex(); + int c = nextNode.execute(iterator); + + if ((lowerToUpper && StringSupport.isAsciiLowercase(c)) || + (upperToLower && StringSupport.isAsciiUppercase(c))) { + caseSwapProfile.enter(); + + if (modified == null) { + modified = copyToByteArrayNode.execute(tstring, encoding); + } + + // Convert lower-case ASCII code point to upper-case or upper-case ASCII code point to 
lower-case. + modified[p] ^= 0x20; + } + } + + return modified; + } + } + + public abstract static class InvertAsciiCaseNode extends RubyBaseNode { + + @Child private InvertAsciiCaseHelperNode invertNode; + + public static InvertAsciiCaseNode createLowerToUpper() { + return StringHelperNodesFactory.InvertAsciiCaseNodeGen + .create(InvertAsciiCaseHelperNode.createLowerToUpper()); + } + + public static InvertAsciiCaseNode createUpperToLower() { + return StringHelperNodesFactory.InvertAsciiCaseNodeGen + .create(InvertAsciiCaseHelperNode.createUpperToLower()); + } + + public static InvertAsciiCaseNode createSwapCase() { + return StringHelperNodesFactory.InvertAsciiCaseNodeGen.create(InvertAsciiCaseHelperNode.createSwapCase()); + } + + public InvertAsciiCaseNode(InvertAsciiCaseHelperNode invertNode) { + this.invertNode = invertNode; + } + + public abstract Object executeInvert(RubyString string); + + @Specialization + protected Object invert(RubyString string, + @Cached RubyStringLibrary libString, + @Cached TruffleString.CreateCodePointIteratorNode createCodePointIteratorNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached ConditionProfile noopProfile) { + var tencoding = libString.getTEncoding(string); + var iterator = createCodePointIteratorNode.execute(string.tstring, tencoding, + ErrorHandling.RETURN_NEGATIVE); + byte[] modified = invertNode.executeInvert(string, iterator, null); + + if (noopProfile.profile(modified == null)) { + return nil; + } else { + string.setTString(fromByteArrayNode.execute(modified, tencoding, false)); // codeRangeNode.execute(rope), codePointLengthNode.execute(rope) + return string; + } + } + + } + + @ImportStatic(StringGuards.class) + public abstract static class GetCodePointNode extends RubyBaseNode { + + public static GetCodePointNode create() { + return StringHelperNodesFactory.GetCodePointNodeGen.create(); + } + + public abstract int executeGetCodePoint(AbstractTruffleString string, RubyEncoding encoding, int 
byteIndex); + + @Specialization + protected int getCodePoint(AbstractTruffleString string, RubyEncoding encoding, int byteIndex, + @Cached TruffleString.CodePointAtByteIndexNode getCodePointNode, + @Cached BranchProfile badCodePointProfile) { + int codePoint = getCodePointNode.execute(string, byteIndex, encoding.tencoding, + ErrorHandling.RETURN_NEGATIVE); + if (codePoint == -1) { + badCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); + } + return codePoint; + } + + } + + public abstract static class StringAppendNode extends RubyBaseNode { + + public static StringAppendNode create() { + return StringHelperNodesFactory.StringAppendNodeGen.create(); + } + + public abstract RubyString executeStringAppend(Object string, Object other); + + @Specialization(guards = "libOther.isRubyString(other)", limit = "1") + protected RubyString stringAppend(Object string, Object other, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther, + @Cached EncodingNodes.CheckStringEncodingNode checkEncodingNode, + @Cached TruffleString.ConcatNode concatNode) { + + var left = libString.getTString(string); + var leftEncoding = libString.getEncoding(string); + var right = libOther.getTString(other); + var rightEncoding = libOther.getEncoding(other); + + final RubyEncoding compatibleEncoding = checkEncodingNode.executeCheckEncoding(left, leftEncoding, + right, rightEncoding); + + var result = concatNode.execute(left, right, compatibleEncoding.tencoding, true); + return createString(result, compatibleEncoding); + } + } + +} diff --git a/src/main/java/org/truffleruby/core/string/StringNodes.java b/src/main/java/org/truffleruby/core/string/StringNodes.java index 2bc73ef845ea..ccfd3004a20c 100644 --- a/src/main/java/org/truffleruby/core/string/StringNodes.java +++ b/src/main/java/org/truffleruby/core/string/StringNodes.java @@ -62,34 +62,35 @@ */ package org.truffleruby.core.string; -import 
static org.truffleruby.core.rope.CodeRange.CR_7BIT; -import static org.truffleruby.core.rope.CodeRange.CR_BROKEN; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; -import static org.truffleruby.core.rope.RopeConstants.EMPTY_ASCII_8BIT_ROPE; -import static org.truffleruby.core.string.StringOperations.createString; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.ASCII; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.BROKEN; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.VALID; +import static org.truffleruby.core.string.TStringConstants.EMPTY_BINARY; import static org.truffleruby.core.string.StringSupport.MBCLEN_CHARFOUND_LEN; import static org.truffleruby.core.string.StringSupport.MBCLEN_CHARFOUND_P; import static org.truffleruby.core.string.StringSupport.MBCLEN_INVALID_P; import static org.truffleruby.core.string.StringSupport.MBCLEN_NEEDMORE_P; -import java.io.UnsupportedEncodingException; -import java.nio.charset.StandardCharsets; - -import com.oracle.truffle.api.TruffleSafepoint; import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.dsl.Cached.Exclusive; import com.oracle.truffle.api.dsl.Cached.Shared; import com.oracle.truffle.api.dsl.GenerateNodeFactory; -import com.oracle.truffle.api.nodes.Node; -import com.oracle.truffle.api.profiles.LoopConditionProfile; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.MutableTruffleString; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.AsTruffleStringNode; +import com.oracle.truffle.api.strings.TruffleString.CodePointLengthNode; +import com.oracle.truffle.api.strings.TruffleString.CreateCodePointIteratorNode; +import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; +import com.oracle.truffle.api.strings.TruffleString.GetByteCodeRangeNode; +import 
com.oracle.truffle.api.strings.TruffleStringIterator; import org.graalvm.collections.Pair; import org.jcodings.Config; import org.jcodings.Encoding; +import org.jcodings.ascii.AsciiTables; import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.Layouts; -import org.truffleruby.SuppressFBWarnings; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreMethodNode; @@ -99,22 +100,23 @@ import org.truffleruby.builtins.PrimitiveArrayArgumentsNode; import org.truffleruby.builtins.PrimitiveNode; import org.truffleruby.builtins.YieldingCoreMethodNode; +import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.CoreLibrary; import org.truffleruby.core.array.ArrayUtils; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.cast.BooleanCastNode; import org.truffleruby.core.cast.ToIntNode; import org.truffleruby.core.cast.ToLongNode; -import org.truffleruby.core.cast.ToRopeNodeGen; import org.truffleruby.core.cast.ToStrNode; import org.truffleruby.core.cast.ToStrNodeGen; -import org.truffleruby.core.encoding.EncodingNodes; +import org.truffleruby.core.encoding.EncodingNodes.NegotiateCompatibleStringEncodingNode; import org.truffleruby.core.encoding.IsCharacterHeadNode; import org.truffleruby.core.encoding.EncodingNodes.CheckEncodingNode; import org.truffleruby.core.encoding.EncodingNodes.GetActualEncodingNode; import org.truffleruby.core.encoding.EncodingNodes.NegotiateCompatibleEncodingNode; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.format.FormatExceptionTranslator; import org.truffleruby.core.format.exceptions.FormatException; import org.truffleruby.core.format.unpack.ArrayResult; @@ -129,63 +131,18 @@ import 
org.truffleruby.core.range.RubyLongRange; import org.truffleruby.core.range.RubyObjectRange; import org.truffleruby.core.regexp.RubyRegexp; -import org.truffleruby.core.rope.Bytes; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.ConcatRope; -import org.truffleruby.core.rope.ConcatRope.ConcatState; -import org.truffleruby.core.rope.LazyIntRope; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.NativeRope; -import org.truffleruby.core.rope.RepeatingRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeGuards; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeNodes.AreComparableRopesNode; -import org.truffleruby.core.rope.RopeNodes.AsciiOnlyNode; -import org.truffleruby.core.rope.RopeNodes.BytesNode; -import org.truffleruby.core.rope.RopeNodes.CalculateCharacterLengthNode; -import org.truffleruby.core.rope.RopeNodes.CharacterLengthNode; -import org.truffleruby.core.rope.RopeNodes.CodeRangeNode; -import org.truffleruby.core.rope.RopeNodes.CompareRopesNode; -import org.truffleruby.core.rope.RopeNodes.ConcatNode; -import org.truffleruby.core.rope.RopeNodes.FlattenNode; -import org.truffleruby.core.rope.RopeNodes.GetByteNode; -import org.truffleruby.core.rope.RopeNodes.GetBytesObjectNode; -import org.truffleruby.core.rope.RopeNodes.GetCodePointNode; -import org.truffleruby.core.rope.RopeNodes.MakeLeafRopeNode; -import org.truffleruby.core.rope.RopeNodes.RepeatNode; -import org.truffleruby.core.rope.RopeNodes.SingleByteOptimizableNode; -import org.truffleruby.core.rope.RopeNodes.SubstringNode; -import org.truffleruby.core.rope.RopeNodes.WithEncodingNode; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; -import org.truffleruby.core.rope.SubstringRope; -import 
org.truffleruby.core.string.StringNodesFactory.ByteIndexFromCharIndexNodeGen; -import org.truffleruby.core.string.StringNodesFactory.ByteSizeNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.CheckIndexNodeGen; -import org.truffleruby.core.string.StringNodesFactory.CountRopesNodeFactory; +import org.truffleruby.core.string.StringHelperNodes.DeleteBangRopesNode; +import org.truffleruby.core.string.StringHelperNodes.SingleByteOptimizableNode; import org.truffleruby.core.string.StringNodesFactory.DeleteBangNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.DeleteBangRopesNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.InvertAsciiCaseBytesNodeGen; -import org.truffleruby.core.string.StringNodesFactory.InvertAsciiCaseNodeGen; -import org.truffleruby.core.string.StringNodesFactory.MakeStringNodeGen; -import org.truffleruby.core.string.StringNodesFactory.NormalizeIndexNodeGen; -import org.truffleruby.core.string.StringNodesFactory.StringAppendNodeGen; import org.truffleruby.core.string.StringNodesFactory.StringAppendPrimitiveNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.StringByteCharacterIndexNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.StringByteSubstringPrimitiveNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.StringDupAsStringInstanceNodeFactory; -import org.truffleruby.core.string.StringNodesFactory.StringEqualNodeGen; import org.truffleruby.core.string.StringNodesFactory.StringSubstringPrimitiveNodeFactory; import org.truffleruby.core.string.StringNodesFactory.SumNodeFactory; -import org.truffleruby.core.string.StringSupport.TrTables; import org.truffleruby.core.support.RubyByteArray; import org.truffleruby.core.symbol.RubySymbol; +import org.truffleruby.extra.ffi.Pointer; +import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.language.Nil; import org.truffleruby.language.NotProvided; -import org.truffleruby.language.RubyBaseNode; 
import org.truffleruby.language.RubyBaseNodeWithExecute; import org.truffleruby.language.RubyGuards; import org.truffleruby.language.RubyNode; @@ -197,7 +154,6 @@ import org.truffleruby.language.dispatch.DispatchNode; import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.language.objects.AllocationTracing; -import org.truffleruby.language.objects.LogicalClassNode; import org.truffleruby.language.objects.WriteObjectFieldNode; import org.truffleruby.language.threadlocal.SpecialVariableStorage; import org.truffleruby.language.yield.CallBlockNode; @@ -215,137 +171,15 @@ import com.oracle.truffle.api.dsl.ReportPolymorphism; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.frame.VirtualFrame; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.nodes.DirectCallNode; import com.oracle.truffle.api.nodes.ExplodeLoop; import com.oracle.truffle.api.nodes.IndirectCallNode; -import com.oracle.truffle.api.object.DynamicObjectLibrary; -import com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.profiles.BranchProfile; import com.oracle.truffle.api.profiles.ConditionProfile; @CoreModule(value = "String", isClass = true) public abstract class StringNodes { - @GenerateUncached - public abstract static class MakeStringNode extends RubyBaseNode { - - public abstract RubyString executeMake(Object payload, RubyEncoding encoding, Object codeRange); - - public RubyString fromRope(Rope rope, RubyEncoding rubyEncoding) { - return executeMake(rope, rubyEncoding, NotProvided.INSTANCE); - } - - public RubyString fromBuilder(RopeBuilder builder, RubyEncoding encoding, CodeRange codeRange) { - assert builder.getEncoding() == encoding.jcoding; - return executeMake(builder.getBytes(), encoding, codeRange); - } - - /** All callers of this factory method must guarantee that the builder's byte array cannot change after this - * call, otherwise the rope built from the builder will end up in an 
inconsistent state. */ - public RubyString fromBuilderUnsafe(RopeBuilder builder, RubyEncoding encoding, CodeRange codeRange) { - assert builder.getEncoding() == encoding.jcoding; - final byte[] unsafeBytes = builder.getUnsafeBytes(); - final byte[] ropeBytes; - - // While the caller must guarantee the builder's byte[] cannot change after this call, it's possible - // the builder has allocated more space than it needs. Ropes require that the backing byte array - // is the exact length required. If the builder doesn't satisfy this constraint, we must make a copy. - // Alternatively, we could make a leaf rope and then take a substring of it, but that would complicate - // the specializations here. - if (unsafeBytes.length == builder.getLength()) { - ropeBytes = unsafeBytes; - } else { - ropeBytes = builder.getBytes(); - } - - return executeMake(ropeBytes, encoding, codeRange); - } - - public static MakeStringNode create() { - return MakeStringNodeGen.create(); - } - - public static MakeStringNode getUncached() { - return MakeStringNodeGen.getUncached(); - } - - @Specialization - protected RubyString makeStringFromRope(Rope rope, RubyEncoding encoding, NotProvided codeRange) { - assert rope.encoding == encoding.jcoding; - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - encoding); - AllocationTracing.trace(string, this); - return string; - } - - @Specialization - protected RubyString makeStringFromBytes(byte[] bytes, RubyEncoding encoding, CodeRange codeRange, - @Cached MakeLeafRopeNode makeLeafRopeNode) { - final LeafRope rope = makeLeafRopeNode - .executeMake(bytes, encoding.jcoding, codeRange, NotProvided.INSTANCE); - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - encoding); - AllocationTracing.trace(string, this); - return string; - } - - @Specialization(guards = "is7Bit(codeRange)") - protected RubyString 
makeAsciiStringFromString(String string, RubyEncoding encoding, CodeRange codeRange) { - final byte[] bytes = RopeOperations.encodeAsciiBytes(string); - - return executeMake(bytes, encoding, codeRange); - } - - @Specialization(guards = "!is7Bit(codeRange)") - protected RubyString makeStringFromString(String string, RubyEncoding encoding, CodeRange codeRange) { - final byte[] bytes = StringOperations.encodeBytes(string, encoding.jcoding); - - return executeMake(bytes, encoding, codeRange); - } - - protected static boolean is7Bit(CodeRange codeRange) { - return codeRange == CR_7BIT; - } - - } - - public abstract static class StringSubstringNode extends RubyBaseNode { - - @Child private SubstringNode substringNode = SubstringNode.create(); - - public static StringSubstringNode create() { - return StringNodesFactory.StringSubstringNodeGen.create(); - } - - public abstract RubyString executeSubstring(Object string, int offset, int byteLength); - - @Specialization - protected RubyString substring(Object source, int offset, int byteLength, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libSource, - @Cached LogicalClassNode logicalClassNode) { - final Rope rope = libSource.getRope(source); - final RubyClass logicalClass = logicalClassNode.execute(source); - final RubyString string = new RubyString( - logicalClass, - getLanguage().stringShape, - false, - substringNode.executeSubstring(rope, offset, byteLength), - libSource.getEncoding(source)); - AllocationTracing.trace(string, this); - return string; - } - - } - @GenerateUncached @GenerateNodeFactory @CoreMethod(names = { "__allocate__", "__layout_allocate__" }, constructor = true, visibility = Visibility.PRIVATE) @@ -360,12 +194,11 @@ public static AllocateNode create() { @Specialization protected RubyString allocate(RubyClass rubyClass) { - final Shape shape = getLanguage().stringShape; final RubyString string = new RubyString( rubyClass, - shape, + getLanguage().stringShape, false, - 
EMPTY_ASCII_8BIT_ROPE, + EMPTY_BINARY, Encodings.BINARY); AllocationTracing.trace(string, this); return string; @@ -386,19 +219,8 @@ protected ToStrNode coerceOtherToString(RubyBaseNodeWithExecute other) { @Specialization protected RubyString add(Object string, Object other, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringLibrary, - @Cached StringAppendNode stringAppendNode) { - final RopeWithEncoding concatRopeResult = stringAppendNode.executeStringAppend(string, other); - final RubyClass rubyClass = coreLibrary().stringClass; - final Shape shape = getLanguage().stringShape; - final RubyString ret = new RubyString( - rubyClass, - shape, - false, - concatRopeResult.getRope(), - concatRopeResult.getEncoding()); - AllocationTracing.trace(ret, this); - return ret; + @Cached StringHelperNodes.StringAppendNode stringAppendNode) { + return stringAppendNode.executeStringAppend(string, other); } } @@ -417,16 +239,9 @@ protected RubyBaseNodeWithExecute coerceToInteger(RubyBaseNodeWithExecute times) @Specialization(guards = "times == 0") protected RubyString multiplyZero(Object string, int times, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - RopeOperations.emptyRope(libString.getRope(string).getEncoding()), - libString.getEncoding(string)); - AllocationTracing.trace(instance, this); - return instance; + @Cached RubyStringLibrary libString) { + final RubyEncoding encoding = libString.getEncoding(string); + return createString(encoding.tencoding.getEmpty(), encoding); } @Specialization(guards = "times < 0") @@ -434,48 +249,33 @@ protected RubyString multiplyTimesNegative(Object string, long times) { throw new RaiseException(getContext(), coreExceptions().argumentError("negative argument", this)); } - @Specialization(guards = { "times > 0", "!isEmpty(libString.getRope(string))" }) + @Specialization(guards = { 
"times > 0", "!isEmpty(libString.getTString(string))" }) protected RubyString multiply(Object string, int times, - @Cached @Shared("repeatNode") RepeatNode repeatNode, @Cached BranchProfile tooBigProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope stringRope = libString.getRope(string); - long length = (long) times * stringRope.byteLength(); - if (length > Integer.MAX_VALUE) { + @Cached RubyStringLibrary libString, + @Cached TruffleString.RepeatNode repeatNode) { + var tstring = libString.getTString(string); + var encoding = libString.getEncoding(string); + + long longLength = (long) times * tstring.byteLength(encoding.tencoding); + if (longLength > Integer.MAX_VALUE) { tooBigProfile.enter(); throw tooBig(); } - final Rope repeated = repeatNode.executeRepeat(stringRope, times); - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - repeated, - libString.getEncoding(string)); - AllocationTracing.trace(instance, this); - return instance; + return createString(repeatNode.execute(tstring, times, encoding.tencoding), encoding); } - @Specialization(guards = { "times > 0", "isEmpty(libString.getRope(string))" }) + @Specialization(guards = { "times > 0", "libString.getTString(string).isEmpty()" }) protected RubyString multiplyEmpty(Object string, long times, - @Cached @Shared("repeatNode") RepeatNode repeatNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope repeated = repeatNode.executeRepeat(libString.getRope(string), 0); - - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - repeated, - libString.getEncoding(string)); - AllocationTracing.trace(instance, this); - return instance; + @Cached RubyStringLibrary libString) { + var encoding = libString.getEncoding(string); + return createString(encoding.tencoding.getEmpty(), encoding); } - @Specialization(guards = { "times 
> 0", "!isEmpty(strings.getRope(string))" }) + @Specialization(guards = { "times > 0", "!isEmpty(strings.getTString(string))" }) protected RubyString multiplyNonEmpty(Object string, long times, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { assert !CoreLibrary.fitsIntoInteger(times); throw tooBig(); } @@ -488,18 +288,24 @@ private RaiseException tooBig() { } @CoreMethod(names = { "==", "===", "eql?" }, required = 1) - public abstract static class EqualNode extends CoreMethodArrayArgumentsNode { + public abstract static class EqualCoreMethodNode extends CoreMethodArrayArgumentsNode { - @Child private StringEqualNode stringEqualNode = StringEqualNodeGen.create(); @Child private KernelNodes.RespondToNode respondToNode; @Child private DispatchNode objectEqualNode; @Child private BooleanCastNode booleanCastNode; - @Specialization(guards = "libB.isRubyString(b)") + @Specialization(guards = "libB.isRubyString(b)", limit = "1") protected boolean equalString(Object a, Object b, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libA, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libB) { - return stringEqualNode.executeStringEqual(libA.getRope(a), libB.getRope(b)); + @Cached RubyStringLibrary libA, + @Cached RubyStringLibrary libB, + @Cached NegotiateCompatibleStringEncodingNode negotiateCompatibleStringEncodingNode, + @Cached StringHelperNodes.StringEqualInternalNode stringEqualInternalNode) { + var tstringA = libA.getTString(a); + var encA = libA.getEncoding(a); + var tstringB = libB.getTString(b); + var encB = libB.getEncoding(b); + var compatibleEncoding = negotiateCompatibleStringEncodingNode.execute(tstringA, encA, tstringB, encB); + return stringEqualInternalNode.executeInternal(tstringA, tstringB, compatibleEncoding); } @Specialization(guards = "isNotRubyString(b)") @@ -528,65 +334,103 @@ protected boolean equal(Object a, Object b) { } + // compatibleEncoding is RubyEncoding 
or Nil in this node @Primitive(name = "string_cmp") public abstract static class CompareNode extends CoreMethodArrayArgumentsNode { - @Specialization - protected int compare(Object a, Object b, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libA, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libB, + @Specialization(guards = "first.isEmpty() || second.isEmpty()") + protected int empty(Object a, Object b, RubyEncoding compatibleEncoding, + @Cached RubyStringLibrary libA, + @Cached RubyStringLibrary libB, + @Bind("libA.getTString(a)") AbstractTruffleString first, + @Bind("libB.getTString(b)") AbstractTruffleString second, + @Cached ConditionProfile bothEmpty) { + if (bothEmpty.profile(first.isEmpty() && second.isEmpty())) { + return 0; + } else { + return first.isEmpty() ? -1 : 1; + } + } + + @Specialization(guards = { "!first.isEmpty()", "!second.isEmpty()" }) + protected int compatible(Object a, Object b, RubyEncoding compatibleEncoding, + @Cached RubyStringLibrary libA, + @Cached RubyStringLibrary libB, + @Bind("libA.getTString(a)") AbstractTruffleString first, + @Bind("libB.getTString(b)") AbstractTruffleString second, @Cached ConditionProfile sameRopeProfile, - @Cached CompareRopesNode compareNode) { - // Taken from org.jruby.RubyString#op_cmp + @Cached TruffleString.CompareBytesNode compareBytesNode, + @Cached ConditionProfile equalProfile, + @Cached ConditionProfile positiveProfile) { + if (sameRopeProfile.profile(first == second)) { + return 0; + } - final Rope firstRope = libA.getRope(a); - final Rope secondRope = libB.getRope(b); + int result = compareBytesNode.execute(first, second, compatibleEncoding.tencoding); + if (equalProfile.profile(result == 0)) { + return 0; + } else { + return positiveProfile.profile(result > 0) ? 
1 : -1; + } + } - if (sameRopeProfile.profile(firstRope == secondRope)) { + @Specialization + protected int notCompatible(Object a, Object b, Nil compatibleEncoding, + @Cached RubyStringLibrary libA, + @Cached RubyStringLibrary libB, + @Cached ConditionProfile sameRopeProfile, + @Cached TruffleString.CompareBytesNode compareBytesNode, + @Cached TruffleString.ForceEncodingNode forceEncoding1Node, + @Cached TruffleString.ForceEncodingNode forceEncoding2Node, + @Cached ConditionProfile equalProfile, + @Cached ConditionProfile positiveProfile, + @Cached ConditionProfile encodingIndexGreaterThanProfile) { + var first = libA.getTString(a); + var firstEncoding = libA.getEncoding(a); + var second = libB.getTString(b); + var secondEncoding = libB.getEncoding(b); + + if (sameRopeProfile.profile(first == second)) { return 0; } - return compareNode.execute(firstRope, secondRope); + // Compare as binary as CRuby compares bytes regardless of the encodings + var firstBinary = forceEncoding1Node.execute(first, firstEncoding.tencoding, Encodings.BINARY.tencoding); + var secondBinary = forceEncoding2Node.execute(second, secondEncoding.tencoding, Encodings.BINARY.tencoding); + int result = compareBytesNode.execute(firstBinary, secondBinary, Encodings.BINARY.tencoding); + + if (equalProfile.profile(result == 0)) { + if (encodingIndexGreaterThanProfile.profile(firstEncoding.index > secondEncoding.index)) { + return 1; + } else { + return -1; + } + } + + return positiveProfile.profile(result > 0) ? 
1 : -1; } } @Primitive(name = "dup_as_string_instance") public abstract static class StringDupAsStringInstanceNode extends PrimitiveArrayArgumentsNode { - - public static StringDupAsStringInstanceNode create() { - return StringDupAsStringInstanceNodeFactory.create(null); - } - - public abstract RubyString executeDupAsStringInstance(Object a); - @Specialization protected RubyString dupAsStringInstance(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - - final Rope rope = strings.getRope(string); + @Cached RubyStringLibrary strings, + @Cached AsTruffleStringNode asTruffleStringNode) { final RubyEncoding encoding = strings.getEncoding(string); - - final RubyString ret = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - encoding); - AllocationTracing.trace(ret, this); - return ret; + return createStringCopy(asTruffleStringNode, strings.getTString(string), encoding); } - } @CoreMethod(names = "<<", required = 1, raiseIfNotMutableSelf = true) @ImportStatic(StringGuards.class) public abstract static class StringConcatOneNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "libFirst.isRubyString(first)") + @Specialization(guards = "libFirst.isRubyString(first)", limit = "1") protected RubyString concat(RubyString string, Object first, @Cached StringAppendPrimitiveNode stringAppendNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFirst) { + @Cached RubyStringLibrary libFirst) { return stringAppendNode.executeStringAppend(string, first); } @@ -613,10 +457,10 @@ protected RubyString concatZero(RubyString string, NotProvided first, Object[] r return string; } - @Specialization(guards = { "rest.length == 0", "libFirst.isRubyString(first)" }) + @Specialization(guards = { "rest.length == 0", "libFirst.isRubyString(first)" }, limit = "1") protected RubyString concat(RubyString string, Object first, Object[] rest, @Cached StringAppendPrimitiveNode 
stringAppendNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFirst) { + @Cached RubyStringLibrary libFirst) { return stringAppendNode.executeStringAppend(string, first); } @@ -634,15 +478,18 @@ protected Object concatGeneric(RubyString string, Object first, Object[] rest, "rest.length == cachedLength", "cachedLength <= MAX_EXPLODE_SIZE" }) protected Object concatMany(RubyString string, Object first, Object[] rest, + @Cached RubyStringLibrary libString, @Cached("rest.length") int cachedLength, @Cached StringConcatNode argConcatNode, + @Cached AsTruffleStringNode asTruffleStringNode, @Cached ConditionProfile selfArgProfile) { - Rope rope = string.rope; + var tstring = string.tstring; Object result = argConcatNode.executeConcat(string, first, EMPTY_ARGUMENTS); for (int i = 0; i < cachedLength; ++i) { - final Object argOrCopy = selfArgProfile.profile(rest[i] == string) - ? createString(this, rope, string.encoding) - : rest[i]; + Object arg = rest[i]; + final Object argOrCopy = selfArgProfile.profile(arg == string) + ? 
createStringCopy(asTruffleStringNode, tstring, libString.getEncoding(string)) + : arg; result = argConcatNode.executeConcat(string, argOrCopy, EMPTY_ARGUMENTS); } return result; @@ -651,17 +498,17 @@ protected Object concatMany(RubyString string, Object first, Object[] rest, /** Same implementation as {@link #concatMany}, safe for the use of {@code cachedLength} */ @Specialization(guards = { "wasProvided(first)", "rest.length > 0" }, replaces = "concatMany") protected Object concatManyGeneral(RubyString string, Object first, Object[] rest, + @Cached RubyStringLibrary libString, @Cached StringConcatNode argConcatNode, + @Cached AsTruffleStringNode asTruffleStringNode, @Cached ConditionProfile selfArgProfile) { - Rope rope = string.rope; + var tstring = string.tstring; Object result = argConcatNode.executeConcat(string, first, EMPTY_ARGUMENTS); for (Object arg : rest) { - if (selfArgProfile.profile(arg == string)) { - Object copy = createString(this, rope, string.encoding); - result = argConcatNode.executeConcat(string, copy, EMPTY_ARGUMENTS); - } else { - result = argConcatNode.executeConcat(string, arg, EMPTY_ARGUMENTS); - } + final Object argOrCopy = selfArgProfile.profile(arg == string) + ? 
createStringCopy(asTruffleStringNode, tstring, libString.getEncoding(string)) + : arg; + result = argConcatNode.executeConcat(string, argOrCopy, EMPTY_ARGUMENTS); } return result; } @@ -676,10 +523,10 @@ protected Object concatManyGeneral(RubyString string, Object first, Object[] res public abstract static class GetIndexNode extends CoreMethodArrayArgumentsNode { //region Fields - @Child private NormalizeIndexNode normalizeIndexNode; + @Child private StringHelperNodes.NormalizeIndexNode normalizeIndexNode; @Child private StringSubstringPrimitiveNode substringNode; @Child private ToLongNode toLongNode; - @Child private CharacterLengthNode charLengthNode; + @Child private CodePointLengthNode codePointLengthNode; private final BranchProfile outOfBounds = BranchProfile.create(); // endregion @@ -687,8 +534,8 @@ public abstract static class GetIndexNode extends CoreMethodArrayArgumentsNode { @Specialization protected Object getIndex(Object string, int index, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return index == charLength(strings.getRope(string)) // Check for the only difference from str[index, 1] + @Cached RubyStringLibrary strings) { + return index == codePointLength(strings.getTString(string), strings.getEncoding(string)) // Check for the only difference from str[index, 1] ? 
outOfBoundsNil() : substring(string, index, 1); } @@ -705,7 +552,7 @@ protected Object getIndex(Object string, long index, NotProvided length) { "!isRubyRegexp(index)", "isNotRubyString(index)" }) protected Object getIndex(Object string, Object index, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { long indexLong = toLong(index); int indexInt = (int) indexLong; return indexInt != indexLong @@ -754,38 +601,38 @@ protected Object slice(Object string, Object start, Object length) { @Specialization protected Object sliceIntegerRange(Object string, RubyIntRange range, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { return sliceRange(string, libString, range.begin, range.end, range.excludedEnd); } @Specialization protected Object sliceLongRange(Object string, RubyLongRange range, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { return sliceRange(string, libString, range.begin, range.end, range.excludedEnd); } @Specialization(guards = "range.isEndless()") protected Object sliceEndlessRange(Object string, RubyObjectRange range, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { final int stringEnd = range.excludedEnd ? 
Integer.MAX_VALUE : Integer.MAX_VALUE - 1; return sliceRange(string, libString, toLong(range.begin), stringEnd, range.excludedEnd); } @Specialization(guards = "range.isBeginless()") protected Object sliceBeginlessRange(Object string, RubyObjectRange range, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { return sliceRange(string, libString, 0L, toLong(range.end), range.excludedEnd); } @Specialization(guards = "range.isBounded()") protected Object sliceObjectRange(Object string, RubyObjectRange range, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { return sliceRange(string, libString, toLong(range.begin), toLong(range.end), range.excludedEnd); } @Specialization(guards = "range.isBoundless()") protected Object sliceBoundlessRange(Object string, RubyObjectRange range, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { final int stringEnd = range.excludedEnd ? 
Integer.MAX_VALUE : Integer.MAX_VALUE - 1; return sliceRange(string, libString, 0L, stringEnd, range.excludedEnd); } @@ -810,7 +657,7 @@ private Object sliceRange(Object string, RubyStringLibrary libString, long begin } private Object sliceRange(Object string, RubyStringLibrary libString, int begin, int end, boolean excludesEnd) { - final int stringLength = charLength(libString.getRope(string)); + final int stringLength = codePointLength(libString.getTString(string), libString.getEncoding(string)); begin = normalizeIndex(begin, stringLength); if (begin < 0 || begin > stringLength) { return outOfBoundsNil(); @@ -830,8 +677,7 @@ protected Object sliceCapture(VirtualFrame frame, Object string, RubyRegexp rege @Cached ConditionProfile unsetProfile, @Cached ConditionProfile sameThreadProfile, @Cached ConditionProfile notMatchedProfile, - @Cached ConditionProfile captureSetProfile, - @Cached StringDupAsStringInstanceNode dupNode) { + @Cached ConditionProfile captureSetProfile) { final Object capture = RubyGuards.wasProvided(maybeCapture) ? 
maybeCapture : 0; final Object matchStrPair = callNode.call( getContext().getCoreLibrary().truffleStringOperationsModule, @@ -850,7 +696,7 @@ protected Object sliceCapture(VirtualFrame frame, Object string, RubyRegexp rege final Object captureStringOrNil = array[1]; variables.setLastMatch(matchData, getContext(), unsetProfile, sameThreadProfile); if (captureSetProfile.profile(captureStringOrNil != nil)) { - return dupNode.executeDupAsStringInstance(captureStringOrNil); + return captureStringOrNil; } else { return nil; } @@ -860,17 +706,18 @@ protected Object sliceCapture(VirtualFrame frame, Object string, RubyRegexp rege // endregion // region String Slice Specialization - @Specialization(guards = "stringsMatchStr.isRubyString(matchStr)") + @Specialization(guards = "stringsMatchStr.isRubyString(matchStr)", limit = "1") protected Object slice2(Object string, Object matchStr, NotProvided length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsMatchStr, + @Cached RubyStringLibrary stringsMatchStr, @Cached @Exclusive DispatchNode includeNode, @Cached BooleanCastNode booleanCastNode, - @Cached @Exclusive StringDupAsStringInstanceNode dupNode) { + @Cached AsTruffleStringNode asTruffleStringNode) { final Object included = includeNode.call(string, "include?", matchStr); if (booleanCastNode.execute(included)) { - return dupNode.executeDupAsStringInstance(matchStr); + final RubyEncoding encoding = stringsMatchStr.getEncoding(matchStr); + return createStringCopy(asTruffleStringNode, stringsMatchStr.getTString(matchStr), encoding); } return nil; @@ -903,19 +750,19 @@ private long toLong(Object value) { return toLongNode.execute(value); } - private int charLength(Rope rope) { - if (charLengthNode == null) { + private int codePointLength(AbstractTruffleString string, RubyEncoding encoding) { + if (codePointLengthNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - charLengthNode = insert(CharacterLengthNode.create()); + 
codePointLengthNode = insert(CodePointLengthNode.create()); } - return charLengthNode.execute(rope); + return codePointLengthNode.execute(string, encoding.tencoding); } private int normalizeIndex(int index, int length) { if (normalizeIndexNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - normalizeIndexNode = insert(NormalizeIndexNode.create()); + normalizeIndexNode = insert(StringHelperNodes.NormalizeIndexNode.create()); } return normalizeIndexNode.executeNormalize(index, length); @@ -929,69 +776,20 @@ public abstract static class ASCIIOnlyNode extends CoreMethodArrayArgumentsNode @Specialization protected boolean asciiOnly(Object string, - @Cached CodeRangeNode codeRangeNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final CodeRange codeRange = codeRangeNode.execute(libString.getRope(string)); - - return codeRange == CR_7BIT; - } - - } - - @CoreMethod(names = "bytes", needsBlock = true) - public abstract static class StringBytesNode extends YieldingCoreMethodNode { - - @Child private BytesNode bytesNode = BytesNode.create(); - - @Specialization - protected RubyArray bytes(Object string, Nil block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final Rope rope = strings.getRope(string); - final byte[] bytes = bytesNode.execute(rope); - - final int[] store = new int[bytes.length]; - - for (int n = 0; n < store.length; n++) { - store[n] = bytes[n] & 0xFF; - } - - return createArray(store); - } - - @Specialization - protected Object bytes(Object string, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - Rope rope = strings.getRope(string); - byte[] bytes = bytesNode.execute(rope); - - for (int i = 0; i < bytes.length; i++) { - callBlock(block, bytes[i] & 0xff); - } - - return string; + @Cached GetByteCodeRangeNode codeRangeNode, + @Cached RubyStringLibrary libString) { + return StringGuards.is7Bit(libString.getTString(string), 
libString.getEncoding(string), codeRangeNode); } } @CoreMethod(names = "bytesize") public abstract static class ByteSizeNode extends CoreMethodArrayArgumentsNode { - - public static ByteSizeNode create() { - return ByteSizeNodeFactory.create(null); - } - - public abstract int executeByteSize(Object string); - - @Specialization - protected int byteSize(RubyString string) { - return string.rope.byteLength(); - } - @Specialization - protected int immutableByteSize(ImmutableRubyString string) { - return string.rope.byteLength(); + protected int byteSize(Object string, + @Cached RubyStringLibrary libString) { + return libString.byteLength(string); } - } @Primitive(name = "string_casecmp") @@ -1001,19 +799,22 @@ public abstract static class CaseCmpNode extends PrimitiveNode { @Child private NegotiateCompatibleEncodingNode negotiateCompatibleEncodingNode = NegotiateCompatibleEncodingNode .create(); - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); + @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); private final ConditionProfile incompatibleEncodingProfile = ConditionProfile.create(); + private final ConditionProfile sameProfile = ConditionProfile.create(); @CreateCast("other") protected ToStrNode coerceOtherToString(RubyBaseNodeWithExecute other) { return ToStrNodeGen.create(other); } - @Specialization(guards = "bothSingleByteOptimizable(strings.getRope(string), stringsOther.getRope(other))") + @Specialization( + guards = "bothSingleByteOptimizable(libString.getTString(string), libOther.getTString(other), libString.getEncoding(string), libOther.getEncoding(other))") protected Object caseCmpSingleByte(Object string, Object other, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsOther) { + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther, + @Cached 
TruffleString.GetInternalByteArrayNode byteArraySelfNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayOtherNode) { // Taken from org.jruby.RubyString#casecmp19. final RubyEncoding encoding = negotiateCompatibleEncodingNode.executeNegotiate(string, other); @@ -1021,14 +822,24 @@ protected Object caseCmpSingleByte(Object string, Object other, return nil; } - return RopeOperations.caseInsensitiveCmp(strings.getRope(string), stringsOther.getRope(other)); + var selfTString = libString.getTString(string); + var selfByteArray = byteArraySelfNode.execute(selfTString, libString.getTEncoding(string)); + var otherTString = libOther.getTString(other); + var otherByteArray = byteArrayOtherNode.execute(otherTString, libOther.getTEncoding(other)); + + if (sameProfile.profile(selfTString == otherTString)) { + return 0; + } + + return caseInsensitiveCmp(selfByteArray, otherByteArray); } - @Specialization(guards = "!bothSingleByteOptimizable(strings.getRope(string), stringsOther.getRope(other))") + @Specialization( + guards = "!bothSingleByteOptimizable(libString.getTString(string), libOther.getTString(other), libString.getEncoding(string), libOther.getEncoding(other))") protected Object caseCmp(Object string, Object other, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsOther) { - // Taken from org.jruby.RubyString#casecmp19 and + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther) { + // Taken from org.jruby.RubyString#casecmp19 final RubyEncoding encoding = negotiateCompatibleEncodingNode.executeNegotiate(string, other); @@ -1036,58 +847,116 @@ protected Object caseCmp(Object string, Object other, return nil; } - return StringSupport - .multiByteCasecmp(encoding.jcoding, strings.getRope(string), stringsOther.getRope(other)); + var selfTString = libString.getTString(string); + var selfEncoding = libString.getTEncoding(string); + + var otherTString = 
libOther.getTString(other); + var otherEncoding = libOther.getTEncoding(other); + + if (sameProfile.profile(selfTString == otherTString)) { + return 0; + } + + return StringSupport.multiByteCasecmp(encoding, selfTString, selfEncoding, otherTString, otherEncoding); + } + + protected boolean bothSingleByteOptimizable(AbstractTruffleString string, AbstractTruffleString other, + RubyEncoding stringEncoding, + RubyEncoding otherEncoding) { + return singleByteOptimizableNode.execute(string, stringEncoding) && + singleByteOptimizableNode.execute(other, otherEncoding); + } + + @TruffleBoundary + private static int caseInsensitiveCmp(InternalByteArray value, InternalByteArray other) { + // Taken from org.jruby.util.ByteList#caseInsensitiveCmp. + final int size = value.getLength(); + final int len = Math.min(size, other.getLength()); + + for (int offset = -1; ++offset < len;) { + int myCharIgnoreCase = AsciiTables.ToLowerCaseTable[value.get(offset) & 0xff] & 0xff; + int otherCharIgnoreCase = AsciiTables.ToLowerCaseTable[other.get(offset) & 0xff] & 0xff; + if (myCharIgnoreCase < otherCharIgnoreCase) { + return -1; + } else if (myCharIgnoreCase > otherCharIgnoreCase) { + return 1; + } + } + + return size == other.getLength() ? 0 : size == len ? -1 : 1; } + } + + /** Returns true if the first bytes in string are equal to the bytes in prefix. 
*/ + @Primitive(name = "string_start_with?") + public abstract static class StartWithNode extends PrimitiveArrayArgumentsNode { + + @Specialization + protected boolean startWithBytes(Object string, Object prefix, RubyEncoding enc, + @Cached TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode, + @Cached RubyStringLibrary strings, + @Cached RubyStringLibrary stringsSuffix) { + + var stringTString = strings.getTString(string); + var stringEncoding = strings.getTEncoding(string); + final int stringByteLength = stringTString.byteLength(stringEncoding); - protected boolean bothSingleByteOptimizable(Rope stringRope, Rope otherRope) { - return singleByteOptimizableNode.execute(stringRope) && singleByteOptimizableNode.execute(otherRope); + var prefixTString = stringsSuffix.getTString(prefix); + var prefixEncoding = stringsSuffix.getTEncoding(prefix); + final int prefixByteLength = prefixTString.byteLength(prefixEncoding); + + if (stringByteLength < prefixByteLength) { + return false; + } + + // See truffle-string.md, section Encodings Compatibility + if (prefixByteLength == 0) { + return true; + } + + return regionEqualByteIndexNode.execute(stringTString, 0, prefixTString, 0, prefixByteLength, + enc.tencoding); } + } /** Returns true if the last bytes in string are equal to the bytes in suffix. 
*/ @Primitive(name = "string_end_with?") - public abstract static class EndWithNode extends CoreMethodArrayArgumentsNode { - - @Child IsCharacterHeadNode isCharacterHeadNode; + public abstract static class EndWithNode extends PrimitiveArrayArgumentsNode { @Specialization protected boolean endWithBytes(Object string, Object suffix, RubyEncoding enc, - @Cached BytesNode stringBytesNode, - @Cached BytesNode suffixBytesNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsSuffix, + @Cached IsCharacterHeadNode isCharacterHeadNode, + @Cached TruffleString.RegionEqualByteIndexNode regionEqualByteIndexNode, + @Cached RubyStringLibrary strings, + @Cached RubyStringLibrary stringsSuffix, @Cached ConditionProfile isCharacterHeadProfile) { - final Rope stringRope = strings.getRope(string); - final Rope suffixRope = stringsSuffix.getRope(suffix); - final int stringByteLength = stringRope.byteLength(); - final int suffixByteLength = suffixRope.byteLength(); + var stringTString = strings.getTString(string); + var stringEncoding = strings.getEncoding(string); + final int stringByteLength = stringTString.byteLength(stringEncoding.tencoding); + + var suffixTString = stringsSuffix.getTString(suffix); + var suffixEncoding = stringsSuffix.getTEncoding(suffix); + final int suffixByteLength = suffixTString.byteLength(suffixEncoding); if (stringByteLength < suffixByteLength) { return false; } + + // See truffle-string.md, section Encodings Compatibility if (suffixByteLength == 0) { return true; } - final byte[] stringBytes = stringBytesNode.execute(stringRope); - final byte[] suffixBytes = suffixBytesNode.execute(suffixRope); final int offset = stringByteLength - suffixByteLength; - if (isCharacterHeadProfile.profile(!isCharacterHead(enc, stringByteLength, stringBytes, offset))) { + if (isCharacterHeadProfile.profile(!isCharacterHeadNode.execute(stringEncoding, stringTString, offset))) { return 
false; } - return ArrayUtils.regionEquals(stringBytes, offset, suffixBytes, 0, suffixByteLength); - } - - private boolean isCharacterHead(RubyEncoding enc, int stringByteLength, byte[] stringBytes, int offset) { - if (isCharacterHeadNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - isCharacterHeadNode = insert(IsCharacterHeadNode.create()); - } - return isCharacterHeadNode.execute(enc, stringBytes, offset, stringByteLength); + return regionEqualByteIndexNode.execute(stringTString, offset, suffixTString, 0, suffixByteLength, + enc.tencoding); } } @@ -1096,175 +965,59 @@ private boolean isCharacterHead(RubyEncoding enc, int stringByteLength, byte[] s public abstract static class CountNode extends CoreMethodArrayArgumentsNode { @Child private ToStrNode toStr = ToStrNode.create(); - @Child private CountRopesNode countRopesNode = CountRopesNode.create(); - @Child private RubyStringLibrary rubyStringLibrary = RubyStringLibrary.getFactory().createDispatched(2); + @Child private StringHelperNodes.CountRopesNode countRopesNode = StringHelperNodes.CountRopesNode.create(); + private final RubyStringLibrary rubyStringLibrary = RubyStringLibrary.create(); + @Child private AsTruffleStringNode asTruffleStringNode = AsTruffleStringNode.create(); @Specialization( guards = "args.length == size", limit = "getDefaultCacheLimit()") - protected int count(VirtualFrame frame, Object string, Object[] args, + protected int count(Object string, Object[] args, @Cached("args.length") int size) { - final RopeWithEncoding[] ropesWithEncs = argRopesWithEncs(frame, args, size); + final TStringWithEncoding[] ropesWithEncs = argRopesWithEncs(args, size); return countRopesNode.executeCount(string, ropesWithEncs); } @Specialization(replaces = "count") - protected int countSlow(VirtualFrame frame, Object string, Object[] args) { - final RopeWithEncoding[] ropesWithEncs = argRopesSlow(frame, args); + protected int countSlow(Object string, Object[] args) { + final 
TStringWithEncoding[] ropesWithEncs = argRopesSlow(args); return countRopesNode.executeCount(string, ropesWithEncs); } @ExplodeLoop - protected RopeWithEncoding[] argRopesWithEncs(VirtualFrame frame, Object[] args, int size) { - final RopeWithEncoding[] strs = new RopeWithEncoding[args.length]; + protected TStringWithEncoding[] argRopesWithEncs(Object[] args, int size) { + final TStringWithEncoding[] strs = new TStringWithEncoding[args.length]; for (int i = 0; i < size; i++) { final Object string = toStr.execute(args[i]); - strs[i] = new RopeWithEncoding( - rubyStringLibrary.getRope(string), + strs[i] = new TStringWithEncoding( + asTruffleStringNode, + rubyStringLibrary.getTString(string), rubyStringLibrary.getEncoding(string)); } return strs; } - protected RopeWithEncoding[] argRopesSlow(VirtualFrame frame, Object[] args) { - final RopeWithEncoding[] strs = new RopeWithEncoding[args.length]; + protected TStringWithEncoding[] argRopesSlow(Object[] args) { + final TStringWithEncoding[] strs = new TStringWithEncoding[args.length]; for (int i = 0; i < args.length; i++) { final Object string = toStr.execute(args[i]); - strs[i] = new RopeWithEncoding( - rubyStringLibrary.getRope(string), + strs[i] = new TStringWithEncoding( + asTruffleStringNode, + rubyStringLibrary.getTString(string), rubyStringLibrary.getEncoding(string)); } return strs; } } - @ImportStatic({ StringGuards.class, StringOperations.class }) - public abstract static class CountRopesNode extends TrTableNode { - - public static CountRopesNode create() { - return CountRopesNodeFactory.create(null); - } - - public abstract int executeCount(Object string, RopeWithEncoding[] ropesWithEncs); - - @Specialization(guards = "isEmpty(strings.getRope(string))") - protected int count(Object string, Object[] args, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return 0; - } - - @Specialization( - guards = { - "cachedArgs.length > 0", - "!isEmpty(libString.getRope(string))", - 
"cachedArgs.length == args.length", - "argsMatch(cachedArgs, args)", - "encodingsMatch(libString.getRope(string), cachedEncoding)" }) - protected int countFast(Object string, RopeWithEncoding[] args, - @Cached(value = "args", dimensions = 1) RopeWithEncoding[] cachedArgs, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @Cached("libString.getRope(string).encoding") Encoding cachedEncoding, - @Cached(value = "squeeze()", dimensions = 1) boolean[] squeeze, - @Cached("findEncoding(stringToRopeWithEncoding(libString, string), cachedArgs)") RubyEncoding compatEncoding, - @Cached("makeTables(cachedArgs, squeeze, compatEncoding)") TrTables tables) { - return processStr(libString.getRope(string), squeeze, compatEncoding, tables); - } - - @TruffleBoundary - private int processStr(Rope rope, boolean[] squeeze, RubyEncoding compatEncoding, TrTables tables) { - return StringSupport.strCount(rope, squeeze, tables, compatEncoding.jcoding, this); - } - - @Specialization(guards = "!isEmpty(libString.getRope(string))") - protected int count(Object string, RopeWithEncoding[] ropesWithEncs, - @Cached BranchProfile errorProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - if (ropesWithEncs.length == 0) { - errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().argumentErrorEmptyVarargs(this)); - } - - RubyEncoding enc = findEncoding( - new RopeWithEncoding(libString.getRope(string), libString.getEncoding(string)), - ropesWithEncs); - return countSlow(libString.getRope(string), ropesWithEncs, enc); - } - - @TruffleBoundary - private int countSlow(Rope stringRope, RopeWithEncoding[] ropesWithEncs, RubyEncoding enc) { - final boolean[] table = squeeze(); - final StringSupport.TrTables tables = makeTables(ropesWithEncs, table, enc); - return processStr(stringRope, table, enc, tables); - } - } - - public abstract static class TrTableNode extends CoreMethodArrayArgumentsNode { - @Child protected 
EncodingNodes.CheckStringEncodingNode checkEncodingNode = EncodingNodes.CheckStringEncodingNode - .create(); - @Child protected RopeNodes.EqualNode ropeEqualNode = RopeNodes.EqualNode.create(); - - protected boolean[] squeeze() { - return new boolean[StringSupport.TRANS_SIZE + 1]; - } - - protected RopeWithEncoding stringToRopeWithEncoding(RubyStringLibrary strings, Object string) { - return new RopeWithEncoding(strings.getRope(string), strings.getEncoding(string)); - } - - protected RubyEncoding findEncoding(RopeWithEncoding ropeWithEnc, RopeWithEncoding[] ropes) { - RubyEncoding enc = checkEncodingNode.executeCheckEncoding(ropeWithEnc, ropes[0]); - for (int i = 1; i < ropes.length; i++) { - enc = checkEncodingNode.executeCheckEncoding(ropeWithEnc, ropes[i]); - } - return enc; - } - - protected TrTables makeTables(RopeWithEncoding[] ropesWithEncs, boolean[] squeeze, RubyEncoding enc) { - // The trSetupTable method will consume the bytes from the rope one encoded character at a time and - // build a TrTable from this. Previously we started with the encoding of rope zero, and at each - // stage found a compatible encoding to build that TrTable with. Although we now calculate a single - // encoding with which to build the tables it must be compatible with all ropes, so will not - // affect the consumption of characters from those ropes. 
- StringSupport.TrTables tables = StringSupport.trSetupTable( - ropesWithEncs[0].getRope(), - squeeze, - null, - true, - enc.jcoding, - this); - - for (int i = 1; i < ropesWithEncs.length; i++) { - tables = StringSupport - .trSetupTable(ropesWithEncs[i].getRope(), squeeze, tables, false, enc.jcoding, this); - } - return tables; - } - - protected boolean encodingsMatch(Rope rope, Encoding encoding) { - return encoding == rope.getEncoding(); - } - - @ExplodeLoop - protected boolean argsMatch(RopeWithEncoding[] cachedRopes, RopeWithEncoding[] ropes) { - for (int i = 0; i < cachedRopes.length; i++) { - if (!ropeEqualNode.execute(cachedRopes[i].getRope(), ropes[i].getRope())) { - return false; - } - if (cachedRopes[i].getEncoding() != ropes[i].getEncoding()) { - return false; - } - } - return true; - } - } - @CoreMethod(names = "delete!", rest = true, raiseIfNotMutableSelf = true) @ImportStatic(StringGuards.class) public abstract static class DeleteBangNode extends CoreMethodArrayArgumentsNode { @Child private ToStrNode toStr = ToStrNode.create(); @Child private DeleteBangRopesNode deleteBangRopesNode = DeleteBangRopesNode.create(); - @Child private RubyStringLibrary rubyStringLibrary = RubyStringLibrary.getFactory().createDispatched(2); + private final RubyStringLibrary rubyStringLibrary = RubyStringLibrary.create(); + @Child private AsTruffleStringNode asTruffleStringNode = AsTruffleStringNode.create(); public static DeleteBangNode create() { return DeleteBangNodeFactory.create(null); @@ -1275,296 +1028,384 @@ public static DeleteBangNode create() { @Specialization(guards = "args.length == size", limit = "getDefaultCacheLimit()") protected Object deleteBang(RubyString string, Object[] args, @Cached("args.length") int size) { - final RopeWithEncoding[] ropesWithEncs = argRopesWithEncs(args, size); + final TStringWithEncoding[] ropesWithEncs = argRopesWithEncs(args, size); return deleteBangRopesNode.executeDeleteBang(string, ropesWithEncs); } @Specialization(replaces = 
"deleteBang") protected Object deleteBangSlow(RubyString string, Object[] args) { - final RopeWithEncoding[] ropes = argRopesWithEncsSlow(args); + final TStringWithEncoding[] ropes = argRopesWithEncsSlow(args); return deleteBangRopesNode.executeDeleteBang(string, ropes); } @ExplodeLoop - protected RopeWithEncoding[] argRopesWithEncs(Object[] args, int size) { - final RopeWithEncoding[] strs = new RopeWithEncoding[size]; + protected TStringWithEncoding[] argRopesWithEncs(Object[] args, int size) { + final TStringWithEncoding[] strs = new TStringWithEncoding[size]; for (int i = 0; i < size; i++) { final Object string = toStr.execute(args[i]); - strs[i] = new RopeWithEncoding( - rubyStringLibrary.getRope(string), + strs[i] = new TStringWithEncoding( + asTruffleStringNode, + rubyStringLibrary.getTString(string), rubyStringLibrary.getEncoding(string)); } return strs; } - protected RopeWithEncoding[] argRopesWithEncsSlow(Object[] args) { - final RopeWithEncoding[] strs = new RopeWithEncoding[args.length]; + protected TStringWithEncoding[] argRopesWithEncsSlow(Object[] args) { + final TStringWithEncoding[] strs = new TStringWithEncoding[args.length]; for (int i = 0; i < args.length; i++) { final Object string = toStr.execute(args[i]); - strs[i] = new RopeWithEncoding( - rubyStringLibrary.getRope(string), + strs[i] = new TStringWithEncoding( + asTruffleStringNode, + rubyStringLibrary.getTString(string), rubyStringLibrary.getEncoding(string)); } return strs; } } - @ImportStatic({ StringGuards.class, StringOperations.class }) - public abstract static class DeleteBangRopesNode extends TrTableNode { - - public static DeleteBangRopesNode create() { - return DeleteBangRopesNodeFactory.create(null); - } - - public abstract Object executeDeleteBang(RubyString string, RopeWithEncoding[] ropesWithEncs); + @Primitive(name = "string_downcase!", raiseIfNotMutable = 0, lowerFixnum = 1) + @ImportStatic({ StringGuards.class, Config.class }) + public abstract static class 
StringDowncaseBangPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "isEmpty(string.rope)") - protected Object deleteBangEmpty(RubyString string, Object[] args) { - return nil; - } + @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); + private final ConditionProfile dummyEncodingProfile = ConditionProfile.createBinaryProfile(); @Specialization( - guards = { - "cachedArgs.length > 0", - "!isEmpty(string.rope)", - "cachedArgs.length == args.length", - "argsMatch(cachedArgs, args)", - "encodingsMatch(libString.getRope(string), cachedEncoding)" }) - protected Object deleteBangFast(RubyString string, RopeWithEncoding[] args, - @Cached(value = "args", dimensions = 1) RopeWithEncoding[] cachedArgs, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @Cached("libString.getRope(string).encoding") Encoding cachedEncoding, - @Cached(value = "squeeze()", dimensions = 1) boolean[] squeeze, - @Cached("findEncoding(stringToRopeWithEncoding(libString, string), cachedArgs)") RubyEncoding compatEncoding, - @Cached("makeTables(cachedArgs, squeeze, compatEncoding)") TrTables tables, - @Cached BranchProfile nullProfile) { - final Rope processedRope = processStr(string, squeeze, compatEncoding, tables); - if (processedRope == null) { - nullProfile.enter(); - return nil; + guards = "!isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") + protected Object downcaseAsciiCodePoints(RubyString string, int caseMappingOptions, + @Cached RubyStringLibrary libString, + @Cached("createUpperToLower()") StringHelperNodes.InvertAsciiCaseNode invertAsciiCaseNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + if (dummyEncodingProfile.profile(encoding.isDummy)) { + throw new RaiseException( + getContext(), + coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); 
} - string.setRope(processedRope); - return string; + return invertAsciiCaseNode.executeInvert(string); } - @Specialization(guards = "!isEmpty(string.rope)") - protected Object deleteBang(RubyString string, RopeWithEncoding[] args, - @Cached BranchProfile errorProfile) { - if (args.length == 0) { - errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().argumentErrorEmptyVarargs(this)); + @Specialization( + guards = "isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") + protected Object downcaseMultiByteComplex(RubyString string, int caseMappingOptions, + @Cached RubyStringLibrary libString, + @Cached GetByteCodeRangeNode codeRangeNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Cached ConditionProfile modifiedProfile, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + if (dummyEncodingProfile.profile(encoding.isDummy)) { + throw new RaiseException( + getContext(), + coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - RubyEncoding enc = findEncoding(new RopeWithEncoding(string.rope, string.encoding), args); - - return deleteBangSlow(string, args, enc); - } + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); - @TruffleBoundary - private Object deleteBangSlow(RubyString string, RopeWithEncoding[] ropesWithEncs, RubyEncoding enc) { - final boolean[] squeeze = new boolean[StringSupport.TRANS_SIZE + 1]; + // TODO (nirvdrum 24-Jun-22): Make the byte array builder copy-on-write so we don't eagerly clone the source byte array. 
+ var builder = ByteArrayBuilder.create(byteArray); - final StringSupport.TrTables tables = makeTables(ropesWithEncs, squeeze, enc); + var cr = codeRangeNode.execute(string.tstring, encoding.tencoding); + final boolean modified = StringSupport + .downcaseMultiByteComplex(encoding.jcoding, cr, builder, caseMappingOptions, this); - final Rope processedRope = processStr(string, squeeze, enc, tables); - if (processedRope == null) { + if (modifiedProfile.profile(modified)) { + string.setTString(fromByteArrayNode.execute(builder.getBytes(), encoding.tencoding, false)); + return string; + } else { return nil; } - - string.setRope(processedRope); - // REVIEW encoding set - - return string; } - @TruffleBoundary - private Rope processStr(RubyString string, boolean[] squeeze, RubyEncoding enc, StringSupport.TrTables tables) { - return StringSupport.delete_bangCommon19(string.rope, squeeze, tables, enc.jcoding, this); - } } - @Primitive(name = "string_downcase!", raiseIfNotMutable = 0, lowerFixnum = 1) - @ImportStatic({ StringGuards.class, Config.class }) - public abstract static class StringDowncaseBangPrimitiveNode extends PrimitiveArrayArgumentsNode { - - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); + @CoreMethod(names = "each_byte", needsBlock = true, enumeratorSize = "bytesize") + public abstract static class EachByteNode extends YieldingCoreMethodNode { - @Specialization(guards = { "isSingleByteCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object downcaseSingleByte(RubyString string, int caseMappingOptions, - @Cached("createUpperToLower()") InvertAsciiCaseNode invertAsciiCaseNode) { - return invertAsciiCaseNode.executeInvert(string); + public static EachByteNode create() { + return StringNodesFactory.EachByteNodeFactory.create(null); } - @Specialization(guards = { "isSimpleAsciiCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object 
downcaseMultiByteAsciiSimple(RubyString string, int caseMappingOptions, - @Cached @Shared("bytesNode") BytesNode bytesNode, - @Cached CharacterLengthNode characterLengthNode, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("makeLeafRopeNode") MakeLeafRopeNode makeLeafRopeNode, - @Cached @Shared("dummyEncodingProfile") ConditionProfile dummyEncodingProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - final Rope rope = string.rope; - final Encoding encoding = rope.getEncoding(); + public abstract Object execute(Object string, RubyProc block); - if (dummyEncodingProfile.profile(encoding.isDummy())) { - throw new RaiseException( - getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); - } + // use separate specialization instances for getTString() in the loop + @Specialization(guards = "strings.seen(string)", limit = "2") + protected Object eachByte(Object string, RubyProc block, + @Cached RubyStringLibrary strings, + @Cached TruffleString.MaterializeNode materializeNode, + @Cached TruffleString.ReadByteNode readByteNode) { + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string).tencoding; - final CodeRange cr = codeRangeNode.execute(rope); - final byte[] inputBytes = bytesNode.execute(rope); - final byte[] outputBytes = StringSupport.downcaseMultiByteAsciiSimple(encoding, cr, inputBytes); + // String#each_byte reflects changes by the block to the string's bytes + materializeNode.execute(tstring, encoding); + for (int i = 0; i < tstring.byteLength(encoding); i++) { + int singleByte = readByteNode.execute(tstring, i, encoding); + callBlock(block, singleByte); - if (modifiedProfile.profile(inputBytes != outputBytes)) { - string.setRope( - makeLeafRopeNode.executeMake(outputBytes, encoding, cr, characterLengthNode.execute(rope))); - return string; - } else { - return nil; + tstring = strings.getTString(string); + encoding = 
strings.getEncoding(string).tencoding; } + + return string; } + } - @Specialization(guards = { "isComplexCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object downcaseMultiByteComplex(RubyString string, int caseMappingOptions, - @Cached @Shared("bytesNode") BytesNode bytesNode, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("makeLeafRopeNode") MakeLeafRopeNode makeLeafRopeNode, - @Cached @Shared("dummyEncodingProfile") ConditionProfile dummyEncodingProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - final Rope rope = string.rope; - final Encoding encoding = rope.getEncoding(); - - if (dummyEncodingProfile.profile(encoding.isDummy())) { - throw new RaiseException( - getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); - } + @CoreMethod(names = "bytes", needsBlock = true) + public abstract static class StringBytesNode extends CoreMethodArrayArgumentsNode { - final RopeBuilder builder = RopeBuilder.createRopeBuilder(bytesNode.execute(rope), rope.getEncoding()); - final boolean modified = StringSupport - .downcaseMultiByteComplex(encoding, codeRangeNode.execute(rope), builder, caseMappingOptions, this); + // use separate specialization instances for getTString() in the loop + @Specialization(guards = "strings.seen(string)", limit = "2") + protected RubyArray bytesWithoutBlock(Object string, Nil block, + @Cached RubyStringLibrary strings, + @Cached TruffleString.MaterializeNode materializeNode, + @Cached TruffleString.ReadByteNode readByteNode) { + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string).tencoding; + int arrayLength = tstring.byteLength(encoding); - if (modifiedProfile.profile(modified)) { - string.setRope( - makeLeafRopeNode - .executeMake(builder.getBytes(), rope.getEncoding(), CR_UNKNOWN, NotProvided.INSTANCE)); + final int[] store = new int[arrayLength]; - return 
string; - } else { - return nil; + materializeNode.execute(tstring, encoding); + for (int i = 0; i < arrayLength; i++) { + store[i] = readByteNode.execute(tstring, i, encoding); } + + return createArray(store); } + @Specialization + protected Object bytesWithBlock(Object string, RubyProc block, + @Cached EachByteNode eachByteNode) { + return eachByteNode.execute(string, block); + } } - @CoreMethod(names = "each_byte", needsBlock = true, enumeratorSize = "bytesize") - public abstract static class EachByteNode extends YieldingCoreMethodNode { + @CoreMethod(names = "each_char", needsBlock = true, enumeratorSize = "size") + public abstract static class EachCharNode extends YieldingCoreMethodNode { + + public static EachCharNode create() { + return StringNodesFactory.EachCharNodeFactory.create(null); + } + + public abstract Object execute(Object string, RubyProc block); - @SuppressFBWarnings("SA") @Specialization - protected Object eachByte(Object string, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached BytesNode bytesNode, - @Cached BytesNode updatedBytesNode, - @Cached ConditionProfile ropeChangedProfile) { - Rope rope = strings.getRope(string); - byte[] bytes = bytesNode.execute(rope); - - for (int i = 0; i < bytes.length; i++) { - callBlock(block, bytes[i] & 0xff); - - Rope updatedRope = strings.getRope(string); - if (ropeChangedProfile.profile(rope != updatedRope)) { - rope = updatedRope; - bytes = updatedBytesNode.execute(updatedRope); - } + protected Object eachChar(Object string, RubyProc block, + @Cached RubyStringLibrary strings, + @Cached TruffleString.SubstringByteIndexNode substringNode, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode) { + // Unlike String#each_byte, String#each_char does not make + // modifications to the string visible to the rest of the iteration. 
+ var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var tencoding = encoding.tencoding; + final int byteLength = tstring.byteLength(tencoding); + + int clen; + for (int i = 0; i < byteLength; i += clen) { + clen = byteLengthOfCodePointNode.execute(tstring, i, tencoding); + callBlock(block, createSubString(substringNode, tstring, encoding, i, clen)); } return string; } + } + + @CoreMethod(names = "chars", needsBlock = true) + public abstract static class CharsNode extends CoreMethodArrayArgumentsNode { + + @Specialization + protected Object charsWithoutBlock(Object string, Nil unusedBlock, + @Cached RubyStringLibrary strings, + @Cached TruffleString.SubstringByteIndexNode substringNode, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Cached TruffleString.CodePointLengthNode codePointLengthNode) { + // Unlike String#each_byte, String#chars does not make + // modifications to the string visible to the rest of the iteration. 
+ var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var tencoding = encoding.tencoding; + final int byteLength = tstring.byteLength(tencoding); + + int codePointLength = codePointLengthNode.execute(tstring, tencoding); + Object[] chars = new Object[codePointLength]; + + int characterIndex = 0; + int clen; + for (int i = 0; i < byteLength; i += clen) { + clen = byteLengthOfCodePointNode.execute(tstring, i, encoding.tencoding); + chars[characterIndex++] = createSubString(substringNode, tstring, encoding, i, clen); + } + + return createArray(chars); + } + @Specialization + protected Object charsWithBlock(Object string, RubyProc block, + @Cached EachCharNode eachCharNode) { + return eachCharNode.execute(string, block); + } } - @CoreMethod(names = "each_char", needsBlock = true, enumeratorSize = "size") + @CoreMethod(names = "each_codepoint", needsBlock = true, enumeratorSize = "size") @ImportStatic(StringGuards.class) - public abstract static class EachCharNode extends YieldingCoreMethodNode { + public abstract static class EachCodePointNode extends YieldingCoreMethodNode { + + public static EachCodePointNode create() { + return StringNodesFactory.EachCodePointNodeFactory.create(null); + } - @Child private SubstringNode substringNode = SubstringNode.create(); - @Child private BytesNode bytesNode = BytesNode.create(); + public abstract Object execute(Object string, RubyProc block); @Specialization - protected Object eachChar(Object string, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached CodeRangeNode codeRangeNode) { - final Rope rope = strings.getRope(string); - final RubyEncoding encoding = strings.getEncoding(string); - final byte[] ptrBytes = bytesNode.execute(rope); - final int len = ptrBytes.length; - final Encoding enc = rope.getEncoding(); - final CodeRange cr = codeRangeNode.execute(rope); - - int n; + 
protected Object eachCodePoint(Object string, RubyProc block, + @Cached RubyStringLibrary strings, + @Cached CreateCodePointIteratorNode createCodePointIteratorNode, + @Cached TruffleStringIterator.NextNode nextNode, + @Cached BranchProfile invalidCodePointProfile) { + // Unlike String#each_byte, String#each_codepoint does not make + // modifications to the string visible to the rest of the iteration. + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var tencoding = encoding.tencoding; + var iterator = createCodePointIteratorNode.execute(tstring, tencoding, ErrorHandling.RETURN_NEGATIVE); + + while (iterator.hasNext()) { + int codePoint = nextNode.execute(iterator); + + if (codePoint == -1) { + invalidCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); + } - for (int i = 0; i < len; i += n) { - n = calculateCharacterLengthNode - .characterLengthWithRecovery(enc, cr, Bytes.fromRange(ptrBytes, i, len)); - callBlock(block, substr(rope, encoding, i, n, coreLibrary().stringClass)); + callBlock(block, codePoint); } return string; } - // TODO (nirvdrum 10-Mar-15): This was extracted from JRuby, but likely will need to become a primitive. - // Don't be tempted to extract the rope from the passed string. If the block being yielded to modifies the - // source string, you'll get a different rope. Unlike String#each_byte, String#each_char does not make - // modifications to the string visible to the rest of the iteration. 
- private Object substr(Rope rope, RubyEncoding encoding, int beg, int len, RubyClass logicalClass) { - int length = rope.byteLength(); - if (len < 0 || beg > length) { - return nil; - } + } - if (beg < 0) { - beg += length; - if (beg < 0) { - return nil; + @CoreMethod(names = "codepoints", needsBlock = true) + @ImportStatic({ StringGuards.class }) + public abstract static class CodePointsNode extends YieldingCoreMethodNode { + + @Specialization + protected Object codePointsWithoutBlock(Object string, Nil unusedBlock, + @Cached RubyStringLibrary strings, + @Cached CreateCodePointIteratorNode createCodePointIteratorNode, + @Cached TruffleStringIterator.NextNode nextNode, + @Cached TruffleString.CodePointLengthNode codePointLengthNode, + @Cached BranchProfile invalidCodePointProfile) { + // Unlike String#each_byte, String#codepoints does not make + // modifications to the string visible to the rest of the iteration. + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var tencoding = encoding.tencoding; + + int codePointLength = codePointLengthNode.execute(tstring, tencoding); + int[] codePoints = new int[codePointLength]; + + var iterator = createCodePointIteratorNode.execute(tstring, tencoding, ErrorHandling.RETURN_NEGATIVE); + + int i = 0; + while (iterator.hasNext()) { + int codePoint = nextNode.execute(iterator); + + if (codePoint == -1) { + invalidCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); } + + codePoints[i++] = codePoint; } - int end = Math.min(length, beg + len); - final Rope substringRope = substringNode.executeSubstring(rope, beg, end - beg); - final RubyString ret = new RubyString( - logicalClass, - getLanguage().stringShape, - false, - substringRope, - encoding); - AllocationTracing.trace(ret, this); - return ret; + return createArray(codePoints); + } + + @Specialization + protected Object codePointsWithBlock(Object string, 
RubyProc block, + @Cached EachCodePointNode eachCodePointNode) { + return eachCodePointNode.execute(string, block); } + } + @ImportStatic(StringGuards.class) @CoreMethod(names = "force_encoding", required = 1, raiseIfNotMutableSelf = true) public abstract static class ForceEncodingNode extends CoreMethodArrayArgumentsNode { - @Child private WithEncodingNode withEncodingNode = WithEncodingNode.create(); - private final ConditionProfile differentEncodingProfile = ConditionProfile.create(); - public abstract RubyString execute(Object string, Object other); + protected abstract RubyString execute(Object string, RubyEncoding other); + public static ForceEncodingNode create() { return StringNodesFactory.ForceEncodingNodeFactory.create(null); } - @Specialization(guards = "libEncoding.isRubyString(encoding)") - protected RubyString forceEncodingString(RubyString string, Object encoding, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libEncoding, + @Specialization(guards = "string.getEncodingUnprofiled() == newEncoding") + protected RubyString sameEncoding(RubyString string, RubyEncoding newEncoding) { + return string; + } + + @Specialization(guards = { "encoding != newEncoding", "tstring.isImmutable()" }) + protected RubyString immutable(RubyString string, RubyEncoding newEncoding, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary profileEncoding, + @Cached TruffleString.ForceEncodingNode forceEncodingNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + var newEncodingProfiled = profileEncoding.profileEncoding(newEncoding); + var newTString = forceEncodingNode.execute(tstring, encoding.tencoding, newEncodingProfiled.tencoding); + string.setTString(newTString, newEncodingProfiled); + return string; + } + + @Specialization( + guards = { "encoding != newEncoding", "!tstring.isImmutable()", "!tstring.isNative()" }) + protected RubyString mutableManaged(RubyString 
string, RubyEncoding newEncoding, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary profileEncoding, + @Cached MutableTruffleString.ForceEncodingNode forceEncodingNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + var newEncodingProfiled = profileEncoding.profileEncoding(newEncoding); + var newTString = forceEncodingNode.execute(tstring, encoding.tencoding, newEncodingProfiled.tencoding); + string.setTString(newTString, newEncodingProfiled); + return string; + } + + @Specialization( + guards = { "encoding != newEncoding", "!tstring.isImmutable()", "tstring.isNative()" }) + protected RubyString mutableNative(RubyString string, RubyEncoding newEncoding, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary profileEncoding, + @Cached TruffleString.GetInternalNativePointerNode getInternalNativePointerNode, + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + var newEncodingProfiled = profileEncoding.profileEncoding(newEncoding); + var currentEncoding = encoding.tencoding; + var pointer = (Pointer) getInternalNativePointerNode.execute(tstring, currentEncoding); + var byteLength = tstring.byteLength(currentEncoding); + var newTString = fromNativePointerNode.execute(pointer, 0, byteLength, newEncodingProfiled.tencoding, + false); + string.setTString(newTString, newEncodingProfiled); + return string; + } + + @Specialization(guards = "libEncoding.isRubyString(newEncoding)", limit = "1") + protected RubyString forceEncodingString(RubyString string, Object newEncoding, + @Cached RubyStringLibrary libEncoding, + @Cached ToJavaStringNode toJavaStringNode, @Cached BranchProfile errorProfile) { - final String stringName = libEncoding.getJavaString(encoding); + final String stringName = 
toJavaStringNode.executeToJavaString(newEncoding); final RubyEncoding rubyEncoding = getContext().getEncodingManager().getRubyEncoding(stringName); if (rubyEncoding == null) { @@ -1574,70 +1415,41 @@ protected RubyString forceEncodingString(RubyString string, Object encoding, coreExceptions().argumentError(Utils.concat("unknown encoding name - ", stringName), this)); } - return forceEncodingEncoding(string, rubyEncoding); - } - - @Specialization - protected RubyString forceEncodingEncoding(RubyString string, RubyEncoding encoding) { - - if (differentEncodingProfile.profile(string.encoding != encoding)) { - final Encoding javaEncoding = encoding.jcoding; - final Rope rope = string.rope; - final Rope newRope = withEncodingNode.executeWithEncoding(rope, javaEncoding); - string.setRope(newRope, encoding); - } - - return string; + return execute(string, rubyEncoding); } - @Specialization(guards = { "isNotRubyString(encoding)", "!isRubyEncoding(encoding)" }) - protected RubyString forceEncoding(RubyString string, Object encoding, + @Specialization(guards = { "!isRubyEncoding(newEncoding)", "isNotRubyString(newEncoding)" }) + protected RubyString forceEncoding(RubyString string, Object newEncoding, @Cached ToStrNode toStrNode, @Cached ForceEncodingNode forceEncodingNode) { - return forceEncodingNode.execute(string, toStrNode.execute(encoding)); + return forceEncodingNode.execute(string, toStrNode.execute(newEncoding)); } - } @CoreMethod(names = "getbyte", required = 1, lowerFixnum = 1) public abstract static class StringGetByteNode extends CoreMethodArrayArgumentsNode { - @Child private NormalizeIndexNode normalizeIndexNode = NormalizeIndexNode.create(); - @Child private GetByteNode ropeGetByteNode = GetByteNode.create(); + @Child private StringHelperNodes.NormalizeIndexNode normalizeIndexNode = StringHelperNodes.NormalizeIndexNode + .create(); + @Child private TruffleString.ReadByteNode readByteNode = TruffleString.ReadByteNode.create(); @Specialization protected Object 
getByte(Object string, int index, @Cached ConditionProfile indexOutOfBoundsProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope rope = libString.getRope(string); - final int normalizedIndex = normalizeIndexNode.executeNormalize(index, rope.byteLength()); + @Cached RubyStringLibrary libString) { + var tstring = libString.getTString(string); + var encoding = libString.getEncoding(string).tencoding; + int byteLength = tstring.byteLength(encoding); - if (indexOutOfBoundsProfile.profile((normalizedIndex < 0) || (normalizedIndex >= rope.byteLength()))) { + final int normalizedIndex = normalizeIndexNode.executeNormalize(index, byteLength); + + if (indexOutOfBoundsProfile.profile((normalizedIndex < 0) || (normalizedIndex >= byteLength))) { return nil; } - return ropeGetByteNode.executeGetByte(rope, normalizedIndex); - } - - } - - @GenerateUncached - public abstract static class HashStringNode extends RubyBaseNode { - - protected static final int CLASS_SALT = 54008340; // random number, stops hashes for similar values but different classes being the same, static because we want deterministic hashes - - public static HashStringNode create() { - return StringNodesFactory.HashStringNodeGen.create(); + return readByteNode.execute(tstring, normalizedIndex, encoding); } - public abstract long execute(Object string); - - @Specialization - protected long hash(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached RopeNodes.HashNode hashNode) { - return getContext().getHashing(this).hash(CLASS_SALT, hashNode.execute(strings.getRope(string))); - } } @CoreMethod(names = "hash") @@ -1653,7 +1465,7 @@ public static HashNode create() { @Specialization protected long hash(Object string, - @Cached HashStringNode hash) { + @Cached StringHelperNodes.HashStringNode hash) { return hash.execute(string); } } @@ -1662,8 +1474,10 @@ protected long hash(Object string, public abstract static class InitializeNode 
extends CoreMethodArrayArgumentsNode { @Specialization - protected RubyString initializeJavaString(RubyString string, String from, RubyEncoding encoding) { - string.setRope(StringOperations.encodeRope(from, encoding.jcoding), encoding); + protected RubyString initializeJavaString(RubyString string, String from, RubyEncoding encoding, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + var tstring = fromJavaStringNode.execute(from, encoding.tencoding); + string.setTString(tstring, encoding); return string; } @@ -1676,55 +1490,42 @@ protected RubyString initializeJavaStringNoEncoding(RubyString string, String fr this)); } - @Specialization(guards = "stringsFrom.isRubyString(from)") + @Specialization(guards = "stringsFrom.isRubyString(from)", limit = "1") protected RubyString initialize(RubyString string, Object from, Object encoding, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsFrom) { - string.setRope(stringsFrom.getRope(from), stringsFrom.getEncoding(from)); + @Cached RubyStringLibrary stringsFrom) { + string.setTString(stringsFrom.getTString(from), stringsFrom.getEncoding(from)); return string; } @Specialization(guards = { "isNotRubyString(from)", "!isString(from)" }) protected RubyString initialize(VirtualFrame frame, RubyString string, Object from, Object encoding, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringLibrary, + @Cached RubyStringLibrary stringLibrary, @Cached ToStrNode toStrNode) { final Object stringFrom = toStrNode.execute(from); - string.setRope(stringLibrary.getRope(stringFrom), stringLibrary.getEncoding(stringFrom)); + string.setTString(stringLibrary.getTString(stringFrom), stringLibrary.getEncoding(stringFrom)); return string; } } @Primitive(name = "string_get_coderange") - public abstract static class GetCodeRangeNode extends CoreMethodArrayArgumentsNode { - + public abstract static class GetCodeRangeAsIntNode extends PrimitiveArrayArgumentsNode { @Specialization protected int 
getCodeRange(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached CodeRangeNode codeRangeNode) { - return codeRangeNode.execute(strings.getRope(string)).toInt(); - } - - } - - public abstract static class StringGetAssociatedNode extends RubyBaseNode { - - public static StringNodes.StringGetAssociatedNode create() { - return StringNodesFactory.StringGetAssociatedNodeGen.create(); - } - - public abstract Object execute(Object string); - - @Specialization(limit = "getDynamicObjectCacheLimit()") - protected Object getAssociated(RubyString string, - @CachedLibrary("string") DynamicObjectLibrary objectLibrary) { - return objectLibrary.getOrDefault(string, Layouts.ASSOCIATED_IDENTIFIER, null); - } - - @Specialization - protected Object getAssociatedImmutable(ImmutableRubyString string) { - return null; + @Cached RubyStringLibrary strings, + @Cached GetByteCodeRangeNode codeRangeNode) { + final var tstring = strings.getTString(string); + + var codeRange = codeRangeNode.execute(tstring, strings.getTEncoding(string)); + if (codeRange == ASCII) { + return 1; + } else if (codeRange == VALID) { + return 2; + } else { + assert codeRange == BROKEN; + return 3; + } } - } @CoreMethod(names = "initialize_copy", required = 1, raiseIfNotMutableSelf = true) @@ -1737,31 +1538,63 @@ protected Object initializeCopySelfIsSameAsFrom(RubyString self, Object from) { return self; } - @Specialization( - guards = { - "stringsFrom.isRubyString(from)", - "!areEqual(self, from)", - "!isNativeRope(stringsFrom.getRope(from))" }) - protected Object initializeCopy(RubyString self, Object from, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsFrom, - @Cached @Shared("stringGetAssociatedNode") StringGetAssociatedNode stringGetAssociatedNode) { - self.setRope(stringsFrom.getRope(from), stringsFrom.getEncoding(from)); + @Specialization(guards = { + "stringsFrom.isRubyString(from)", + "!areEqual(self, from)", + "!tstring.isNative()", + 
"tstring.isImmutable()" }, limit = "1") + protected Object initializeCopyImmutable(RubyString self, Object from, + @Cached RubyStringLibrary stringsFrom, + @Cached @Shared("stringGetAssociatedNode") StringHelperNodes.StringGetAssociatedNode stringGetAssociatedNode, + @Bind("stringsFrom.getTString(from)") AbstractTruffleString tstring) { + self.setTString(tstring, stringsFrom.getEncoding(from)); + final Object associated = stringGetAssociatedNode.execute(from); copyAssociated(self, associated); return self; } - @Specialization( - guards = { - "stringsFrom.isRubyString(from)", - "!areEqual(self, from)", - "isNativeRope(stringsFrom.getRope(from))" }) - protected Object initializeCopyFromNative(RubyString self, Object from, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsFrom, - @Cached @Shared("stringGetAssociatedNode") StringGetAssociatedNode stringGetAssociatedNode) { - self.setRope( - ((NativeRope) stringsFrom.getRope(from)).makeCopy(getLanguage()), - stringsFrom.getEncoding(from)); + @Specialization(guards = { + "stringsFrom.isRubyString(from)", + "!areEqual(self, from)", + "!tstring.isNative()", + "tstring.isMutable()" }, limit = "1") + protected Object initializeCopyMutable(RubyString self, Object from, + @Cached RubyStringLibrary stringsFrom, + @Cached @Shared("stringGetAssociatedNode") StringHelperNodes.StringGetAssociatedNode stringGetAssociatedNode, + @Cached MutableTruffleString.SubstringByteIndexNode copyMutableTruffleStringNode, + @Bind("stringsFrom.getTString(from)") AbstractTruffleString tstring) { + var encoding = stringsFrom.getEncoding(from); + var tencoding = encoding.tencoding; + int byteLength = tstring.byteLength(tencoding); + // TODO (eregon, 2022): Should the copy be a MutableTruffleString too, or TruffleString with AsTruffleStringNode? 
+ MutableTruffleString copy = copyMutableTruffleStringNode.execute(tstring, 0, byteLength, tencoding); + self.setTString(copy, encoding); + + final Object associated = stringGetAssociatedNode.execute(from); + copyAssociated(self, associated); + return self; + } + + @Specialization(guards = { "!areEqual(self, from)", "tstring.isNative()" }) + protected Object initializeCopyNative(RubyString self, RubyString from, + @Cached RubyStringLibrary libString, + @Cached @Shared("stringGetAssociatedNode") StringHelperNodes.StringGetAssociatedNode stringGetAssociatedNode, + @Cached TruffleString.GetInternalNativePointerNode getInternalNativePointerNode, + @Cached MutableTruffleString.FromNativePointerNode fromNativePointerNode, + @Bind("from.tstring") AbstractTruffleString tstring) { + var encoding = libString.getEncoding(from); + var tencoding = encoding.tencoding; + final Pointer fromPointer = (Pointer) getInternalNativePointerNode.execute(tstring, tencoding); + + final Pointer newPointer = Pointer.mallocAutoRelease(fromPointer.getSize(), getLanguage()); + newPointer.writeBytes(0, fromPointer, 0, fromPointer.getSize()); + + // TODO (eregon, 2022): should we have the copy be native too, or rather take the opportunity of having to copy to be managed? 
+ assert tstring.isMutable(); + var copy = fromNativePointerNode.execute(newPointer, 0, tstring.byteLength(tencoding), tencoding, false); + self.setTString(copy, encoding); + final Object associated = stringGetAssociatedNode.execute(from); copyAssociated(self, associated); return self; @@ -1781,85 +1614,86 @@ private void copyAssociated(RubyString self, Object associated) { writeAssociatedNode.execute(self, Layouts.ASSOCIATED_IDENTIFIER, associated); } } - - protected boolean isNativeRope(Rope other) { - return other instanceof NativeRope; - } } @CoreMethod(names = "lstrip!", raiseIfNotMutableSelf = true) @ImportStatic(StringGuards.class) public abstract static class LstripBangNode extends CoreMethodArrayArgumentsNode { - @Child private GetCodePointNode getCodePointNode = GetCodePointNode.create(); - @Child private SubstringNode substringNode = SubstringNode.create(); + @Child TruffleString.SubstringByteIndexNode substringNode; - @Specialization(guards = "isEmpty(string.rope)") + @Specialization(guards = "isEmpty(string.tstring)") protected Object lstripBangEmptyString(RubyString string) { return nil; } - @Specialization( - guards = { "!isEmpty(string.rope)", "isSingleByteOptimizable(string, singleByteOptimizableNode)" }) + @Specialization(guards = "!isEmpty(string.tstring)") protected Object lstripBangSingleByte(RubyString string, - @Cached BytesNode bytesNode, - @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached RubyStringLibrary libString, + @Cached GetActualEncodingNode getActualEncodingNode, + @Cached CreateCodePointIteratorNode createCodePointIteratorNode, + @Cached TruffleStringIterator.NextNode nextNode, + @Cached BranchProfile allWhitespaceProfile, + @Cached BranchProfile nonSpaceCodePointProfile, + @Cached BranchProfile badCodePointProfile, @Cached ConditionProfile noopProfile) { - // Taken from org.jruby.RubyString#lstrip_bang19 and org.jruby.RubyString#singleByteLStrip. 
+ var tstring = string.tstring; + var encoding = getActualEncodingNode.execute(tstring, libString.getEncoding(string)); + var tencoding = encoding.tencoding; - final Rope rope = string.rope; - final int firstCodePoint = getCodePointNode.executeGetCodePoint(string.encoding, rope, 0); + var iterator = createCodePointIteratorNode.execute(tstring, tencoding, ErrorHandling.RETURN_NEGATIVE); + int codePoint = nextNode.execute(iterator); + + // Check the first code point to see if it's broken. In the case of strings without leading spaces, + // this check can avoid having to compile the while loop. + if (codePoint == -1) { + badCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); + } // Check the first code point to see if it's a space. In the case of strings without leading spaces, - // this check can avoid having to materialize the entire byte[] (a potentially expensive operation - // for ropes) and can avoid having to compile the while loop. - if (noopProfile.profile(!StringSupport.isAsciiSpaceOrNull(firstCodePoint))) { + // this check can avoid having to compile the while loop. 
+ if (noopProfile.profile(!StringSupport.isAsciiSpaceOrNull(codePoint))) { return nil; } - final int end = rope.byteLength(); - final byte[] bytes = bytesNode.execute(rope); + while (iterator.hasNext()) { + int byteIndex = iterator.getByteIndex(); + codePoint = nextNode.execute(iterator); - int p = 0; - while (p < end && StringSupport.isAsciiSpaceOrNull(bytes[p])) { - p++; + if (codePoint == -1) { + badCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); + } + + if (!StringSupport.isAsciiSpaceOrNull(codePoint)) { + nonSpaceCodePointProfile.enter(); + string.setTString(makeSubstring(tstring, tencoding, byteIndex)); + + return string; + } } - string.setRope(substringNode.executeSubstring(rope, p, end - p)); + // If we've made it this far, the string must consist only of whitespace. Otherwise, we would have exited + // early in the first code point check or in the iterator when the first non-space character was encountered. + allWhitespaceProfile.enter(); + string.setTString(tencoding.getEmpty()); return string; } - @TruffleBoundary - @Specialization( - guards = { "!isEmpty(string.rope)", "!isSingleByteOptimizable(string, singleByteOptimizableNode)" }) - protected Object lstripBang(RubyString string, - @Cached SingleByteOptimizableNode singleByteOptimizableNode, - @Cached GetActualEncodingNode getActualEncodingNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - // Taken from org.jruby.RubyString#lstrip_bang19 and org.jruby.RubyString#multiByteLStrip. 
- - final Rope rope = string.rope; - final RubyEncoding enc = getActualEncodingNode.execute(rope, strings.getEncoding(string)); - final int s = 0; - final int end = s + rope.byteLength(); - - int p = s; - while (p < end) { - int c = getCodePointNode.executeGetCodePoint(enc, rope, p); - if (!StringSupport.isAsciiSpaceOrNull(c)) { - break; - } - p += StringSupport.codeLength(enc.jcoding, c); + private AbstractTruffleString makeSubstring(AbstractTruffleString base, TruffleString.Encoding encoding, + int byteOffset) { + if (substringNode == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + substringNode = insert(TruffleString.SubstringByteIndexNode.create()); } - if (p > s) { - string.setRope(substringNode.executeSubstring(rope, p - s, end - p)); - - return string; - } + int substringByteLength = base.byteLength(encoding) - byteOffset; - return nil; + return substringNode.execute(base, byteOffset, substringByteLength, encoding, true); } } @@ -1868,17 +1702,17 @@ protected Object lstripBang(RubyString string, @ImportStatic(StringGuards.class) public abstract static class OrdNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = { "isEmpty(strings.getRope(string))" }) + @Specialization(guards = { "isEmpty(strings.getTString(string))" }) protected int ordEmpty(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { throw new RaiseException(getContext(), coreExceptions().argumentError("empty string", this)); } - @Specialization(guards = { "!isEmpty(strings.getRope(string))" }) + @Specialization(guards = { "!isEmpty(strings.getTString(string))" }) protected int ord(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached GetCodePointNode getCodePointNode) { - return getCodePointNode.executeGetCodePoint(strings.getEncoding(string), strings.getRope(string), 0); + @Cached RubyStringLibrary strings, + @Cached 
StringHelperNodes.GetCodePointNode getCodePointNode) { + return getCodePointNode.executeGetCodePoint(strings.getTString(string), strings.getEncoding(string), 0); } } @@ -1898,16 +1732,20 @@ protected RubyString replaceStringIsSameAsOther(RubyString string, RubyString ot return string; } - @Specialization(guards = { "string != other" }) - protected RubyString replace(RubyString string, RubyString other) { - string.setRope(other.rope, other.encoding); + protected RubyString replace(RubyString string, RubyString other, + @Cached RubyStringLibrary libOther, + @Cached AsTruffleStringNode asTruffleStringNode) { + var encoding = libOther.getEncoding(other); + TruffleString immutableCopy = asTruffleStringNode.execute(other.tstring, encoding.tencoding); + string.setTString(immutableCopy, encoding); return string; } @Specialization - protected RubyString replace(RubyString string, ImmutableRubyString other) { - string.setRope(other.rope, other.getEncoding()); + protected RubyString replace(RubyString string, ImmutableRubyString other, + @Cached RubyStringLibrary libString) { + string.setTString(other.tstring, libString.getEncoding(other)); return string; } @@ -1917,92 +1755,81 @@ protected RubyString replace(RubyString string, ImmutableRubyString other) { @ImportStatic(StringGuards.class) public abstract static class RstripBangNode extends CoreMethodArrayArgumentsNode { - @Child private GetCodePointNode getCodePointNode = GetCodePointNode.create(); - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); - @Child private SubstringNode substringNode = SubstringNode.create(); + @Child TruffleString.SubstringByteIndexNode substringNode = TruffleString.SubstringByteIndexNode.create(); - @Specialization(guards = "isEmpty(string.rope)") + @Specialization(guards = "isEmpty(string.tstring)") protected Object rstripBangEmptyString(RubyString string) { return nil; } - @Specialization( - guards = { "!isEmpty(string.rope)", 
"isSingleByteOptimizable(string, singleByteOptimizableNode)" }) - protected Object rstripBangSingleByte(RubyString string, - @Cached BytesNode bytesNode, + @Specialization(guards = "!isEmpty(string.tstring)") + protected Object rstripBangNonEmptyString(RubyString string, + @Cached RubyStringLibrary libString, + @Cached GetActualEncodingNode getActualEncodingNode, + @Cached TruffleString.CreateBackwardCodePointIteratorNode createBackwardCodePointIteratorNode, + @Cached TruffleStringIterator.PreviousNode previousNode, + @Cached BranchProfile allWhitespaceProfile, + @Cached BranchProfile nonSpaceCodePointProfile, + @Cached BranchProfile badCodePointProfile, @Cached @Exclusive ConditionProfile noopProfile) { - // Taken from org.jruby.RubyString#rstrip_bang19 and org.jruby.RubyString#singleByteRStrip19. + var tstring = string.tstring; + var encoding = getActualEncodingNode.execute(tstring, libString.getEncoding(string)); + var tencoding = encoding.tencoding; - final Rope rope = string.rope; - final int lastCodePoint = getCodePointNode - .executeGetCodePoint(string.encoding, rope, rope.byteLength() - 1); + var iterator = createBackwardCodePointIteratorNode.execute(tstring, tencoding, + ErrorHandling.RETURN_NEGATIVE); + int codePoint = previousNode.execute(iterator); - // Check the last code point to see if it's a space or NULL. In the case of strings without leading spaces, - // this check can avoid having to materialize the entire byte[] (a potentially expensive operation - // for ropes) and can avoid having to compile the while loop. - final boolean willStrip = StringSupport.isAsciiSpaceOrNull(lastCodePoint); - if (noopProfile.profile(!willStrip)) { - return nil; + // Check the last code point to see if it's broken. In the case of strings without trailing spaces, + // this check can avoid having to compile the while loop. 
+ if (codePoint == -1) { + badCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); } - final int end = rope.byteLength(); - final byte[] bytes = bytesNode.execute(rope); - - int endp = end - 1; - while (endp >= 0 && StringSupport.isAsciiSpaceOrNull(bytes[endp])) { - endp--; + // Check the last code point to see if it's a space. In the case of strings without trailing spaces, + // this check can avoid having to compile the while loop. + if (noopProfile.profile(!StringSupport.isAsciiSpaceOrNull(codePoint))) { + return nil; } - string.setRope(substringNode.executeSubstring(rope, 0, endp + 1)); - - return string; - } - - @TruffleBoundary - @Specialization( - guards = { "!isEmpty(string.rope)", "!isSingleByteOptimizable(string, singleByteOptimizableNode)" }) - protected Object rstripBang(RubyString string, - @Cached GetActualEncodingNode getActualEncodingNode, - @Cached @Exclusive ConditionProfile dummyEncodingProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - // Taken from org.jruby.RubyString#rstrip_bang19 and org.jruby.RubyString#multiByteRStrip19. 
- - final Rope rope = string.rope; - final RubyEncoding enc = getActualEncodingNode.execute(rope, strings.getEncoding(string)); + while (iterator.hasPrevious()) { + int byteIndex = iterator.getByteIndex(); + codePoint = previousNode.execute(iterator); - if (dummyEncodingProfile.profile(enc.jcoding.isDummy())) { - throw new RaiseException( - getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(enc.jcoding, this)); - } + if (codePoint == -1) { + badCodePointProfile.enter(); + throw new RaiseException(getContext(), + coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); + } - final byte[] bytes = rope.getBytes(); - final int start = 0; - final int end = rope.byteLength(); + if (!StringSupport.isAsciiSpaceOrNull(codePoint)) { + nonSpaceCodePointProfile.enter(); + string.setTString(makeSubstring(tstring, tencoding, byteIndex)); - int endp = end; - int prev; - while ((prev = prevCharHead(enc.jcoding, bytes, start, endp, end)) != -1) { - int point = getCodePointNode.executeGetCodePoint(enc, rope, prev); - if (!StringSupport.isAsciiSpaceOrNull(point)) { - break; + return string; } - endp = prev; } - if (endp < end) { - string.setRope(substringNode.executeSubstring(rope, 0, endp - start)); + // If we've made it this far, the string must consist only of whitespace. Otherwise, we would have exited + // early in the first code point check or in the iterator when the first non-space character was encountered. 
+ allWhitespaceProfile.enter(); + string.setTString(tencoding.getEmpty()); - return string; - } - return nil; + return string; } - @TruffleBoundary - private int prevCharHead(Encoding enc, byte[] bytes, int p, int s, int end) { - return enc.prevCharHead(bytes, p, s, end); + private AbstractTruffleString makeSubstring(AbstractTruffleString base, TruffleString.Encoding encoding, + int byteEnd) { + if (substringNode == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + substringNode = insert(TruffleString.SubstringByteIndexNode.create()); + } + + return substringNode.execute(base, 0, byteEnd, encoding, true); } + } @Primitive(name = "string_scrub") @@ -2010,47 +1837,48 @@ private int prevCharHead(Encoding enc, byte[] bytes, int p, int s, int end) { public abstract static class ScrubNode extends PrimitiveArrayArgumentsNode { @Child private CallBlockNode yieldNode = CallBlockNode.create(); - @Child CodeRangeNode codeRangeNode = CodeRangeNode.create(); - @Child private ConcatNode concatNode = ConcatNode.create(); - @Child private SubstringNode substringNode = SubstringNode.create(); - @Child private MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - @Child private CalculateCharacterLengthNode calculateCharacterLengthNode = CalculateCharacterLengthNode + @Child GetByteCodeRangeNode codeRangeNode = GetByteCodeRangeNode.create(); + @Child private TruffleString.ConcatNode concatNode = TruffleString.ConcatNode.create(); + @Child private TruffleString.GetInternalByteArrayNode byteArrayNode = TruffleString.GetInternalByteArrayNode .create(); - @Child private BytesNode bytesNode = BytesNode.create(); + @Child TruffleString.SubstringByteIndexNode substringNode = TruffleString.SubstringByteIndexNode.create(); @Specialization( - guards = { - "isBrokenCodeRange(rope, codeRangeNode)", - "isAsciiCompatible(rope)" }) + guards = { "isBrokenCodeRange(tstring, encoding, codeRangeNode)", "isAsciiCompatible(encoding)" }) protected RubyString 
scrubAsciiCompat(Object string, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Bind("strings.getRope(string)") Rope rope) { - final Encoding enc = rope.getEncoding(); - final CodeRange cr = codeRangeNode.execute(rope); - Rope buf = RopeConstants.EMPTY_ASCII_8BIT_ROPE; + @Cached RubyStringLibrary strings, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { + final Encoding enc = encoding.jcoding; + var tencoding = encoding.tencoding; + TruffleString buf = EMPTY_BINARY; - final byte[] pBytes = bytesNode.execute(rope); - final int e = pBytes.length; + var byteArray = byteArrayNode.execute(tstring, tencoding); + final int e = tstring.byteLength(tencoding); int p = 0; - int p1 = 0; + int p1 = p; - p = StringSupport.searchNonAscii(pBytes, p, e); - if (p == -1) { + p = StringSupport.searchNonAscii(byteArray, p); + if (p < 0) { p = e; } while (p < e) { - int ret = calculateCharacterLengthNode.characterLength(enc, CR_BROKEN, Bytes.fromRange(pBytes, p, e)); - if (MBCLEN_NEEDMORE_P(ret)) { + int clen = byteLengthOfCodePointNode.execute(tstring, p, tencoding, + ErrorHandling.RETURN_NEGATIVE); + if (MBCLEN_NEEDMORE_P(clen)) { break; - } else if (MBCLEN_CHARFOUND_P(ret)) { - p += MBCLEN_CHARFOUND_LEN(ret); - } else if (MBCLEN_INVALID_P(ret)) { + } else if (MBCLEN_CHARFOUND_P(clen)) { + p += MBCLEN_CHARFOUND_LEN(clen); + } else if (MBCLEN_INVALID_P(clen)) { // p1~p: valid ascii/multibyte chars // p ~e: invalid bytes + unknown bytes - int clen = enc.maxLength(); - if (p1 < p) { - buf = concatNode.executeConcat(buf, substringNode.executeSubstring(rope, p1, p - p1), enc); + clen = enc.maxLength(); + if (p > p1) { + buf = concatNode.execute(buf, + substringNode.execute(tstring, p1, p - p1, tencoding, true), + tencoding, true); } if (e - p < clen) { @@ -2061,68 +1889,74 @@ protected 
RubyString scrubAsciiCompat(Object string, RubyProc block, } else { clen--; for (; clen > 1; clen--) { - ret = StringSupport.characterLength(enc, cr, pBytes, p, p + clen); - if (MBCLEN_NEEDMORE_P(ret)) { + var subTString = substringNode.execute(tstring, p, clen, tencoding, true); + int clen2 = byteLengthOfCodePointNode.execute(subTString, 0, tencoding, + ErrorHandling.RETURN_NEGATIVE); + if (MBCLEN_NEEDMORE_P(clen2)) { break; } } } - final Rope subStringRope = substringNode.executeSubstring(rope, p, clen); - Object repl = yieldNode - .yield(block, makeStringNode.fromRope(subStringRope, strings.getEncoding(string))); - buf = concatNode.executeConcat(buf, strings.getRope(repl), enc); + Object repl = yieldNode.yield(block, + createSubString(substringNode, tstring, encoding, p, clen)); + buf = concatNode.execute(buf, strings.getTString(repl), tencoding, true); p += clen; p1 = p; - p = StringSupport.searchNonAscii(pBytes, p, e); - if (p == -1) { + p = StringSupport.searchNonAscii(byteArray, p); + if (p < 0) { p = e; break; } } } + if (p1 < p) { - buf = concatNode.executeConcat(buf, substringNode.executeSubstring(rope, p1, p - p1), enc); + buf = concatNode.execute(buf, + substringNode.execute(tstring, p1, p - p1, tencoding, true), tencoding, + true); } + if (p < e) { - final Rope subStringRope = substringNode.executeSubstring(rope, p, e - p); - Object repl = yieldNode - .yield(block, makeStringNode.fromRope(subStringRope, strings.getEncoding(string))); - buf = concatNode.executeConcat(buf, strings.getRope(repl), enc); + Object repl = yieldNode.yield(block, + createSubString(substringNode, tstring, encoding, p, e - p)); + buf = concatNode.execute(buf, strings.getTString(repl), tencoding, true); } - return makeStringNode.fromRope(buf, strings.getEncoding(string)); + + return createString(buf, encoding); } @Specialization( guards = { - "isBrokenCodeRange(rope, codeRangeNode)", - "!isAsciiCompatible(rope)" }) + "isBrokenCodeRange(tstring, encoding, codeRangeNode)", + 
"!isAsciiCompatible(encoding)" }) protected RubyString scrubAsciiIncompatible(Object string, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Bind("strings.getRope(string)") Rope rope, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode) { - final Encoding enc = rope.getEncoding(); - final CodeRange cr = codeRangeNode.execute(rope); - Rope buf = RopeConstants.EMPTY_ASCII_8BIT_ROPE; - - final byte[] pBytes = bytesNode.execute(rope); - final int e = pBytes.length; + @Cached RubyStringLibrary strings, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { + final Encoding enc = encoding.jcoding; + var tencoding = encoding.tencoding; + TruffleString buf = EMPTY_BINARY; + final int e = tstring.byteLength(tencoding); int p = 0; - int p1 = 0; + int p1 = p; final int mbminlen = enc.minLength(); while (p < e) { - int ret = calculateCharacterLengthNode.characterLength(enc, CR_BROKEN, Bytes.fromRange(pBytes, p, e)); - if (MBCLEN_NEEDMORE_P(ret)) { + int clen = byteLengthOfCodePointNode.execute(tstring, p, tencoding, ErrorHandling.RETURN_NEGATIVE); + if (MBCLEN_NEEDMORE_P(clen)) { break; - } else if (MBCLEN_CHARFOUND_P(ret)) { - p += MBCLEN_CHARFOUND_LEN(ret); - } else if (MBCLEN_INVALID_P(ret)) { + } else if (MBCLEN_CHARFOUND_P(clen)) { + p += MBCLEN_CHARFOUND_LEN(clen); + } else if (MBCLEN_INVALID_P(clen)) { final int q = p; - int clen = enc.maxLength(); + clen = enc.maxLength(); - if (p1 < p) { - buf = concatNode.executeConcat(buf, substringNode.executeSubstring(rope, p1, p - p1), enc); + if (p > p1) { + buf = concatNode.execute(buf, + substringNode.execute(tstring, p1, p - p1, tencoding, true), + tencoding, true); } if (e - p < clen) { @@ -2133,34 +1967,36 @@ protected RubyString scrubAsciiIncompatible(Object string, RubyProc block, } else { clen -= mbminlen; for 
(; clen > mbminlen; clen -= mbminlen) { - ret = calculateCharacterLengthNode.characterLength(enc, cr, new Bytes(pBytes, q, clen)); - if (MBCLEN_NEEDMORE_P(ret)) { + var subTString = substringNode.execute(tstring, q, clen, tencoding, true); + int clen2 = byteLengthOfCodePointNode.execute(subTString, 0, tencoding, + ErrorHandling.RETURN_NEGATIVE); + if (MBCLEN_NEEDMORE_P(clen2)) { break; } } } - final Rope subStringRope = substringNode.executeSubstring(rope, p, clen); - RubyString repl = (RubyString) yieldNode.yield( - block, - makeStringNode.fromRope(subStringRope, strings.getEncoding(string))); - buf = concatNode.executeConcat(buf, repl.rope, enc); + RubyString repl = (RubyString) yieldNode.yield(block, + createSubString(substringNode, tstring, encoding, p, clen)); + buf = concatNode.execute(buf, repl.tstring, tencoding, true); p += clen; p1 = p; } } + if (p1 < p) { - buf = concatNode.executeConcat(buf, substringNode.executeSubstring(rope, p1, p - p1), enc); + buf = concatNode.execute(buf, substringNode.execute(tstring, p1, p - p1, tencoding, true), + tencoding, + true); } + if (p < e) { - final Rope subStringRope = substringNode.executeSubstring(rope, p, e - p); - RubyString repl = (RubyString) yieldNode.yield( - block, - makeStringNode.fromRope(subStringRope, strings.getEncoding(string))); - buf = concatNode.executeConcat(buf, repl.rope, enc); + RubyString repl = (RubyString) yieldNode.yield(block, + createSubString(substringNode, tstring, encoding, p, e - p)); + buf = concatNode.execute(buf, repl.tstring, tencoding, true); } - return makeStringNode.fromRope(buf, strings.getEncoding(string)); + return createString(buf, encoding); } } @@ -2169,74 +2005,53 @@ protected RubyString scrubAsciiIncompatible(Object string, RubyProc block, @ImportStatic({ StringGuards.class, Config.class }) public abstract static class StringSwapcaseBangPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child SingleByteOptimizableNode singleByteOptimizableNode = 
SingleByteOptimizableNode - .create(); - - @Specialization(guards = { "isSingleByteCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object swapcaseSingleByte(RubyString string, int caseMappingOptions, - @Cached("createSwapCase()") InvertAsciiCaseNode invertAsciiCaseNode) { - return invertAsciiCaseNode.executeInvert(string); - } - - @Specialization(guards = { "isSimpleAsciiCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object swapcaseMultiByteAsciiSimple(RubyString string, int caseMappingOptions, - @Cached @Shared("bytesNode") BytesNode bytesNode, - @Cached CharacterLengthNode characterLengthNode, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("makeLeafRopeNode") MakeLeafRopeNode makeLeafRopeNode, - @Cached @Shared("dummyEncodingProfile") ConditionProfile dummyEncodingProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - // Taken from org.jruby.RubyString#swapcase_bang19. 
- - final Rope rope = string.rope; - final Encoding enc = rope.getEncoding(); + @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); + private final ConditionProfile dummyEncodingProfile = ConditionProfile.createBinaryProfile(); - if (dummyEncodingProfile.profile(enc.isDummy())) { + @Specialization( + guards = "!isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") + protected Object swapcaseAsciiCodePoints(RubyString string, int caseMappingOptions, + @Cached RubyStringLibrary libString, + @Cached("createSwapCase()") StringHelperNodes.InvertAsciiCaseNode invertAsciiCaseNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + if (dummyEncodingProfile.profile(encoding.isDummy)) { throw new RaiseException( getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(enc, this)); + coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - final CodeRange cr = codeRangeNode.execute(rope); - final byte[] inputBytes = bytesNode.execute(rope); - final byte[] outputBytes = StringSupport.swapcaseMultiByteAsciiSimple(enc, cr, inputBytes); - - if (modifiedProfile.profile(inputBytes != outputBytes)) { - string.setRope( - makeLeafRopeNode.executeMake(outputBytes, enc, cr, characterLengthNode.execute(rope))); - return string; - } else { - return nil; - } + return invertAsciiCaseNode.executeInvert(string); } - @Specialization(guards = "isComplexCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)") + @Specialization( + guards = "isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") protected Object swapcaseMultiByteComplex(RubyString string, int caseMappingOptions, - @Cached @Shared("bytesNode") BytesNode bytesNode, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("makeLeafRopeNode") 
MakeLeafRopeNode makeLeafRopeNode, - @Cached @Shared("dummyEncodingProfile") ConditionProfile dummyEncodingProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { + @Cached RubyStringLibrary libString, + @Cached GetByteCodeRangeNode codeRangeNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Cached ConditionProfile modifiedProfile, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { // Taken from org.jruby.RubyString#swapcase_bang19. - - final Rope rope = string.rope; - final Encoding enc = rope.getEncoding(); - - if (dummyEncodingProfile.profile(enc.isDummy())) { + if (dummyEncodingProfile.profile(encoding.isDummy)) { throw new RaiseException( getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(enc, this)); + coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - final RopeBuilder builder = RopeBuilder.createRopeBuilder(bytesNode.execute(rope), rope.getEncoding()); + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + + // TODO (nirvdrum 24-Jun-22): Make the byte array builder copy-on-write so we don't eagerly clone the source byte array. 
+ var builder = ByteArrayBuilder.create(byteArray); + + var cr = codeRangeNode.execute(string.tstring, encoding.tencoding); final boolean modified = StringSupport - .swapCaseMultiByteComplex(enc, codeRangeNode.execute(rope), builder, caseMappingOptions, this); + .swapCaseMultiByteComplex(encoding.jcoding, cr, builder, caseMappingOptions, this); if (modifiedProfile.profile(modified)) { - string.setRope( - makeLeafRopeNode - .executeMake(builder.getBytes(), rope.getEncoding(), CR_UNKNOWN, NotProvided.INSTANCE)); - + string.setTString(fromByteArrayNode.execute(builder.getBytes(), encoding.tencoding, false)); return string; } else { return nil; @@ -2248,71 +2063,44 @@ protected Object swapcaseMultiByteComplex(RubyString string, int caseMappingOpti @ImportStatic(StringGuards.class) public abstract static class DumpNode extends CoreMethodArrayArgumentsNode { - @Child private MakeLeafRopeNode makeLeafRopeNode = MakeLeafRopeNode.create(); + private static final byte[] FORCE_ENCODING_CALL_BYTES = StringOperations.encodeAsciiBytes(".force_encoding(\""); @TruffleBoundary - @Specialization(guards = "isAsciiCompatible(libString.getRope(string))") + @Specialization(guards = "isAsciiCompatible(libString.getEncoding(string))") protected RubyString dumpAsciiCompatible(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - // Taken from org.jruby.RubyString#dump + @Cached RubyStringLibrary libString, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { + ByteArrayBuilder outputBytes = dumpCommon(new ATStringWithEncoding(libString, string)); - RopeBuilder outputBytes = dumpCommon(libString.getRope(string)); - outputBytes.setEncoding(libString.getRope(string).getEncoding()); - - final Rope rope = makeLeafRopeNode - .executeMake(outputBytes.getBytes(), outputBytes.getEncoding(), CR_7BIT, outputBytes.getLength()); - - final RubyString result = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - 
libString.getEncoding(string)); - AllocationTracing.trace(result, this); - return result; + return createString(fromByteArrayNode, outputBytes.getBytes(), libString.getEncoding(string)); } @TruffleBoundary - @Specialization(guards = "!isAsciiCompatible(libString.getRope(string))") + @Specialization(guards = "!isAsciiCompatible(libString.getEncoding(string))") protected RubyString dump(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - // Taken from org.jruby.RubyString#dump - - RopeBuilder outputBytes = dumpCommon(libString.getRope(string)); - - try { - outputBytes.append(".force_encoding(\"".getBytes("UTF-8")); - } catch (UnsupportedEncodingException e) { - throw new UnsupportedOperationException(e); - } + @Cached RubyStringLibrary libString, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { + ByteArrayBuilder outputBytes = dumpCommon(new ATStringWithEncoding(libString, string)); - outputBytes.append(libString.getRope(string).getEncoding().getName()); + outputBytes.append(FORCE_ENCODING_CALL_BYTES); + outputBytes.append(libString.getEncoding(string).jcoding.getName()); outputBytes.append((byte) '"'); outputBytes.append((byte) ')'); - outputBytes.setEncoding(ASCIIEncoding.INSTANCE); - - final Rope rope = makeLeafRopeNode - .executeMake(outputBytes.getBytes(), outputBytes.getEncoding(), CR_7BIT, outputBytes.getLength()); - - final RubyString result = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - Encodings.BINARY); - AllocationTracing.trace(result, this); - return result; + return createString(fromByteArrayNode, outputBytes.getBytes(), Encodings.BINARY); } - private RopeBuilder dumpCommon(Rope rope) { - RopeBuilder buf = null; - final Encoding enc = rope.getEncoding(); - final CodeRange cr = rope.getCodeRange(); + // Taken from org.jruby.RubyString#dump + private ByteArrayBuilder dumpCommon(ATStringWithEncoding string) { + ByteArrayBuilder buf = null; + final var enc 
= string.encoding.jcoding; + final var cr = string.getCodeRange(); - int p = 0; - int end = rope.byteLength(); - byte[] bytes = rope.getBytes(); + var byteArray = string.getInternalByteArray(); + final int offset = byteArray.getOffset(); + int p = offset; + final int end = byteArray.getEnd(); + byte[] bytes = byteArray.getArray(); int len = 2; while (p < end) { @@ -2332,19 +2120,19 @@ private RopeBuilder dumpCommon(Rope rope) { len += 2; break; case '#': - len += isEVStr(bytes, p, end) ? 2 : 1; + len += p < end && isEVStr(bytes[p] & 0xff) ? 2 : 1; break; default: if (ASCIIEncoding.INSTANCE.isPrint(c)) { len++; } else { if (enc.isUTF8()) { - int n = StringSupport.characterLength(enc, cr, bytes, p - 1, end) - 1; + int n = string.characterLength(p - 1 - offset) - 1; if (n > 0) { if (buf == null) { - buf = new RopeBuilder(); + buf = new ByteArrayBuilder(); } - int cc = StringSupport.codePoint(enc, rope.getCodeRange(), bytes, p - 1, end, this); + int cc = StringSupport.codePoint(enc, cr, bytes, p - 1, end, this); buf.append(StringUtils.formatASCIIBytes("%x", cc)); len += buf.getLength() + 4; buf.setLength(0); @@ -2359,15 +2147,14 @@ private RopeBuilder dumpCommon(Rope rope) { } if (!enc.isAsciiCompatible()) { - len += ".force_encoding(\"".length() + enc.getName().length + "\")".length(); + len += FORCE_ENCODING_CALL_BYTES.length + enc.getName().length + "\")".length(); } - RopeBuilder outBytes = new RopeBuilder(); + TStringBuilder outBytes = new TStringBuilder(); outBytes.unsafeEnsureSpace(len); - byte out[] = outBytes.getUnsafeBytes(); + byte[] out = outBytes.getUnsafeBytes(); int q = 0; - p = 0; - end = rope.byteLength(); + p = offset; out[q++] = '"'; while (p < end) { @@ -2376,7 +2163,7 @@ private RopeBuilder dumpCommon(Rope rope) { out[q++] = '\\'; out[q++] = (byte) c; } else if (c == '#') { - if (isEVStr(bytes, p, end)) { + if (p < end && isEVStr(bytes[p] & 0xff)) { out[q++] = '\\'; } out[q++] = '#'; @@ -2409,7 +2196,7 @@ private RopeBuilder dumpCommon(Rope rope) { 
} else { out[q++] = '\\'; if (enc.isUTF8()) { - int n = StringSupport.characterLength(enc, cr, bytes, p - 1, end) - 1; + int n = string.characterLength(p - 1 - offset) - 1; if (n > 0) { int cc = StringSupport.codePoint(enc, cr, bytes, p - 1, end, this); p += n; @@ -2431,10 +2218,6 @@ private RopeBuilder dumpCommon(Rope rope) { return outBytes; } - private static boolean isEVStr(byte[] bytes, int p, int end) { - return p < end ? isEVStr(bytes[p] & 0xff) : false; - } - private static boolean isEVStr(int c) { return c == '$' || c == '@' || c == '{'; } @@ -2444,30 +2227,31 @@ private static boolean isEVStr(int c) { @CoreMethod(names = "undump") @ImportStatic(StringGuards.class) public abstract static class UndumpNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "isAsciiCompatible(libString.getRope(string))") + + @Specialization(guards = "isAsciiCompatible(libString.getEncoding(string))") protected RubyString undumpAsciiCompatible(Object string, - @Cached MakeStringNode makeStringNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached RubyStringLibrary libString) { // Taken from org.jruby.RubyString#undump - Pair outputBytesResult = StringSupport.undump( - libString.getRope(string), - libString.getEncoding(string), + var encoding = libString.getEncoding(string); + Pair outputBytesResult = StringSupport.undump( + new ATStringWithEncoding(libString.getTString(string), encoding), + encoding, getContext(), this); final RubyEncoding rubyEncoding = outputBytesResult.getRight(); - return makeStringNode.fromBuilder(outputBytesResult.getLeft(), rubyEncoding, CR_UNKNOWN); + return createString(outputBytesResult.getLeft().toTStringUnsafe(fromByteArrayNode), rubyEncoding); } - @Specialization(guards = "!isAsciiCompatible(libString.getRope(string))") + @Specialization(guards = "!isAsciiCompatible(libString.getEncoding(string))") protected RubyString 
undumpNonAsciiCompatible(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString) { throw new RaiseException( getContext(), getContext().getCoreExceptions().encodingCompatibilityError( - Utils.concat("ASCII incompatible encoding: ", libString.getRope(string).encoding), + Utils.concat("ASCII incompatible encoding: ", libString.getEncoding(string)), this)); } - } @CoreMethod(names = "setbyte", required = 2, raiseIfNotMutableSelf = true, lowerFixnum = { 1, 2 }) @@ -2477,8 +2261,8 @@ protected RubyString undumpNonAsciiCompatible(Object string, @ImportStatic(StringGuards.class) public abstract static class SetByteNode extends CoreMethodNode { - @Child private CheckIndexNode checkIndexNode = CheckIndexNodeGen.create(); - @Child private RopeNodes.SetByteNode setByteNode = RopeNodes.SetByteNode.create(); + @Child private StringHelperNodes.CheckIndexNode checkIndexNode = StringHelperNodesFactory.CheckIndexNodeGen + .create(); @CreateCast("index") protected ToIntNode coerceIndexToInt(RubyBaseNodeWithExecute index) { @@ -2490,72 +2274,32 @@ protected ToIntNode coerceValueToInt(RubyBaseNodeWithExecute value) { return ToIntNode.create(value); } - public abstract int executeSetByte(RubyString string, int index, Object value); - - @Specialization - protected int setByte(RubyString string, int index, int value, - @Cached ConditionProfile newRopeProfile) { - final Rope rope = string.rope; - final int normalizedIndex = checkIndexNode.executeCheck(index, rope.byteLength()); - - final Rope newRope = setByteNode.executeSetByte(rope, normalizedIndex, value); - if (newRopeProfile.profile(newRope != rope)) { - string.setRope(newRope); - } + @Specialization(guards = "tstring.isMutable()") + protected int mutable(RubyString string, int index, int value, + @Cached RubyStringLibrary libString, + @Bind("string.tstring") AbstractTruffleString tstring, + @Cached MutableTruffleString.WriteByteNode writeByteNode) { + var 
tencoding = libString.getTEncoding(string); + final int normalizedIndex = checkIndexNode.executeCheck(index, tstring.byteLength(tencoding)); + writeByteNode.execute((MutableTruffleString) tstring, normalizedIndex, (byte) value, tencoding); return value; } - } - - public abstract static class CheckIndexNode extends RubyBaseNode { - - public abstract int executeCheck(int index, int length); - - @Specialization - protected int checkIndex(int index, int length, - @Cached ConditionProfile negativeIndexProfile, - @Cached BranchProfile errorProfile) { - if (index >= length) { - errorProfile.enter(); - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().indexErrorOutOfString(index, this)); - } - - if (negativeIndexProfile.profile(index < 0)) { - index += length; - if (index < 0) { - errorProfile.enter(); - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().indexErrorOutOfString(index, this)); - } - } - - return index; - } - - } - - public abstract static class NormalizeIndexNode extends RubyBaseNode { - - public abstract int executeNormalize(int index, int length); - - public static NormalizeIndexNode create() { - return NormalizeIndexNodeGen.create(); - } - - @Specialization - protected int normalizeIndex(int index, int length, - @Cached ConditionProfile negativeIndexProfile) { - if (negativeIndexProfile.profile(index < 0)) { - return index + length; - } + @Specialization(guards = "!tstring.isMutable()") + protected int immutable(RubyString string, int index, int value, + @Cached RubyStringLibrary libString, + @Bind("string.tstring") AbstractTruffleString tstring, + @Cached MutableTruffleString.AsMutableTruffleStringNode asMutableTruffleStringNode, + @Cached MutableTruffleString.WriteByteNode writeByteNode) { + var tencoding = libString.getTEncoding(string); + final int normalizedIndex = checkIndexNode.executeCheck(index, tstring.byteLength(tencoding)); - return index; + MutableTruffleString mutableTString = 
asMutableTruffleStringNode.execute(tstring, tencoding); + writeByteNode.execute(mutableTString, normalizedIndex, (byte) value, tencoding); + string.setTString(mutableTString); + return value; } - } @CoreMethod(names = { "size", "length" }) @@ -2570,9 +2314,9 @@ public static SizeNode create() { @Specialization protected int size(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @Cached CharacterLengthNode characterLengthNode) { - return characterLengthNode.execute(libString.getRope(string)); + @Cached RubyStringLibrary libString, + @Cached CodePointLengthNode codePointLengthNode) { + return codePointLengthNode.execute(libString.getTString(string), libString.getTEncoding(string)); } } @@ -2582,52 +2326,47 @@ protected int size(Object string, public abstract static class SqueezeBangNode extends CoreMethodArrayArgumentsNode { @Child private CheckEncodingNode checkEncodingNode; - private final ConditionProfile singleByteOptimizableProfile = ConditionProfile.create(); - @Specialization(guards = "isEmpty(string.rope)") + @Specialization(guards = "isEmpty(string.tstring)") protected Object squeezeBangEmptyString(RubyString string, Object[] args) { return nil; } @TruffleBoundary - @Specialization(guards = { "!isEmpty(string.rope)", "noArguments(args)" }) - protected Object squeezeBangZeroArgs(RubyString string, Object[] args) { + @Specialization(guards = { "!isEmpty(string.tstring)", "noArguments(args)" }) + protected Object squeezeBangZeroArgs(RubyString string, Object[] args, + @Cached SingleByteOptimizableNode singleByteOptimizableNode) { // Taken from org.jruby.RubyString#squeeze_bang19. 
- final Rope rope = string.rope; - final RopeBuilder buffer = RopeOperations.toRopeBuilderCopy(rope); + final TStringBuilder buffer = TStringBuilder.create(string); - final boolean squeeze[] = new boolean[StringSupport.TRANS_SIZE]; + final boolean[] squeeze = new boolean[StringSupport.TRANS_SIZE]; for (int i = 0; i < StringSupport.TRANS_SIZE; i++) { squeeze[i] = true; } - if (singleByteOptimizableProfile.profile(rope.isSingleByteOptimizable())) { + if (StringGuards.isSingleByteOptimizable(string.tstring, string.getEncodingUncached(), + singleByteOptimizableNode)) { if (!StringSupport.singleByteSqueeze(buffer, squeeze)) { return nil; } else { - string.setRope(RopeOperations.ropeFromRopeBuilder(buffer)); + string.setTString(buffer.toTString(), buffer.getRubyEncoding()); } } else { - if (!StringSupport - .multiByteSqueeze( - buffer, - rope.getCodeRange(), - squeeze, - null, - string.rope.getEncoding(), - false, - this)) { + var codeRange = string.tstring + .getByteCodeRangeUncached(RubyStringLibrary.getUncached().getTEncoding(string)); + if (!StringSupport.multiByteSqueeze(buffer, codeRange, squeeze, null, + string.getEncodingUncached().jcoding, false, this)) { return nil; } else { - string.setRope(RopeOperations.ropeFromRopeBuilder(buffer)); + string.setTString(buffer.toTString(), buffer.getRubyEncoding()); } } return string; } - @Specialization(guards = { "!isEmpty(string.rope)", "!noArguments(args)" }) + @Specialization(guards = { "!isEmpty(string.tstring)", "!noArguments(args)" }) protected Object squeezeBang(VirtualFrame frame, RubyString string, Object[] args, @Cached ToStrNode toStrNode) { // Taken from org.jruby.RubyString#squeeze_bang19. 
@@ -2648,49 +2387,53 @@ private Object performSqueezeBang(RubyString string, Object[] otherStrings) { checkEncodingNode = insert(CheckEncodingNode.create()); } - final Rope rope = string.rope; - final RopeBuilder buffer = RopeOperations.toRopeBuilderCopy(rope); + final TStringBuilder buffer = TStringBuilder.create(string); Object otherStr = otherStrings[0]; - Rope otherRope = RubyStringLibrary.getUncached().getRope(otherStr); + var otherRope = RubyStringLibrary.getUncached().getTString(otherStr); + var otherEncoding = RubyStringLibrary.getUncached().getEncoding(otherStr); RubyEncoding enc = checkEncodingNode.executeCheckEncoding(string, otherStr); final boolean squeeze[] = new boolean[StringSupport.TRANS_SIZE + 1]; - boolean singlebyte = rope.isSingleByteOptimizable() && otherRope.isSingleByteOptimizable(); + boolean singlebyte = TStringUtils.isSingleByteOptimizable(string.tstring, string.getEncodingUncached()) && + TStringUtils.isSingleByteOptimizable(otherRope, otherEncoding); - if (singlebyte && otherRope.byteLength() == 1 && otherStrings.length == 1) { - squeeze[otherRope.getRawBytes()[0]] = true; + if (singlebyte && otherRope.byteLength(otherEncoding.tencoding) == 1 && otherStrings.length == 1) { + squeeze[otherRope.readByteUncached(0, otherEncoding.tencoding)] = true; if (!StringSupport.singleByteSqueeze(buffer, squeeze)) { return nil; } else { - string.setRope(RopeOperations.ropeFromRopeBuilder(buffer)); + string.setTString(buffer.toTString(), buffer.getRubyEncoding()); return string; } } StringSupport.TrTables tables = StringSupport - .trSetupTable(otherRope, squeeze, null, true, enc.jcoding, this); + .trSetupTable(otherRope, otherEncoding, squeeze, null, true, enc.jcoding, this); for (int i = 1; i < otherStrings.length; i++) { otherStr = otherStrings[i]; - otherRope = RubyStringLibrary.getUncached().getRope(otherStr); + otherRope = RubyStringLibrary.getUncached().getTString(otherStr); + otherEncoding = RubyStringLibrary.getUncached().getEncoding(otherStr); 
enc = checkEncodingNode.executeCheckEncoding(string, otherStr); - singlebyte = singlebyte && otherRope.isSingleByteOptimizable(); - tables = StringSupport.trSetupTable(otherRope, squeeze, tables, false, enc.jcoding, this); + singlebyte = singlebyte && TStringUtils.isSingleByteOptimizable(otherRope, otherEncoding); + tables = StringSupport.trSetupTable(otherRope, otherEncoding, squeeze, tables, false, enc.jcoding, + this); } - if (singleByteOptimizableProfile.profile(singlebyte)) { + if (singlebyte) { if (!StringSupport.singleByteSqueeze(buffer, squeeze)) { return nil; } else { - string.setRope(RopeOperations.ropeFromRopeBuilder(buffer)); + string.setTString(buffer.toTString(), buffer.getRubyEncoding()); } } else { - if (!StringSupport - .multiByteSqueeze(buffer, rope.getCodeRange(), squeeze, tables, enc.jcoding, true, this)) { + var codeRange = string.tstring + .getByteCodeRangeUncached(RubyStringLibrary.getUncached().getTEncoding(string)); + if (!StringSupport.multiByteSqueeze(buffer, codeRange, squeeze, tables, enc.jcoding, true, this)) { return nil; } else { - string.setRope(RopeOperations.ropeFromRopeBuilder(buffer)); + string.setTString(buffer.toTString(), buffer.getRubyEncoding()); } } @@ -2701,22 +2444,14 @@ private Object performSqueezeBang(RubyString string, Object[] otherStrings) { @CoreMethod(names = "succ!", raiseIfNotMutableSelf = true) public abstract static class SuccBangNode extends CoreMethodArrayArgumentsNode { - - @Child private MakeLeafRopeNode makeLeafRopeNode = MakeLeafRopeNode.create(); - @Specialization - protected RubyString succBang(RubyString string) { - final Rope rope = string.rope; - - if (!rope.isEmpty()) { - final RopeBuilder succBuilder = StringSupport.succCommon(rope, this); - - final Rope newRope = makeLeafRopeNode.executeMake( - succBuilder.getBytes(), - rope.getEncoding(), - CodeRange.CR_UNKNOWN, - NotProvided.INSTANCE); - string.setRope(newRope); + protected RubyString succBang(RubyString string, + @Cached RubyStringLibrary 
libString, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { + if (!string.tstring.isEmpty()) { + final TStringBuilder succBuilder = StringSupport.succCommon(string, this); + string.setTString( + fromByteArrayNode.execute(succBuilder.getBytes(), libString.getTEncoding(string), false)); } return string; @@ -2735,18 +2470,21 @@ public static SumNode create() { public abstract Object executeSum(Object string, Object bits); @Child private DispatchNode addNode = DispatchNode.create(); - private final BytesNode bytesNode = BytesNode.create(); + @Child private TruffleString.GetInternalByteArrayNode byteArrayNode = TruffleString.GetInternalByteArrayNode + .create(); @Specialization protected Object sum(Object string, long bits, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { // Copied from JRuby - final Rope rope = strings.getRope(string); - final byte[] bytes = bytesNode.execute(rope); - int p = 0; - final int len = rope.byteLength(); - final int end = p + len; + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string).tencoding; + var byteArray = byteArrayNode.execute(tstring, encoding); + + var bytes = byteArray.getArray(); + int p = byteArray.getOffset(); + final int end = byteArray.getEnd(); if (bits >= 8 * 8) { // long size * bits in byte Object sum = 0; @@ -2765,7 +2503,7 @@ protected Object sum(Object string, long bits, @Specialization protected Object sum(Object string, NotProvided bits, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { return sum(string, 16, strings); } @@ -2784,17 +2522,17 @@ public abstract static class ToFNode extends CoreMethodArrayArgumentsNode { @Specialization @TruffleBoundary protected double toF(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { try { - return 
convertToDouble(strings.getRope(string)); + return convertToDouble(strings.getTString(string), strings.getEncoding(string)); } catch (NumberFormatException e) { return 0; } } @TruffleBoundary - private double convertToDouble(Rope rope) { - return new DoubleConverter().parse(rope, false, true); + private double convertToDouble(AbstractTruffleString rope, RubyEncoding encoding) { + return new DoubleConverter().parse(rope, encoding, false, true); } } @@ -2812,54 +2550,52 @@ protected RubyString toS(RubyString string) { } @Specialization(guards = "isStringSubclass(string)") - protected RubyString toSOnSubclass(RubyString string) { - final Shape shape = getLanguage().stringShape; - final RubyString result = new RubyString( - coreLibrary().stringClass, - shape, - false, - string.rope, - string.encoding); - AllocationTracing.trace(result, this); - return result; + protected RubyString toSOnSubclass(RubyString string, + @Cached RubyStringLibrary libString, + @Cached AsTruffleStringNode asTruffleStringNode) { + return createStringCopy(asTruffleStringNode, string.tstring, libString.getEncoding(string)); } public boolean isStringSubclass(RubyString string) { return string.getLogicalClass() != coreLibrary().stringClass; } - } @CoreMethod(names = { "to_sym", "intern" }) - @ImportStatic({ StringCachingGuards.class, StringGuards.class, StringOperations.class }) + @ImportStatic(StringGuards.class) public abstract static class ToSymNode extends CoreMethodArrayArgumentsNode { - @Child CodeRangeNode codeRangeNode = CodeRangeNode.create(); + @Child GetByteCodeRangeNode codeRangeNode = GetByteCodeRangeNode.create(); @Specialization( guards = { - "!isBrokenCodeRange(strings.getRope(string), codeRangeNode)", - "equalNode.execute(strings.getRope(string),cachedRope)", - "strings.getEncoding(string) == cachedEncoding" }, + "!isBrokenCodeRange(tstring, encoding, codeRangeNode)", + "equalNode.execute(tstring, encoding, cachedTString, cachedEncoding)" }, limit = "getDefaultCacheLimit()") 
protected RubySymbol toSymCached(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached("strings.getRope(string)") Rope cachedRope, + @Cached RubyStringLibrary strings, + @Cached("asTruffleStringUncached(string)") TruffleString cachedTString, @Cached("strings.getEncoding(string)") RubyEncoding cachedEncoding, - @Cached("getSymbol(cachedRope, cachedEncoding)") RubySymbol cachedSymbol, - @Cached RopeNodes.EqualNode equalNode) { + @Cached("getSymbol(cachedTString, cachedEncoding)") RubySymbol cachedSymbol, + @Cached StringHelperNodes.EqualSameEncodingNode equalNode, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { return cachedSymbol; } - @Specialization(guards = "!isBrokenCodeRange(strings.getRope(string), codeRangeNode)", replaces = "toSymCached") + @Specialization(guards = "!isBrokenCodeRange(tstring, encoding, codeRangeNode)", replaces = "toSymCached") protected RubySymbol toSym(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return getSymbol(strings.getRope(string), strings.getEncoding(string)); + @Cached RubyStringLibrary strings, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { + return getSymbol(strings.getTString(string), strings.getEncoding(string)); } - @Specialization(guards = "isBrokenCodeRange(strings.getRope(string), codeRangeNode)") + @Specialization(guards = "isBrokenCodeRange(tstring, encoding, codeRangeNode)") protected RubySymbol toSymBroken(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { throw new RaiseException(getContext(), coreExceptions().encodingError("invalid encoding symbol", this)); } } 
@@ -2868,64 +2604,68 @@ protected RubySymbol toSymBroken(Object string, @ImportStatic(StringGuards.class) public abstract static class ReverseBangNode extends CoreMethodArrayArgumentsNode { - @Child CharacterLengthNode characterLengthNode = CharacterLengthNode.create(); - @Child private MakeLeafRopeNode makeLeafRopeNode = MakeLeafRopeNode.create(); + @Child CodePointLengthNode codePointLengthNode = CodePointLengthNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); - @Specialization(guards = "reverseIsEqualToSelf(string, characterLengthNode)") - protected RubyString reverseNoOp(RubyString string) { + @Specialization(guards = "reverseIsEqualToSelf(tstring, encoding, codePointLengthNode)") + protected RubyString reverseNoOp(RubyString string, + @Cached RubyStringLibrary libString, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { return string; } @Specialization( guards = { - "!reverseIsEqualToSelf(string, characterLengthNode)", - "isSingleByteOptimizable(string, singleByteOptimizableNode)" }) + "!reverseIsEqualToSelf(tstring, encoding, codePointLengthNode)", + "isSingleByteOptimizable(tstring, encoding, singleByteOptimizableNode)" }) protected RubyString reverseSingleByteOptimizable(RubyString string, - @Cached BytesNode bytesNode, - @Cached CodeRangeNode codeRangeNode, - @Cached SingleByteOptimizableNode singleByteOptimizableNode) { - final Rope rope = string.rope; - final byte[] originalBytes = bytesNode.execute(rope); - final int len = originalBytes.length; + @Cached RubyStringLibrary libString, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + var tencoding = encoding.tencoding; + var byteArray = 
byteArrayNode.execute(tstring, tencoding); + + final int len = byteArray.getLength(); final byte[] reversedBytes = new byte[len]; for (int i = 0; i < len; i++) { - reversedBytes[len - i - 1] = originalBytes[i]; + reversedBytes[len - i - 1] = byteArray.get(i); } - string.setRope( - makeLeafRopeNode.executeMake( - reversedBytes, - rope.getEncoding(), - codeRangeNode.execute(rope), - characterLengthNode.execute(rope))); - + string.setTString(fromByteArrayNode.execute(reversedBytes, tencoding, false)); // codeRangeNode.execute(rope), codePointLengthNode.execute(rope) return string; } @Specialization( guards = { - "!reverseIsEqualToSelf(string, characterLengthNode)", - "!isSingleByteOptimizable(string, singleByteOptimizableNode)" }) + "!reverseIsEqualToSelf(tstring, encoding, codePointLengthNode)", + "!isSingleByteOptimizable(tstring, encoding, singleByteOptimizableNode)" }) protected RubyString reverse(RubyString string, - @Cached BytesNode bytesNode, - @Cached CodeRangeNode codeRangeNode, - @Cached SingleByteOptimizableNode singleByteOptimizableNode) { + @Cached RubyStringLibrary libString, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { // Taken from org.jruby.RubyString#reverse! 
- final Rope rope = string.rope; - final byte[] originalBytes = bytesNode.execute(rope); - int p = 0; - final int len = originalBytes.length; + var tencoding = encoding.tencoding; + var byteArray = byteArrayNode.execute(tstring, tencoding); + + var originalBytes = byteArray.getArray(); + int byteOffset = byteArray.getOffset(); + int p = byteOffset; + final int len = byteArray.getLength(); - final Encoding enc = rope.getEncoding(); - final CodeRange cr = codeRangeNode.execute(rope); final int end = p + len; int op = len; final byte[] reversedBytes = new byte[len]; while (p < end) { - int cl = StringSupport.characterLength(enc, cr, originalBytes, p, end, true); + int cl = byteLengthOfCodePointNode.execute(tstring, p - byteOffset, tencoding); if (cl > 1 || (originalBytes[p] & 0x80) != 0) { op -= cl; System.arraycopy(originalBytes, p, reversedBytes, op, cl); @@ -2935,19 +2675,13 @@ protected RubyString reverse(RubyString string, } } - string.setRope( - makeLeafRopeNode.executeMake( - reversedBytes, - rope.getEncoding(), - codeRangeNode.execute(rope), - characterLengthNode.execute(rope))); - + string.setTString(fromByteArrayNode.execute(reversedBytes, tencoding, false)); // codeRangeNode.execute(rope), codePointLengthNode.execute(rope) return string; } - public static boolean reverseIsEqualToSelf(RubyString string, - CharacterLengthNode characterLengthNode) { - return characterLengthNode.execute(string.rope) <= 1; + public static boolean reverseIsEqualToSelf(AbstractTruffleString tstring, RubyEncoding encoding, + CodePointLengthNode codePointLengthNode) { + return codePointLengthNode.execute(tstring, encoding.tencoding) <= 1; } } @@ -2971,17 +2705,17 @@ protected ToStrNode coerceToStrToString(RubyBaseNodeWithExecute toStr) { return ToStrNodeGen.create(toStr); } - @Specialization(guards = "isEmpty(self.rope)") + @Specialization(guards = "isEmpty(self.tstring)") protected Object trBangSelfEmpty(RubyString self, Object fromStr, Object toStr) { return nil; } 
@Specialization( guards = { - "!isEmpty(self.rope)", - "isEmpty(libToStr.getRope(toStr))" }) + "!isEmpty(self.tstring)", + "isEmpty(libToStr.getTString(toStr))" }) protected Object trBangToEmpty(RubyString self, Object fromStr, Object toStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libToStr) { + @Cached RubyStringLibrary libToStr) { if (deleteBangNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); deleteBangNode = insert(DeleteBangNode.create()); @@ -2993,25 +2727,18 @@ protected Object trBangToEmpty(RubyString self, Object fromStr, Object toStr, @Specialization( guards = { "libFromStr.isRubyString(fromStr)", - "!isEmpty(self.rope)", - "!isEmpty(libToStr.getRope(toStr))" }) + "!isEmpty(self.tstring)", + "!isEmpty(libToStr.getTString(toStr))" }, + limit = "1") protected Object trBangNoEmpty(RubyString self, Object fromStr, Object toStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFromStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libToStr) { + @Cached RubyStringLibrary libFromStr, + @Cached RubyStringLibrary libToStr) { if (checkEncodingNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); checkEncodingNode = insert(CheckEncodingNode.create()); } - return StringNodesHelper.trTransHelper( - checkEncodingNode, - self, - self.rope, - fromStr, - libFromStr.getRope(fromStr), - toStr, - libToStr.getRope(toStr), - false, + return StringHelperNodes.trTransHelper(checkEncodingNode, self, libFromStr, fromStr, libToStr, toStr, false, this); } } @@ -3037,17 +2764,21 @@ protected ToStrNode coerceToStrToString(RubyBaseNodeWithExecute toStr) { } @Specialization( - guards = { "isEmpty(self.rope)" }) + guards = { "isEmpty(self.tstring)" }) protected Object trSBangEmpty(RubyString self, Object fromStr, Object toStr) { return nil; } @Specialization( - guards = { "libFromStr.isRubyString(fromStr)", "libToStr.isRubyString(toStr)", "!isEmpty(self.rope)" }) + guards = { + 
"libFromStr.isRubyString(fromStr)", + "libToStr.isRubyString(toStr)", + "!isEmpty(self.tstring)" }, + limit = "1") protected Object trSBang(RubyString self, Object fromStr, Object toStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFromStr, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libToStr) { - if (libToStr.getRope(toStr).isEmpty()) { + @Cached RubyStringLibrary libFromStr, + @Cached RubyStringLibrary libToStr) { + if (libToStr.getTString(toStr).isEmpty()) { if (deleteBangNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); deleteBangNode = insert(DeleteBangNode.create()); @@ -3061,15 +2792,7 @@ protected Object trSBang(RubyString self, Object fromStr, Object toStr, checkEncodingNode = insert(CheckEncodingNode.create()); } - return StringNodesHelper.trTransHelper( - checkEncodingNode, - self, - self.rope, - fromStr, - libFromStr.getRope(fromStr), - toStr, - libToStr.getRope(toStr), - true, + return StringHelperNodes.trTransHelper(checkEncodingNode, self, libFromStr, fromStr, libToStr, toStr, true, this); } } @@ -3077,7 +2800,6 @@ protected Object trSBang(RubyString self, Object fromStr, Object toStr, @NodeChild(value = "string", type = RubyNode.class) @NodeChild(value = "format", type = RubyBaseNodeWithExecute.class) @CoreMethod(names = "unpack", required = 1) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism public abstract static class UnpackNode extends CoreMethodNode { @@ -3088,24 +2810,26 @@ protected ToStrNode coerceFormat(RubyBaseNodeWithExecute format) { return ToStrNodeGen.create(format); } - @Specialization(guards = { "equalNode.execute(libFormat.getRope(format), cachedFormat)" }) + @Specialization(guards = { "equalNode.execute(libFormat, format, cachedFormat, cachedEncoding)" }) protected RubyArray unpackCached(Object string, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @CachedLibrary(limit = 
"LIBSTRING_CACHE") RubyStringLibrary libFormat, - @Cached("libFormat.getRope(format)") Rope cachedFormat, - @Cached("create(compileFormat(libFormat.getRope(format)))") DirectCallNode callUnpackNode, - @Cached BytesNode bytesNode, - @Cached RopeNodes.EqualNode equalNode, - @Cached StringGetAssociatedNode stringGetAssociatedNode) { - final Rope rope = libString.getRope(string); + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libFormat, + @Cached("asTruffleStringUncached(format)") TruffleString cachedFormat, + @Cached("libFormat.getEncoding(format)") RubyEncoding cachedEncoding, + @Cached("create(compileFormat(getJavaString(format)))") DirectCallNode callUnpackNode, + @Cached StringHelperNodes.EqualNode equalNode, + @Cached StringHelperNodes.StringGetAssociatedNode stringGetAssociatedNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var byteArray = byteArrayNode.execute(libString.getTString(string), libString.getTEncoding(string)); final ArrayResult result; try { result = (ArrayResult) callUnpackNode.call( new Object[]{ - bytesNode.execute(rope), - rope.byteLength(), + byteArray.getArray(), + byteArray.getEnd(), + byteArray.getOffset(), stringGetAssociatedNode.execute(string) }); // TODO impl associated for ImmutableRubyString } catch (FormatException e) { exceptionProfile.enter(); @@ -3117,23 +2841,25 @@ protected RubyArray unpackCached(Object string, Object format, @Specialization( guards = "libFormat.isRubyString(format)", - replaces = "unpackCached") + replaces = "unpackCached", limit = "1") protected RubyArray unpackUncached(Object string, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libFormat, + @Cached ToJavaStringNode toJavaStringNode, @Cached IndirectCallNode callUnpackNode, - @Cached BytesNode bytesNode, - @Cached 
StringGetAssociatedNode stringGetAssociatedNode) { - final Rope rope = libString.getRope(string); + @Cached StringHelperNodes.StringGetAssociatedNode stringGetAssociatedNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var byteArray = byteArrayNode.execute(libString.getTString(string), libString.getTEncoding(string)); final ArrayResult result; try { result = (ArrayResult) callUnpackNode.call( - compileFormat(libFormat.getRope(format)), + compileFormat(toJavaStringNode.executeToJavaString(format)), new Object[]{ - bytesNode.execute(rope), - rope.byteLength(), + byteArray.getArray(), + byteArray.getEnd(), + byteArray.getOffset(), stringGetAssociatedNode.execute(string) }); } catch (FormatException e) { exceptionProfile.enter(); @@ -3148,9 +2874,9 @@ private RubyArray finishUnpack(ArrayResult result) { } @TruffleBoundary - protected RootCallTarget compileFormat(Rope rope) { + protected RootCallTarget compileFormat(String format) { try { - return new UnpackCompiler(getLanguage(), this).compile(RopeOperations.decodeRope(rope)); + return new UnpackCompiler(getLanguage(), this).compile(format); } catch (DeferredRaiseException dre) { throw dre.getException(getContext()); } @@ -3162,192 +2888,58 @@ protected int getCacheLimit() { } - public abstract static class InvertAsciiCaseBytesNode extends RubyBaseNode { - - private final boolean lowerToUpper; - private final boolean upperToLower; - - public static InvertAsciiCaseBytesNode createLowerToUpper() { - return InvertAsciiCaseBytesNodeGen.create(true, false); - } - - public static InvertAsciiCaseBytesNode createUpperToLower() { - return InvertAsciiCaseBytesNodeGen.create(false, true); - } - - public static InvertAsciiCaseBytesNode createSwapCase() { - return InvertAsciiCaseBytesNodeGen.create(true, true); - } - - protected InvertAsciiCaseBytesNode(boolean lowerToUpper, boolean upperToLower) { - this.lowerToUpper = lowerToUpper; - this.upperToLower = upperToLower; - } - - public abstract byte[] 
executeInvert(byte[] bytes, int start); - - @Specialization - protected byte[] invert(byte[] bytes, int start, - @Cached BranchProfile foundLowerCaseCharProfile, - @Cached BranchProfile foundUpperCaseCharProfile, - @Cached LoopConditionProfile loopProfile) { - byte[] modified = null; - - int i = start; - try { - for (; loopProfile.inject(i < bytes.length); i++) { - final byte b = bytes[i]; - - if (lowerToUpper && StringSupport.isAsciiLowercase(b)) { - foundLowerCaseCharProfile.enter(); - - if (modified == null) { - modified = bytes.clone(); - } - - // Convert lower-case ASCII char to upper-case. - modified[i] ^= 0x20; - } - - if (upperToLower && StringSupport.isAsciiUppercase(b)) { - foundUpperCaseCharProfile.enter(); - - if (modified == null) { - modified = bytes.clone(); - } - - // Convert upper-case ASCII char to lower-case. - modified[i] ^= 0x20; - } - - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, i - start); - } - - return modified; - } - - } - - public abstract static class InvertAsciiCaseNode extends RubyBaseNode { - - @Child private InvertAsciiCaseBytesNode invertNode; - - public static InvertAsciiCaseNode createLowerToUpper() { - return InvertAsciiCaseNodeGen.create(InvertAsciiCaseBytesNode.createLowerToUpper()); - } - - public static InvertAsciiCaseNode createUpperToLower() { - return InvertAsciiCaseNodeGen.create(InvertAsciiCaseBytesNode.createUpperToLower()); - } - - public static InvertAsciiCaseNode createSwapCase() { - return InvertAsciiCaseNodeGen.create(InvertAsciiCaseBytesNode.createSwapCase()); - } - - public InvertAsciiCaseNode(InvertAsciiCaseBytesNode invertNode) { - this.invertNode = invertNode; - } - - public abstract Object executeInvert(RubyString string); - - @Specialization - protected Object invert(RubyString string, - @Cached BytesNode bytesNode, - @Cached CharacterLengthNode characterLengthNode, - @Cached CodeRangeNode codeRangeNode, - @Cached MakeLeafRopeNode makeLeafRopeNode, - @Cached 
ConditionProfile noopProfile) { - final Rope rope = string.rope; - - final byte[] bytes = bytesNode.execute(rope); - byte[] modified = invertNode.executeInvert(bytes, 0); - - if (noopProfile.profile(modified == null)) { - return nil; - } else { - final Rope newRope = makeLeafRopeNode.executeMake( - modified, - rope.getEncoding(), - codeRangeNode.execute(rope), - characterLengthNode.execute(rope)); - string.setRope(newRope); - - return string; - } - } - - } - @Primitive(name = "string_upcase!", raiseIfNotMutable = 0, lowerFixnum = 1) @ImportStatic({ StringGuards.class, Config.class }) public abstract static class StringUpcaseBangPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); - - @Specialization(guards = { "isSingleByteCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object upcaseSingleByte(RubyString string, int caseMappingOptions, - @Cached("createLowerToUpper()") InvertAsciiCaseNode invertAsciiCaseNode) { - return invertAsciiCaseNode.executeInvert(string); - } - - @Specialization(guards = { "isSimpleAsciiCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) - protected Object upcaseMultiByteAsciiSimple(RubyString string, int caseMappingOptions, - @Cached @Shared("bytesNode") BytesNode bytesNode, - @Cached CharacterLengthNode characterLengthNode, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("makeLeafRopeNode") MakeLeafRopeNode makeLeafRopeNode, - @Cached @Shared("dummyEncodingProfile") ConditionProfile dummyEncodingProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - final Rope rope = string.rope; - final Encoding encoding = rope.getEncoding(); + @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); + private final ConditionProfile dummyEncodingProfile = ConditionProfile.createBinaryProfile(); - if 
(dummyEncodingProfile.profile(encoding.isDummy())) { + @Specialization( + guards = "!isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") + protected Object upcaseAsciiCodePoints(RubyString string, int caseMappingOptions, + @Cached RubyStringLibrary libString, + @Cached("createLowerToUpper()") StringHelperNodes.InvertAsciiCaseNode invertAsciiCaseNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + if (dummyEncodingProfile.profile(encoding.isDummy)) { throw new RaiseException( getContext(), coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - final CodeRange cr = codeRangeNode.execute(rope); - final byte[] inputBytes = bytesNode.execute(rope); - final byte[] outputBytes = StringSupport.upcaseMultiByteAsciiSimple(encoding, cr, inputBytes); - - if (modifiedProfile.profile(inputBytes != outputBytes)) { - string.setRope( - makeLeafRopeNode.executeMake(outputBytes, encoding, cr, characterLengthNode.execute(rope))); - return string; - } else { - return nil; - } + return invertAsciiCaseNode.executeInvert(string); } - @Specialization(guards = { "isComplexCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)" }) + @Specialization( + guards = "isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") protected Object upcaseMultiByteComplex(RubyString string, int caseMappingOptions, - @Cached @Shared("bytesNode") BytesNode bytesNode, - @Cached @Shared("codeRangeNode") CodeRangeNode codeRangeNode, - @Cached @Shared("makeLeafRopeNode") MakeLeafRopeNode makeLeafRopeNode, - @Cached @Shared("dummyEncodingProfile") ConditionProfile dummyEncodingProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - final Rope rope = string.rope; - final Encoding encoding = rope.getEncoding(); - - if (dummyEncodingProfile.profile(encoding.isDummy())) { + @Cached RubyStringLibrary 
libString, + @Cached GetByteCodeRangeNode codeRangeNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Cached ConditionProfile modifiedProfile, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + var tencoding = encoding.tencoding; + + if (dummyEncodingProfile.profile(encoding.isDummy)) { throw new RaiseException( getContext(), coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - final RopeBuilder builder = RopeBuilder.createRopeBuilder(bytesNode.execute(rope), rope.getEncoding()); + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + + // TODO (nirvdrum 24-Jun-22): Make the byte array builder copy-on-write so we don't eagerly clone the source byte array. + var builder = ByteArrayBuilder.create(byteArray); + final boolean modified = StringSupport - .upcaseMultiByteComplex(encoding, codeRangeNode.execute(rope), builder, caseMappingOptions, this); + .upcaseMultiByteComplex(encoding.jcoding, + codeRangeNode.execute(string.tstring, tencoding), + builder, caseMappingOptions, this); if (modifiedProfile.profile(modified)) { - string.setRope( - makeLeafRopeNode - .executeMake(builder.getBytes(), rope.getEncoding(), CR_UNKNOWN, NotProvided.INSTANCE)); - + string.setTString(fromByteArrayNode.execute(builder.getBytes(), tencoding, false)); return string; } else { return nil; @@ -3361,11 +2953,9 @@ public abstract static class ValidEncodingQueryNode extends CoreMethodArrayArgum @Specialization protected boolean validEncoding(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @Cached CodeRangeNode codeRangeNode) { - final CodeRange codeRange = codeRangeNode.execute(libString.getRope(string)); - - return codeRange != CR_BROKEN; + @Cached RubyStringLibrary libString, + @Cached TruffleString.IsValidNode isValidNode) { + return 
isValidNode.execute(libString.getTString(string), libString.getTEncoding(string)); } } @@ -3374,183 +2964,142 @@ protected boolean validEncoding(Object string, @ImportStatic({ StringGuards.class, Config.class }) public abstract static class StringCapitalizeBangPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child private BytesNode bytesNode = BytesNode.create(); - @Child private CodeRangeNode codeRangeNode = CodeRangeNode.create(); - @Child private CharacterLengthNode characterLengthNode = CharacterLengthNode.create(); - @Child private MakeLeafRopeNode makeLeafRopeNode = MakeLeafRopeNode.create(); - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); + @Child private GetByteCodeRangeNode codeRangeNode; + @Child private TruffleString.CopyToByteArrayNode copyToByteArrayNode; + @Child private TruffleString.FromByteArrayNode fromByteArrayNode; + @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); + private final ConditionProfile dummyEncodingProfile = ConditionProfile.createBinaryProfile(); + private final ConditionProfile emptyStringProfile = ConditionProfile.createBinaryProfile(); - @Specialization(guards = "isSingleByteCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)") - protected Object capitalizeSingleByte(RubyString string, int caseMappingOptions, - @Cached("createUpperToLower()") InvertAsciiCaseBytesNode invertAsciiCaseNode, - @Cached @Shared("emptyStringProfile") ConditionProfile emptyStringProfile, + @Specialization( + guards = "!isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") + protected Object capitalizeAsciiCodePoints(RubyString string, int caseMappingOptions, + @Cached RubyStringLibrary libString, + @Cached("createUpperToLower()") StringHelperNodes.InvertAsciiCaseHelperNode invertAsciiCaseNode, + @Cached CreateCodePointIteratorNode createCodePointIteratorNode, + @Cached TruffleStringIterator.NextNode 
nextNode, @Cached @Exclusive ConditionProfile firstCharIsLowerProfile, - @Cached @Exclusive ConditionProfile otherCharsAlreadyLowerProfile, - @Cached @Exclusive ConditionProfile mustCapitalizeFirstCharProfile) { - final Rope rope = string.rope; + @Cached @Exclusive ConditionProfile modifiedProfile, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + var tencoding = encoding.tencoding; - if (emptyStringProfile.profile(rope.isEmpty())) { + if (emptyStringProfile.profile(tstring.isEmpty())) { return nil; } - final byte[] sourceBytes = bytesNode.execute(rope); - final byte[] finalBytes; - - final byte[] processedBytes = invertAsciiCaseNode.executeInvert(sourceBytes, 1); - - if (otherCharsAlreadyLowerProfile.profile(processedBytes == null)) { - // Bytes 1..N are either not letters or already lowercased. Time to check the first byte. - - if (firstCharIsLowerProfile.profile(StringSupport.isAsciiLowercase(sourceBytes[0]))) { - // The first char requires capitalization, but the remaining bytes in the original string are - // already properly cased. - finalBytes = sourceBytes.clone(); - } else { - // The string is already capitalized. - return nil; - } - } else { - // At least one char was lowercased when looking at bytes 1..N. We still must check the first byte. 
- finalBytes = processedBytes; - } - - if (mustCapitalizeFirstCharProfile.profile(StringSupport.isAsciiLowercase(sourceBytes[0]))) { - finalBytes[0] ^= 0x20; - } - - string.setRope( - makeLeafRopeNode.executeMake( - finalBytes, - rope.getEncoding(), - codeRangeNode.execute(rope), - characterLengthNode.execute(rope))); - - return string; - } - - @Specialization(guards = "isSimpleAsciiCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)") - protected Object capitalizeMultiByteAsciiSimple(RubyString string, int caseMappingOptions, - @Cached @Shared("dummyEncodingProfile") BranchProfile dummyEncodingProfile, - @Cached @Shared("emptyStringProfile") ConditionProfile emptyStringProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - // Taken from org.jruby.RubyString#capitalize_bang19. - - final Rope rope = string.rope; - final Encoding enc = rope.getEncoding(); - - if (enc.isDummy()) { - dummyEncodingProfile.enter(); + if (dummyEncodingProfile.profile(encoding.isDummy)) { throw new RaiseException( getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(enc, this)); + coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - if (emptyStringProfile.profile(rope.isEmpty())) { - return nil; + byte[] bytes = null; + + var iterator = createCodePointIteratorNode.execute(tstring, tencoding, ErrorHandling.RETURN_NEGATIVE); + int firstCodePoint = nextNode.execute(iterator); + if (firstCharIsLowerProfile.profile(StringSupport.isAsciiLowercase(firstCodePoint))) { + bytes = copyByteArray(tstring, tencoding); + bytes[0] ^= 0x20; } - final CodeRange cr = codeRangeNode.execute(rope); - final byte[] inputBytes = bytesNode.execute(rope); - final byte[] outputBytes = StringSupport.capitalizeMultiByteAsciiSimple(enc, cr, inputBytes); + bytes = invertAsciiCaseNode.executeInvert(string, iterator, bytes); - if (modifiedProfile.profile(inputBytes != outputBytes)) { - string.setRope( - 
makeLeafRopeNode.executeMake( - outputBytes, - enc, - cr, - characterLengthNode.execute(rope))); + if (modifiedProfile.profile(bytes != null)) { + string.setTString(makeTString(bytes, tencoding)); return string; + } else { + return nil; } - - return nil; } - @Specialization(guards = "isComplexCaseMapping(string, caseMappingOptions, singleByteOptimizableNode)") + @Specialization( + guards = "isComplexCaseMapping(tstring, encoding, caseMappingOptions, singleByteOptimizableNode)") protected Object capitalizeMultiByteComplex(RubyString string, int caseMappingOptions, - @Cached @Shared("dummyEncodingProfile") BranchProfile dummyEncodingProfile, - @Cached @Shared("emptyStringProfile") ConditionProfile emptyStringProfile, - @Cached @Shared("modifiedProfile") ConditionProfile modifiedProfile) { - final Rope rope = string.rope; - final Encoding enc = rope.getEncoding(); - - if (enc.isDummy()) { - dummyEncodingProfile.enter(); + @Cached RubyStringLibrary libString, + @Cached ConditionProfile modifiedProfile, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode, + @Bind("string.tstring") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + + if (dummyEncodingProfile.profile(encoding.isDummy)) { throw new RaiseException( getContext(), - coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(enc, this)); + coreExceptions().encodingCompatibilityErrorIncompatibleWithOperation(encoding, this)); } - if (emptyStringProfile.profile(rope.isEmpty())) { + if (emptyStringProfile.profile(tstring.isEmpty())) { return nil; } - final RopeBuilder builder = RopeBuilder.createRopeBuilder(bytesNode.execute(rope), rope.getEncoding()); + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + + // TODO (nirvdrum 26-May-22): Make the byte array builder copy-on-write so we don't eagerly clone the source byte array. 
+ var builder = ByteArrayBuilder.create(byteArray); + + var cr = getCodeRange(tstring, encoding.tencoding); final boolean modified = StringSupport - .capitalizeMultiByteComplex(enc, codeRangeNode.execute(rope), builder, caseMappingOptions, this); + .capitalizeMultiByteComplex(encoding.jcoding, cr, builder, caseMappingOptions, this); + if (modifiedProfile.profile(modified)) { - string.setRope( - makeLeafRopeNode - .executeMake(builder.getBytes(), rope.getEncoding(), CR_UNKNOWN, NotProvided.INSTANCE)); + string.setTString(makeTString(builder.getUnsafeBytes(), encoding.tencoding)); return string; } else { return nil; } } - } - - @CoreMethod(names = "clear", raiseIfNotMutableSelf = true) - public abstract static class ClearNode extends CoreMethodArrayArgumentsNode { - - @Child private SubstringNode substringNode = SubstringNode.create(); + private byte[] copyByteArray(AbstractTruffleString string, TruffleString.Encoding encoding) { + if (copyToByteArrayNode == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + copyToByteArrayNode = insert(TruffleString.CopyToByteArrayNode.create()); + } - @Specialization - protected RubyString clear(RubyString string) { - string.setRope(substringNode.executeSubstring(string.rope, 0, 0)); - return string; + return copyToByteArrayNode.execute(string, encoding); } - } - public static class StringNodesHelper { + private TruffleString.CodeRange getCodeRange(AbstractTruffleString string, TruffleString.Encoding encoding) { + if (codeRangeNode == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + codeRangeNode = insert(GetByteCodeRangeNode.create()); + } - @TruffleBoundary - private static Object trTransHelper(CheckEncodingNode checkEncodingNode, RubyString self, Rope selfRope, - Object fromStr, Rope fromStrRope, - Object toStr, Rope toStrRope, boolean sFlag, Node node) { - final RubyEncoding e1 = checkEncodingNode.executeCheckEncoding(self, fromStr); - final RubyEncoding e2 = 
checkEncodingNode.executeCheckEncoding(self, toStr); - final RubyEncoding enc = e1 == e2 ? e1 : checkEncodingNode.executeCheckEncoding(fromStr, toStr); + return codeRangeNode.execute(string, encoding); + } - final Rope ret = StringSupport - .trTransHelper(selfRope, fromStrRope, toStrRope, e1.jcoding, enc.jcoding, sFlag, node); - if (ret == null) { - return Nil.INSTANCE; + private AbstractTruffleString makeTString(byte[] bytes, TruffleString.Encoding encoding) { + if (fromByteArrayNode == null) { + CompilerDirectives.transferToInterpreterAndInvalidate(); + fromByteArrayNode = insert(TruffleString.FromByteArrayNode.create()); } - self.setRope(ret, enc); - return self; + return fromByteArrayNode.execute(bytes, 0, bytes.length, encoding, false); + } + } + + @CoreMethod(names = "clear", raiseIfNotMutableSelf = true) + public abstract static class ClearNode extends CoreMethodArrayArgumentsNode { + @Specialization + protected RubyString clear(RubyString string, + @Cached RubyStringLibrary libString) { + string.setTString(libString.getTEncoding(string).getEmpty()); + return string; } } - @Primitive(name = "character_printable_p") + @Primitive(name = "character_printable?", lowerFixnum = 0) public abstract static class CharacterPrintablePrimitiveNode extends PrimitiveArrayArgumentsNode { @Specialization - protected boolean isCharacterPrintable(Object character, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached ConditionProfile is7BitProfile, - @Cached AsciiOnlyNode asciiOnlyNode, - @Cached GetCodePointNode getCodePointNode) { - final Rope rope = strings.getRope(character); - final RubyEncoding encoding = strings.getEncoding(character); - final int codePoint = getCodePointNode.executeGetCodePoint(encoding, rope, 0); - - if (is7BitProfile.profile(asciiOnlyNode.execute(rope))) { - return StringSupport.isAsciiPrintable(codePoint); + protected boolean isCharacterPrintable(int codepoint, RubyEncoding encoding, + @Cached ConditionProfile 
asciiPrintableProfile) { + assert codepoint >= 0; + + if (asciiPrintableProfile + .profile(encoding.isAsciiCompatible && StringSupport.isAscii(codepoint))) { + return StringSupport.isAsciiPrintable(codepoint); } else { - return isMBCPrintable(rope.getEncoding(), codePoint); + return isMBCPrintable(encoding.jcoding, codepoint); } } @@ -3558,13 +3107,13 @@ protected boolean isCharacterPrintable(Object character, protected boolean isMBCPrintable(Encoding encoding, int codePoint) { return encoding.isPrint(codePoint); } - } @Primitive(name = "string_append") public abstract static class StringAppendPrimitiveNode extends CoreMethodArrayArgumentsNode { - @Child private StringAppendNode stringAppendNode = StringAppendNode.create(); + @Child private StringHelperNodes.StringAppendNode stringAppendNode = StringHelperNodes.StringAppendNode + .create(); public static StringAppendPrimitiveNode create() { return StringAppendPrimitiveNodeFactory.create(null); @@ -3574,8 +3123,8 @@ public static StringAppendPrimitiveNode create() { @Specialization protected RubyString stringAppend(RubyString string, Object other) { - final RopeWithEncoding result = stringAppendNode.executeStringAppend(string, other); - string.setRope(result.getRope(), result.getEncoding()); + final RubyString result = stringAppendNode.executeStringAppend(string, other); + string.setTString(result.tstring, result.getEncodingUnprofiled()); return string; } @@ -3585,36 +3134,38 @@ protected RubyString stringAppend(RubyString string, Object other) { @ImportStatic(StringGuards.class) public abstract static class StringAwkSplitPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child private BytesNode bytesNode = BytesNode.create(); @Child private CallBlockNode yieldNode = CallBlockNode.create(); - @Child CodeRangeNode codeRangeNode = CodeRangeNode.create(); - @Child private GetCodePointNode getCodePointNode = GetCodePointNode.create(); - @Child private StringSubstringNode substringNode = StringSubstringNode.create(); 
+ @Child GetByteCodeRangeNode codeRangeNode = GetByteCodeRangeNode.create(); private static final int SUBSTRING_CREATED = -1; - @Specialization(guards = "is7Bit(strings.getRope(string), codeRangeNode)") + @Specialization(guards = "is7Bit(tstring, encoding, codeRangeNode)") protected Object stringAwkSplitSingleByte(Object string, int limit, Object block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached ConditionProfile executeBlockProfile, @Cached ConditionProfile growArrayProfile, @Cached ConditionProfile trailingSubstringProfile, - @Cached ConditionProfile trailingEmptyStringProfile) { + @Cached ConditionProfile trailingEmptyStringProfile, + @Cached TruffleString.MaterializeNode materializeNode, + @Cached TruffleString.ReadByteNode readByteNode, + @Cached TruffleString.SubstringByteIndexNode substringNode, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { Object[] ret = new Object[10]; int storeIndex = 0; - final Rope rope = strings.getRope(string); - final byte[] bytes = bytesNode.execute(rope); + int byteLength = tstring.byteLength(encoding.tencoding); + materializeNode.execute(tstring, encoding.tencoding); int substringStart = 0; boolean findingSubstringEnd = false; - for (int i = 0; i < bytes.length; i++) { - if (StringSupport.isAsciiSpace(bytes[i])) { + for (int i = 0; i < byteLength; i++) { + if (StringSupport.isAsciiSpace(readByteNode.execute(tstring, i, encoding.tencoding))) { if (findingSubstringEnd) { findingSubstringEnd = false; - final RubyString substring = substringNode - .executeSubstring(string, substringStart, i - substringStart); + final RubyString substring = createSubString(substringNode, tstring, encoding, substringStart, + i - substringStart); ret = addSubstring( ret, storeIndex++, @@ -3637,13 +3188,14 @@ protected Object stringAwkSplitSingleByte(Object string, int limit, Object block } if 
(trailingSubstringProfile.profile(findingSubstringEnd)) { - final RubyString substring = substringNode - .executeSubstring(string, substringStart, bytes.length - substringStart); + final RubyString substring = createSubString(substringNode, tstring, encoding, substringStart, + byteLength - substringStart); ret = addSubstring(ret, storeIndex++, substring, block, executeBlockProfile, growArrayProfile); } - if (trailingEmptyStringProfile.profile(limit < 0 && StringSupport.isAsciiSpace(bytes[bytes.length - 1]))) { - final RubyString substring = substringNode.executeSubstring(string, bytes.length - 1, 0); + if (trailingEmptyStringProfile.profile(limit < 0 && + StringSupport.isAsciiSpace(readByteNode.execute(tstring, byteLength - 1, encoding.tencoding)))) { + final RubyString substring = createSubString(substringNode, tstring, encoding, byteLength - 1, 0); ret = addSubstring(ret, storeIndex++, substring, block, executeBlockProfile, growArrayProfile); } @@ -3654,40 +3206,39 @@ protected Object stringAwkSplitSingleByte(Object string, int limit, Object block } } - @TruffleBoundary - @Specialization(guards = "!is7Bit(strings.getRope(string), codeRangeNode)") + @Specialization(guards = "isValid(tstring, encoding, codeRangeNode)") protected Object stringAwkSplit(Object string, int limit, Object block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached ConditionProfile executeBlockProfile, @Cached ConditionProfile growArrayProfile, - @Cached ConditionProfile trailingSubstringProfile) { + @Cached ConditionProfile trailingSubstringProfile, + @Cached CreateCodePointIteratorNode createCodePointIteratorNode, + @Cached TruffleStringIterator.NextNode nextNode, + @Cached TruffleString.SubstringByteIndexNode substringNode, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { Object[] ret = new Object[10]; int storeIndex = 0; - final Rope rope = 
strings.getRope(string); - final RubyEncoding rubyEncoding = strings.getEncoding(string); final boolean limitPositive = limit > 0; int i = limit > 0 ? 1 : 0; - final byte[] bytes = bytesNode.execute(rope); - int p = 0; - int ptr = p; - int len = rope.byteLength(); - int end = p + len; - final Encoding enc = rope.getEncoding(); - final CodeRange cr = rope.getCodeRange(); - boolean skip = true; + var tencoding = encoding.tencoding; + final int len = tstring.byteLength(tencoding); + var iterator = createCodePointIteratorNode.execute(tstring, tencoding, ErrorHandling.RETURN_NEGATIVE); + + boolean skip = true; int e = 0, b = 0; - while (p < end) { - final int c = getCodePointNode.executeGetCodePoint(rubyEncoding, rope, p); - p += StringSupport.characterLength(enc, cr, bytes, p, end, true); + while (iterator.hasNext()) { + int c = nextNode.execute(iterator); + int p = iterator.getByteIndex(); if (skip) { if (StringSupport.isAsciiSpace(c)) { - b = p - ptr; + b = p; } else { - e = p - ptr; + e = p; skip = false; if (limitPositive && limit <= i) { break; @@ -3695,7 +3246,7 @@ protected Object stringAwkSplit(Object string, int limit, Object block, } } else { if (StringSupport.isAsciiSpace(c)) { - final RubyString substring = substringNode.executeSubstring(string, b, e - b); + var substring = createSubString(substringNode, tstring, encoding, b, e - b); ret = addSubstring( ret, storeIndex++, @@ -3704,18 +3255,18 @@ protected Object stringAwkSplit(Object string, int limit, Object block, executeBlockProfile, growArrayProfile); skip = true; - b = p - ptr; + b = p; if (limitPositive) { i++; } } else { - e = p - ptr; + e = p; } } } if (trailingSubstringProfile.profile(len > 0 && (limitPositive || len > b || limit < 0))) { - final RubyString substring = substringNode.executeSubstring(string, b, len - b); + var substring = createSubString(substringNode, tstring, encoding, b, len - b); ret = addSubstring(ret, storeIndex++, substring, block, executeBlockProfile, growArrayProfile); } @@ 
-3726,6 +3277,14 @@ protected Object stringAwkSplit(Object string, int limit, Object block, } } + @Specialization(guards = "isBrokenCodeRange(tstring, encoding, codeRangeNode)") + protected Object broken(Object string, int limit, Object block, + @Cached RubyStringLibrary strings, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { + throw new RaiseException(getContext(), coreExceptions().argumentErrorInvalidByteSequence(encoding, this)); + } + private Object[] addSubstring(Object[] store, int index, RubyString substring, Object block, ConditionProfile executeBlockProfile, ConditionProfile growArrayProfile) { if (executeBlockProfile.profile(block != nil)) { @@ -3747,41 +3306,24 @@ private Object[] addSubstring(Object[] store, int index, RubyString substring, @Primitive(name = "string_byte_substring", lowerFixnum = { 1, 2 }) public abstract static class StringByteSubstringPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child private NormalizeIndexNode normalizeIndexNode = NormalizeIndexNode.create(); - @Child private StringSubstringNode substringNode = StringSubstringNode.create(); - - public static StringByteSubstringPrimitiveNode create() { - return StringByteSubstringPrimitiveNodeFactory.create(null); - } - - public abstract Object executeStringByteSubstring(Object string, Object index, Object length); + @Child private StringHelperNodes.NormalizeIndexNode normalizeIndexNode = StringHelperNodes.NormalizeIndexNode + .create(); @Specialization protected Object stringByteSubstring(Object string, int index, NotProvided length, - @Cached ConditionProfile negativeLengthProfile, @Cached ConditionProfile indexOutOfBoundsProfile, - @Cached ConditionProfile lengthTooLongProfile, - @Cached ConditionProfile nilSubstringProfile, - @Cached ConditionProfile emptySubstringProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Object subString = 
stringByteSubstring( - string, - index, - 1, - negativeLengthProfile, - indexOutOfBoundsProfile, - lengthTooLongProfile, - libString); - - if (nilSubstringProfile.profile(subString == nil)) { - return subString; - } + @Cached RubyStringLibrary libString, + @Cached TruffleString.SubstringByteIndexNode substringNode) { + var tString = libString.getTString(string); + var encoding = libString.getEncoding(string); + final int stringByteLength = tString.byteLength(encoding.tencoding); + final int normalizedIndex = normalizeIndexNode.executeNormalize(index, stringByteLength); - if (emptySubstringProfile.profile(((RubyString) subString).rope.isEmpty())) { + if (indexOutOfBoundsProfile.profile(normalizedIndex < 0 || normalizedIndex >= stringByteLength)) { return nil; } - return subString; + return createSubString(substringNode, tString, encoding, normalizedIndex, 1); } @Specialization @@ -3789,13 +3331,15 @@ protected Object stringByteSubstring(Object string, int index, int length, @Cached ConditionProfile negativeLengthProfile, @Cached ConditionProfile indexOutOfBoundsProfile, @Cached ConditionProfile lengthTooLongProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached RubyStringLibrary libString, + @Cached TruffleString.SubstringByteIndexNode substringNode) { if (negativeLengthProfile.profile(length < 0)) { return nil; } - final Rope rope = libString.getRope(string); - final int stringByteLength = rope.byteLength(); + var tString = libString.getTString(string); + var encoding = libString.getEncoding(string); + final int stringByteLength = tString.byteLength(encoding.tencoding); final int normalizedIndex = normalizeIndexNode.executeNormalize(index, stringByteLength); if (indexOutOfBoundsProfile.profile(normalizedIndex < 0 || normalizedIndex > stringByteLength)) { @@ -3803,10 +3347,10 @@ protected Object stringByteSubstring(Object string, int index, int length, } if (lengthTooLongProfile.profile(normalizedIndex + length > 
stringByteLength)) { - length = rope.byteLength() - normalizedIndex; + length = stringByteLength - normalizedIndex; } - return substringNode.executeSubstring(string, normalizedIndex, length); + return createSubString(substringNode, tString, encoding, normalizedIndex, length); } @Fallback @@ -3816,100 +3360,63 @@ protected Object stringByteSubstring(Object string, Object range, Object length) } + /** Like {@code string.byteslice(byteIndex)} but returns nil if the character is broken. */ @Primitive(name = "string_chr_at", lowerFixnum = 1) @ImportStatic(StringGuards.class) public abstract static class StringChrAtPrimitiveNode extends CoreMethodArrayArgumentsNode { @Specialization( - guards = { "indexOutOfBounds(strings.getRope(string), byteIndex)" }) + guards = { "indexOutOfBounds(strings.byteLength(string), byteIndex)" }) protected Object stringChrAtOutOfBounds(Object string, int byteIndex, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { return nil; } @Specialization( guards = { - "!indexOutOfBounds(strings.getRope(string), byteIndex)", - "isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)" }) + "!indexOutOfBounds(tstring.byteLength(encoding.tencoding), byteIndex)", + "is7Bit(tstring, encoding, codeRangeNode)" }) protected Object stringChrAtSingleByte(Object string, int byteIndex, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached StringByteSubstringPrimitiveNode stringByteSubstringNode, - @Cached SingleByteOptimizableNode singleByteOptimizableNode) { - return stringByteSubstringNode.executeStringByteSubstring(string, byteIndex, 1); + @Cached RubyStringLibrary strings, + @Cached TruffleString.GetByteCodeRangeNode codeRangeNode, + @Cached TruffleString.SubstringByteIndexNode substringByteIndexNode, + @Bind("strings.getTString(string)") AbstractTruffleString tstring, + @Bind("strings.getEncoding(string)") RubyEncoding encoding) { + return 
createSubString(substringByteIndexNode, tstring, encoding, byteIndex, 1); } @Specialization( guards = { - "!indexOutOfBounds(strings.getRope(string), byteIndex)", - "!isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)" }) + "!indexOutOfBounds(originalTString.byteLength(originalEncoding.tencoding), byteIndex)", + "!is7Bit(originalTString, originalEncoding, codeRangeNode)" }) protected Object stringChrAt(Object string, int byteIndex, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, + @Cached TruffleString.GetByteCodeRangeNode codeRangeNode, @Cached GetActualEncodingNode getActualEncodingNode, - @Cached BytesNode bytesNode, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached CodeRangeNode codeRangeNode, - @Cached SingleByteOptimizableNode singleByteOptimizableNode, - @Cached MakeStringNode makeStringNode) { - final Rope rope = strings.getRope(string); - final RubyEncoding encoding = getActualEncodingNode.execute(rope, strings.getEncoding(string)); - final int end = rope.byteLength(); - final byte[] bytes = bytesNode.execute(rope); - final int c = calculateCharacterLengthNode.characterLength( - encoding.jcoding, - codeRangeNode.execute(rope), - Bytes.fromRange(bytes, byteIndex, end)); - - if (!StringSupport.MBCLEN_CHARFOUND_P(c)) { - return nil; - } - - if (c + byteIndex > end) { + @Cached TruffleString.SubstringByteIndexNode substringByteIndexNode, + @Cached TruffleString.ForceEncodingNode forceEncodingNode, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Cached ConditionProfile brokenProfile, + @Bind("strings.getTString(string)") AbstractTruffleString originalTString, + @Bind("strings.getEncoding(string)") RubyEncoding originalEncoding) { + final RubyEncoding actualEncoding = getActualEncodingNode.execute(originalTString, originalEncoding); + var tstring = forceEncodingNode.execute(originalTString, originalEncoding.tencoding, 
+ actualEncoding.tencoding); + + final int clen = byteLengthOfCodePointNode.execute(tstring, byteIndex, actualEncoding.tencoding, + ErrorHandling.RETURN_NEGATIVE); + + if (brokenProfile.profile(!StringSupport.MBCLEN_CHARFOUND_P(clen))) { return nil; } - return makeStringNode.executeMake( - ArrayUtils.extractRange(bytes, byteIndex, byteIndex + c), - encoding, - CR_UNKNOWN); - } - - protected static boolean indexOutOfBounds(Rope rope, int byteIndex) { - return ((byteIndex < 0) || (byteIndex >= rope.byteLength())); - } - - } - - @ImportStatic({ StringGuards.class, StringOperations.class }) - public abstract static class StringEqualNode extends RubyBaseNode { - - @Child private AreComparableRopesNode areComparableNode; - - public abstract boolean executeStringEqual(Rope string, Rope other); - - // Same Rope implies same Encoding and therefore comparable - @Specialization(guards = "string == other") - protected boolean sameRope(Rope string, Rope other) { - return true; - } - - @Specialization(guards = "!areComparable(string, other)") - protected boolean notComparable(Rope string, Rope other) { - return false; - } + assert byteIndex + clen <= tstring.byteLength(actualEncoding.tencoding); - @Specialization( - guards = "areComparable(string, other)") - protected boolean stringEquals(Rope string, Rope other, - @Cached RopeNodes.BytesEqualNode bytesEqualNode) { - return bytesEqualNode.execute(string, other); + return createSubString(substringByteIndexNode, tstring, actualEncoding, byteIndex, clen); } - protected boolean areComparable(Rope string, Rope other) { - if (areComparableNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - areComparableNode = insert(AreComparableRopesNode.create()); - } - return areComparableNode.execute(string, other); + protected static boolean indexOutOfBounds(int byteLength, int byteIndex) { + return byteIndex < 0 || byteIndex >= byteLength; } } @@ -3919,48 +3426,40 @@ public abstract static class StringEscapePrimitiveNode 
extends PrimitiveArrayArg @Specialization protected RubyString string_escape(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached StringNodes.MakeStringNode makeStringNode) { - final Rope rope = rbStrEscape(strings.getRope(string)); - return makeStringNode.fromRope(rope, Encodings.US_ASCII); + @Cached RubyStringLibrary strings, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var tstring = strings.getTString(string); + var encoding = strings.getEncoding(string); + var byteArray = byteArrayNode.execute(tstring, encoding.tencoding); + final TruffleString escaped = rbStrEscape(tstring, encoding, byteArray); + return createString(escaped, Encodings.US_ASCII); } // MRI: rb_str_escape @TruffleBoundary - private static Rope rbStrEscape(Rope str) { - final Encoding enc = str.getEncoding(); - final byte[] pBytes = str.getBytes(); - final CodeRange cr = str.getCodeRange(); - - int p = 0; - int pend = str.byteLength(); - int prev = p; - RopeBuilder result = new RopeBuilder(); - boolean unicode_p = enc.isUnicode(); - boolean asciicompat = enc.isAsciiCompatible(); - - while (p < pend) { - int c, cc; - int n = StringSupport.characterLength(enc, cr, pBytes, p, pend, false); - if (!MBCLEN_CHARFOUND_P(n)) { - if (p > prev) { - result.append(pBytes, prev, p - prev); - } - n = enc.minLength(); - if (pend < p + n) { - n = (pend - p); - } - while ((n--) > 0) { - result.append( - String.format("\\x%02X", (long) (pBytes[p] & 0377)).getBytes( - StandardCharsets.US_ASCII)); - prev = ++p; + private static TruffleString rbStrEscape(AbstractTruffleString tstring, RubyEncoding encoding, + InternalByteArray byteArray) { + var tencoding = encoding.tencoding; + + TStringBuilder result = new TStringBuilder(); + boolean unicode_p = encoding.isUnicode; + boolean asciicompat = encoding.isAsciiCompatible; + var iterator = CreateCodePointIteratorNode.getUncached().execute(tstring, tencoding, + ErrorHandling.RETURN_NEGATIVE); + + while 
(iterator.hasNext()) { - int c, cc; - int n = StringSupport.characterLength(enc, cr, pBytes, p, pend, false); - if (!MBCLEN_CHARFOUND_P(n)) { - if (p > prev) { - result.append(pBytes, prev, p - prev); - } - n = enc.minLength(); - if (pend < p + n) { - n = (pend - p); - } - while ((n--) > 0) { - result.append( - String.format("\\x%02X", (long) (pBytes[p] & 0377)).getBytes( - StandardCharsets.US_ASCII)); - prev = ++p; + final int p = iterator.getByteIndex(); + int c = iterator.nextUncached(); + + if (c == -1) { + int n = iterator.getByteIndex() - p; + for (int i = 0; i < n; i++) { + result.append(StringUtils.formatASCIIBytes("\\x%02X", (long) (byteArray.get(p + i) & 0377))); } continue; } - n = MBCLEN_CHARFOUND_LEN(n); - c = enc.mbcToCode(pBytes, p, pend); - p += n; + + final int cc; switch (c) { case '\n': cc = 'n'; @@ -3990,19 +3489,13 @@ private static Rope rbStrEscape(Rope str) { cc = 0; break; } + if (cc != 0) { - if (p - n > prev) { - result.append(pBytes, prev, p - n - prev); - } result.append('\\'); result.append((byte) cc); - prev = p; } else if (asciicompat && Encoding.isAscii(c) && (c < 0x7F && c > 31 /* ISPRINT(c) */)) { + result.append(byteArray, p, iterator.getByteIndex() - p); /* copy this character's bytes unchanged; length = index after next() minus start index p */ } else { - if (p - n > prev) { - result.append(pBytes, prev, p - n - prev); - } - if (unicode_p && (c & 0xFFFFFFFFL) < 0x7F && Encoding.isAscii(c) && ASCIIEncoding.INSTANCE.isPrint(c)) { result.append(StringUtils.formatASCIIBytes("%c", (char) (c & 0xFFFFFFFFL))); @@ -4010,15 +3503,11 @@ private static Rope rbStrEscape(Rope str) { result.append(StringUtils.formatASCIIBytes(escapedCharFormat(c, unicode_p), c & 0xFFFFFFFFL)); } - prev = p; } } - if (p > prev) { - result.append(pBytes, prev, p - prev); - } - result.setEncoding(USASCIIEncoding.INSTANCE); - return result.toRope(CodeRange.CR_7BIT); + result.setEncoding(Encodings.US_ASCII); + return result.toTString(); // CodeRange.CR_7BIT } private static int MBCLEN_CHARFOUND_LEN(int r) { @@ -4058,115 +3547,107 @@ private static String escapedCharFormat(int c, boolean isUnicode) { @ImportStatic(StringGuards.class) public abstract static class StringFindCharacterNode extends CoreMethodArrayArgumentsNode { - @Child private StringSubstringNode substringNode = StringSubstringNode.create(); - @Specialization(guards = "offset < 0") protected Object stringFindCharacterNegativeOffset(Object string, int offset) { return nil; } - 
@Specialization(guards = "offsetTooLarge(strings.getRope(string), offset)") + @Specialization(guards = "offsetTooLarge(strings.byteLength(string), offset)") protected Object stringFindCharacterOffsetTooLarge(Object string, int offset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { return nil; } @Specialization( guards = { "offset >= 0", - "!offsetTooLarge(strings.getRope(string), offset)", - "isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)" }) + "!offsetTooLarge(strings.byteLength(string), offset)", + "isSingleByteOptimizable(strings.getTString(string), strings.getEncoding(string), singleByteOptimizableNode)" }) protected Object stringFindCharacterSingleByte(Object string, int offset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached SingleByteOptimizableNode singleByteOptimizableNode) { + @Cached RubyStringLibrary strings, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached TruffleString.SubstringByteIndexNode substringNode) { // Taken from Rubinius's String::find_character. 
- - return substringNode.executeSubstring(string, offset, 1); + return createSubString(substringNode, strings, string, offset, 1); } @Specialization( guards = { "offset >= 0", - "!offsetTooLarge(strings.getRope(string), offset)", - "!isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)" }) + "!offsetTooLarge(strings.byteLength(string), offset)", + "!isSingleByteOptimizable(strings.getTString(string), strings.getEncoding(string), singleByteOptimizableNode)" }) protected Object stringFindCharacter(Object string, int offset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached GetBytesObjectNode getBytesObject, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached CodeRangeNode codeRangeNode, - @Cached SingleByteOptimizableNode singleByteOptimizableNode) { + @Cached RubyStringLibrary strings, + @Cached TruffleString.ByteLengthOfCodePointNode byteLengthOfCodePointNode, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached TruffleString.SubstringByteIndexNode substringNode) { // Taken from Rubinius's String::find_character. 
+ var tstring = strings.getTString(string); + var tencoding = strings.getTEncoding(string); - final Rope rope = strings.getRope(string); - final Encoding enc = rope.getEncoding(); - final CodeRange cr = codeRangeNode.execute(rope); + int clen = byteLengthOfCodePointNode.execute(tstring, offset, tencoding, ErrorHandling.BEST_EFFORT); + return createSubString(substringNode, strings, string, offset, clen); + } - final int clen = calculateCharacterLengthNode - .characterLength(enc, cr, getBytesObject.getClamped(rope, offset, enc.maxLength())); - - return substringNode.executeSubstring(string, offset, clen); - } - - protected static boolean offsetTooLarge(Rope rope, int offset) { - return offset >= rope.byteLength(); + protected static boolean offsetTooLarge(int byteLength, int offset) { + return offset >= byteLength; } } - @NonStandard - @CoreMethod(names = "from_codepoint", onSingleton = true, required = 2, lowerFixnum = 1) - public abstract static class StringFromCodepointPrimitiveNode extends CoreMethodArrayArgumentsNode { + @Primitive(name = "string_from_codepoint", lowerFixnum = 0) + public abstract static class StringFromCodepointPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); - - @Specialization(guards = { "isSimple(code, rubyEncoding)", "isCodepoint(code)" }) - protected RubyString stringFromCodepointSimple(long code, RubyEncoding rubyEncoding, + @Specialization(guards = "isSimple(code, encoding)") + protected RubyString stringFromCodepointSimple(int code, RubyEncoding encoding, @Cached ConditionProfile isUTF8Profile, @Cached ConditionProfile isUSAsciiProfile, - @Cached ConditionProfile isAscii8BitProfile) { - final int intCode = (int) code; // isSimple() guarantees this is OK - final Encoding encoding = rubyEncoding.jcoding; - final Rope rope; - - if (isUTF8Profile.profile(encoding == UTF8Encoding.INSTANCE)) { - rope = 
RopeConstants.UTF8_SINGLE_BYTE_ROPES[intCode]; - } else if (isUSAsciiProfile.profile(encoding == USASCIIEncoding.INSTANCE)) { - rope = RopeConstants.US_ASCII_SINGLE_BYTE_ROPES[intCode]; - } else if (isAscii8BitProfile.profile(encoding == ASCIIEncoding.INSTANCE)) { - rope = RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES[intCode]; + @Cached ConditionProfile isAscii8BitProfile, + @Cached TruffleString.FromCodePointNode fromCodePointNode) { + final TruffleString tstring; + if (isUTF8Profile.profile(encoding == Encodings.UTF_8)) { + tstring = TStringConstants.UTF8_SINGLE_BYTE[code]; + } else if (isUSAsciiProfile.profile(encoding == Encodings.US_ASCII)) { + tstring = TStringConstants.US_ASCII_SINGLE_BYTE[code]; + } else if (isAscii8BitProfile.profile(encoding == Encodings.BINARY)) { + tstring = TStringConstants.BINARY_SINGLE_BYTE[code]; } else { - rope = RopeOperations.create(new byte[]{ (byte) intCode }, encoding, CodeRange.CR_UNKNOWN); + tstring = fromCodePointNode.execute(code, encoding.tencoding, false); + assert tstring != null; } - return makeStringNode.fromRope(rope, rubyEncoding); + return createString(tstring, encoding); } - @Specialization(guards = { "!isSimple(code, rubyEncoding)", "isCodepoint(code)" }) - protected RubyString stringFromCodepoint(long code, RubyEncoding rubyEncoding, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, + @Specialization(guards = "!isSimple(code, encoding)") + protected RubyString stringFromCodepoint(int code, RubyEncoding encoding, + @Cached TruffleString.FromCodePointNode fromCodePointNode, @Cached BranchProfile errorProfile) { - final Encoding encoding = rubyEncoding.jcoding; - - final int length = StringSupport.codeLength(encoding, (int) code); - if (length <= 0) { + var tstring = fromCodePointNode.execute(code, encoding.tencoding, false); + if (tstring == null) { errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().rangeError(code, rubyEncoding, this)); + throw new 
RaiseException(getContext(), coreExceptions().rangeError(code, encoding, this)); } - final byte[] bytes = new byte[length]; - final int codeToMbc = StringSupport.codeToMbc(encoding, (int) code, bytes, 0); - if (codeToMbc < 0) { - errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().rangeError(code, rubyEncoding, this)); - } + return createString(tstring, encoding); + } - final Bytes bytesObject = new Bytes(bytes, 0, length); - if (calculateCharacterLengthNode.characterLength(encoding, CR_UNKNOWN, bytesObject) != length) { + @Specialization(guards = "isCodepoint(code)") + protected RubyString stringFromLongCodepoint(long code, RubyEncoding encoding, + @Cached TruffleString.FromCodePointNode fromCodePointNode, + @Cached BranchProfile errorProfile) { + var tstring = fromCodePointNode.execute((int) code, encoding.tencoding, false); + if (tstring == null) { errorProfile.enter(); - throw new RaiseException(getContext(), coreExceptions().rangeError(code, rubyEncoding, this)); + throw new RaiseException(getContext(), coreExceptions().rangeError(code, encoding, this)); } - return makeStringNode.executeMake(bytes, rubyEncoding, CodeRange.CR_VALID); + return createString(tstring, encoding); + } + + @Specialization(guards = "!isCodepoint(code)") + protected RubyString tooBig(long code, RubyEncoding encoding) { + throw new RaiseException(getContext(), coreExceptions().rangeError(code, encoding, this)); } protected boolean isCodepoint(long code) { @@ -4174,13 +3655,10 @@ protected boolean isCodepoint(long code) { return code >= 0 && code < (1L << 32); } - protected boolean isSimple(long code, RubyEncoding encoding) { - final Encoding enc = encoding.jcoding; - - return (enc.isAsciiCompatible() && code >= 0x00 && code < 0x80) || - (enc == ASCIIEncoding.INSTANCE && code >= 0x00 && code <= 0xFF); + protected boolean isSimple(int codepoint, RubyEncoding encoding) { + return (encoding.isAsciiCompatible && codepoint >= 0x00 && codepoint < 0x80) || + (encoding 
== Encodings.BINARY && codepoint >= 0x00 && codepoint <= 0xFF); } - } @Primitive(name = "string_to_f") @@ -4189,15 +3667,15 @@ public abstract static class StringToFPrimitiveNode extends PrimitiveArrayArgume @TruffleBoundary @Specialization protected Object stringToF(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached FixnumOrBignumNode fixnumOrBignumNode, - @Cached BytesNode bytesNode) { - final Rope rope = strings.getRope(string); + @Cached RubyStringLibrary strings, + @Cached FixnumOrBignumNode fixnumOrBignumNode) { + var rope = strings.getTString(string); + var encoding = strings.getEncoding(string); if (rope.isEmpty()) { return nil; } - final String javaString = strings.getJavaString(string); + final String javaString = RubyGuards.getJavaString(string); if (javaString.startsWith("0x")) { try { return Double.parseDouble(javaString); @@ -4208,8 +3686,8 @@ protected Object stringToF(Object string, getContext(), this, fixnumOrBignumNode, - bytesNode, rope, + encoding, 16, true); if (result instanceof Integer) { @@ -4224,7 +3702,7 @@ protected Object stringToF(Object string, } } try { - return new DoubleConverter().parse(rope, true, true); + return new DoubleConverter().parse(rope, encoding, true, true); } catch (NumberFormatException e) { return nil; } @@ -4236,402 +3714,114 @@ protected Object stringToF(Object string, @ImportStatic(StringGuards.class) public abstract static class StringIndexPrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child private CheckEncodingNode checkEncodingNode; - @Child CodeRangeNode codeRangeNode = CodeRangeNode.create(); - @Child SingleByteOptimizableNode singleByteNode = SingleByteOptimizableNode.create(); - - @Specialization( - guards = "isEmpty(stringsPattern.getRope(pattern))") - protected int stringIndexEmptyPattern(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsPattern) { + @Specialization(guards = 
"patternTString.isEmpty()") + protected int stringIndexEmptyPattern(Object rubyString, Object rubyPattern, int byteOffset, + @Cached RubyStringLibrary libPattern, + @Bind("libPattern.getTString(rubyPattern)") AbstractTruffleString patternTString) { assert byteOffset >= 0; return byteOffset; } - @Specialization( - guards = { - "isSingleByteString(libPattern.getRope(pattern))", - "!isBrokenCodeRange(libPattern.getRope(pattern), codeRangeNode)", - "canMemcmp(libString.getRope(string), libPattern.getRope(pattern), singleByteNode)" }) - protected Object stringIndexSingleBytePattern(Object string, Object pattern, int byteOffset, - @Cached BytesNode bytesNode, + @Specialization(guards = "!patternTString.isEmpty()") + protected Object findStringByteIndex(Object rubyString, Object rubyPattern, int byteOffset, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libPattern, + @Cached CheckEncodingNode checkEncodingNode, + @Cached TruffleString.ByteIndexOfStringNode indexOfStringNode, @Cached ConditionProfile offsetTooLargeProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern) { + @Cached ConditionProfile notFoundProfile, + @Bind("libPattern.getTString(rubyPattern)") AbstractTruffleString patternTString) { assert byteOffset >= 0; - checkEncoding(string, pattern); + var compatibleEncoding = checkEncodingNode.executeCheckEncoding(rubyString, rubyPattern); - final Rope sourceRope = libString.getRope(string); - final int end = sourceRope.byteLength(); + var string = libString.getTString(rubyString); + int stringByteLength = string.byteLength(libString.getTEncoding(rubyString)); - if (offsetTooLargeProfile.profile(byteOffset >= end)) { + if (offsetTooLargeProfile.profile(byteOffset >= stringByteLength)) { return nil; } - final byte[] sourceBytes = bytesNode.execute(sourceRope); - final byte searchByte = bytesNode.execute(libPattern.getRope(pattern))[0]; - - final int 
index = com.oracle.truffle.api.ArrayUtils.indexOf(sourceBytes, byteOffset, end, searchByte); - - return index == -1 ? nil : index; - } - - @Specialization( - guards = { - "!isEmpty(libPattern.getRope(pattern))", - "!isSingleByteString(libPattern.getRope(pattern))", - "!isBrokenCodeRange(libPattern.getRope(pattern), codeRangeNode)", - "canMemcmp(libString.getRope(string), libPattern.getRope(pattern), singleByteNode)" }) - protected Object stringIndexMultiBytePattern(Object string, Object pattern, int byteOffset, - @Cached BytesNode bytesNode, - @Cached BranchProfile matchFoundProfile, - @Cached BranchProfile noMatchProfile, - @Cached LoopConditionProfile loopProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern) { - assert byteOffset >= 0; - - checkEncoding(string, pattern); - - final Rope sourceRope = libString.getRope(string); - final byte[] sourceBytes = bytesNode.execute(sourceRope); - final Rope searchRope = libPattern.getRope(pattern); - final byte[] searchBytes = bytesNode.execute(searchRope); - - int end = sourceRope.byteLength() - searchRope.byteLength(); - - int i = byteOffset; - try { - for (; loopProfile.inject(i <= end); i++) { - if (sourceBytes[i] == searchBytes[0]) { - if (ArrayUtils.regionEquals(sourceBytes, i, searchBytes, 0, searchRope.byteLength())) { - matchFoundProfile.enter(); - return i; - } - } - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, i - byteOffset); - } - - noMatchProfile.enter(); - return nil; - } - - @Specialization( - guards = { - "isBrokenCodeRange(stringsPattern.getRope(pattern), codeRangeNode)" }) - protected Object stringIndexBrokenPattern(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsPattern) { - assert byteOffset >= 0; - return nil; - } - - @Specialization( - guards = { - 
"!isBrokenCodeRange(libPattern.getRope(pattern), codeRangeNode)", - "!canMemcmp(libString.getRope(string), libPattern.getRope(pattern), singleByteNode)" }) - protected Object stringIndexGeneric(Object string, Object pattern, int byteOffset, - @Cached ByteIndexFromCharIndexNode byteIndexFromCharIndexNode, - @Cached StringByteCharacterIndexNode byteIndexToCharIndexNode, - @Cached NormalizeIndexNode normalizeIndexNode, - @Cached ConditionProfile badIndexProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern) { - assert byteOffset >= 0; - - checkEncoding(string, pattern); - - // Rubinius will pass in a byte index for the `start` value, but StringSupport.index requires a character index. - final int charIndex = byteIndexToCharIndexNode.executeStringByteCharacterIndex(string, byteOffset); + int patternByteIndex = indexOfStringNode.execute(string, patternTString, byteOffset, stringByteLength, + compatibleEncoding.tencoding); - final Rope stringRope = libString.getRope(string); - final int index = index( - stringRope, - libPattern.getRope(pattern), - charIndex, - stringRope.getEncoding(), - normalizeIndexNode, - byteIndexFromCharIndexNode); - - if (badIndexProfile.profile(index == -1)) { + if (notFoundProfile.profile(patternByteIndex < 0)) { return nil; } - return index; - } - - @TruffleBoundary - private int index(Rope source, Rope other, int byteOffset, Encoding enc, NormalizeIndexNode normalizeIndexNode, - ByteIndexFromCharIndexNode byteIndexFromCharIndexNode) { - // Taken from org.jruby.util.StringSupport.index. 
- assert byteOffset >= 0; - - int sourceLen = source.characterLength(); - int otherLen = other.characterLength(); - - byteOffset = normalizeIndexNode.executeNormalize(byteOffset, sourceLen); - - if (sourceLen - byteOffset < otherLen) { - return -1; - } - byte[] bytes = source.getBytes(); - int p = 0; - final int end = source.byteLength(); - if (byteOffset != 0) { - if (!source.isSingleByteOptimizable()) { - final int pp = byteIndexFromCharIndexNode.execute(source, 0, byteOffset); - byteOffset = StringSupport.offset(0, end, pp); - } - p += byteOffset; - } - if (otherLen == 0) { - return byteOffset; - } - - while (true) { - int pos = indexOf(source, other, p); - if (pos < 0) { - return pos; - } - pos -= p; - int t = enc.rightAdjustCharHead(bytes, p, p + pos, end); - if (t == p + pos) { - return pos + byteOffset; - } - if ((sourceLen -= t - p) <= 0) { - return -1; - } - byteOffset += t - p; - p = t; - } - } - - @TruffleBoundary - private int indexOf(Rope sourceRope, Rope otherRope, int fromIndex) { - // Taken from org.jruby.util.ByteList.indexOf. - - final byte[] source = sourceRope.getBytes(); - final int sourceOffset = 0; - final int sourceCount = sourceRope.byteLength(); - final byte[] target = otherRope.getBytes(); - final int targetOffset = 0; - final int targetCount = otherRope.byteLength(); - - if (fromIndex >= sourceCount) { - return (targetCount == 0 ? 
sourceCount : -1); - } - if (fromIndex < 0) { - fromIndex = 0; - } - if (targetCount == 0) { - return fromIndex; - } - - byte first = target[targetOffset]; - int max = sourceOffset + (sourceCount - targetCount); - - for (int i = sourceOffset + fromIndex; i <= max; i++) { - if (source[i] != first) { - while (++i <= max && source[i] != first) { - } - } - - if (i <= max) { - int j = i + 1; - int end = j + targetCount - 1; - for (int k = targetOffset + 1; j < end && source[j] == target[k]; j++, k++) { - } - - if (j == end) { - return i - sourceOffset; - } - } - } - return -1; - } - - private void checkEncoding(Object string, Object pattern) { - if (checkEncodingNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - checkEncodingNode = insert(CheckEncodingNode.create()); - } - - checkEncodingNode.executeCheckEncoding(string, pattern); + return patternByteIndex; } } @Primitive(name = "string_byte_character_index", lowerFixnum = 1) - @ImportStatic(StringGuards.class) public abstract static class StringByteCharacterIndexNode extends PrimitiveArrayArgumentsNode { - - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); - - public abstract int executeStringByteCharacterIndex(Object string, int byteIndex); - - public static StringByteCharacterIndexNode create() { - return StringByteCharacterIndexNodeFactory.create(null); - } - - @Specialization( - guards = { - "isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)" }) - protected int singleByte(Object string, int byteIndex, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return byteIndex; - } - - @Specialization( - guards = { - "!isSingleByteOptimizable(libString.getRope(string), singleByteOptimizableNode)", - "isFixedWidthEncoding(libString.getRope(string))" }) - protected int fixedWidth(Object string, int byteIndex, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - return byteIndex 
/ libString.getRope(string).getEncoding().minLength(); - } - - @Specialization( - guards = { - "!isSingleByteOptimizable(libString.getRope(string), singleByteOptimizableNode)", - "!isFixedWidthEncoding(libString.getRope(string))", - "isValidUtf8(libString.getRope(string), codeRangeNode)" }) - protected int validUtf8(Object string, int byteIndex, - @Cached CodeRangeNode codeRangeNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - // Taken from Rubinius's String::find_byte_character_index. - // TODO (nirvdrum 02-Apr-15) There's a way to optimize this for UTF-8, but porting all that code isn't necessary at the moment. - return notValidUtf8(string, byteIndex, codeRangeNode, libString); - } - - @TruffleBoundary - @Specialization( - guards = { - "!isSingleByteOptimizable(libString.getRope(string), singleByteOptimizableNode)", - "!isFixedWidthEncoding(libString.getRope(string))", - "!isValidUtf8(libString.getRope(string), codeRangeNode)" }) - protected int notValidUtf8(Object string, int byteIndex, - @Cached CodeRangeNode codeRangeNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - // Taken from Rubinius's String::find_byte_character_index and Encoding::find_byte_character_index. 
- - final Rope rope = libString.getRope(string); - final byte[] bytes = rope.getBytes(); - final Encoding encoding = rope.getEncoding(); - final CodeRange codeRange = rope.getCodeRange(); - int p = 0; - final int end = bytes.length; - int charIndex = 0; - - while (p < end && byteIndex > 0) { - final int charLen = StringSupport.characterLength(encoding, codeRange, bytes, p, end, true); - p += charLen; - byteIndex -= charLen; - charIndex++; - } - - return charIndex; + @Specialization + protected int byteIndexToCodePointIndex(Object string, int byteIndex, + @Cached RubyStringLibrary libString, + @Cached TruffleString.ByteIndexToCodePointIndexNode byteIndexToCodePointIndexNode, + @Bind("libString.getTString(string)") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding) { + return byteIndexToCodePointIndexNode.execute(tstring, 0, byteIndex, encoding.tencoding); } } /** Search pattern in string starting after offset characters, and return a character index or nil */ @Primitive(name = "string_character_index", lowerFixnum = 2) - @NodeChild(value = "string", type = RubyBaseNodeWithExecute.class) - @NodeChild(value = "pattern", type = RubyBaseNodeWithExecute.class) - @NodeChild(value = "offset", type = RubyNode.class) - public abstract static class StringCharacterIndexNode extends PrimitiveNode { + public abstract static class StringCharacterIndexNode extends PrimitiveArrayArgumentsNode { + protected final RubyStringLibrary libString = RubyStringLibrary.create(); + protected final RubyStringLibrary libPattern = RubyStringLibrary.create(); @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); - @CreateCast("string") - protected RubyBaseNodeWithExecute coerceStringToRope(RubyBaseNodeWithExecute string) { - return ToRopeNodeGen.create(string); - } - - @CreateCast("pattern") - protected RubyBaseNodeWithExecute coercePatternToRope(RubyBaseNodeWithExecute pattern) { - return 
ToRopeNodeGen.create(pattern); - } + @Specialization(guards = "singleByteOptimizableNode.execute(string, stringEncoding)") + protected Object singleByteOptimizable(Object rubyString, Object rubyPattern, int codePointOffset, + @Bind("libString.getTString(rubyString)") AbstractTruffleString string, + @Bind("libString.getEncoding(rubyString)") RubyEncoding stringEncoding, + @Bind("libPattern.getTString(rubyPattern)") AbstractTruffleString pattern, + @Bind("libPattern.getEncoding(rubyPattern)") RubyEncoding patternEncoding, + @Cached TruffleString.ByteIndexOfStringNode byteIndexOfStringNode, + @Cached ConditionProfile foundProfile) { - @Specialization( - guards = "singleByteOptimizableNode.execute(stringRope)") - protected Object singleByteOptimizable(Rope stringRope, Rope patternRope, int offset, - @Cached @Shared("stringBytesNode") BytesNode stringBytesNode, - @Cached @Shared("patternBytesNode") BytesNode patternBytesNode, - @Cached LoopConditionProfile loopProfile) { + assert codePointOffset >= 0; - assert offset >= 0; - assert offset + patternRope.byteLength() <= stringRope - .byteLength() : "already checked in the caller, String#index"; + // When single-byte optimizable, the byte length and the codepoint length are the same. 
+ int stringByteLength = string.byteLength(stringEncoding.tencoding); - int p = offset; - final int e = stringRope.byteLength(); - final int pe = patternRope.byteLength(); - final int l = e - pe + 1; + assert codePointOffset + pattern.byteLength( + patternEncoding.tencoding) <= stringByteLength : "already checked in the caller, String#index"; - final byte[] stringBytes = stringBytesNode.execute(stringRope); - final byte[] patternBytes = patternBytesNode.execute(patternRope); + int found = byteIndexOfStringNode.execute(string, pattern, codePointOffset, + stringByteLength, + stringEncoding.tencoding); - try { - for (; loopProfile.inject(p < l); p++) { - if (ArrayUtils.regionEquals(stringBytes, p, patternBytes, 0, pe)) { - return p; - } - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, p - offset); + if (foundProfile.profile(found >= 0)) { + return found; } return nil; } - @TruffleBoundary - @Specialization( - guards = "!singleByteOptimizableNode.execute(stringRope)") - protected Object multiByte(Rope stringRope, Rope patternRope, int offset, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached @Shared("stringBytesNode") BytesNode stringBytesNode, - @Cached @Shared("patternBytesNode") BytesNode patternBytesNode) { + @Specialization(guards = "!singleByteOptimizableNode.execute(string, stringEncoding)") + protected Object multiByte(Object rubyString, Object rubyPattern, int codePointOffset, + @Bind("libString.getTString(rubyString)") AbstractTruffleString string, + @Bind("libString.getEncoding(rubyString)") RubyEncoding stringEncoding, + @Bind("libPattern.getTString(rubyPattern)") AbstractTruffleString pattern, + @Bind("libPattern.getEncoding(rubyPattern)") RubyEncoding patternEncoding, + @Cached CodePointLengthNode codePointLengthNode, + @Cached TruffleString.IndexOfStringNode indexOfStringNode, + @Cached ConditionProfile foundProfile) { - assert offset >= 0; - assert offset + patternRope.byteLength() <= 
stringRope - .byteLength() : "already checked in the caller, String#index"; + assert codePointOffset >= 0; + assert codePointOffset + pattern.codePointLengthUncached(patternEncoding.tencoding) <= string + .codePointLengthUncached(stringEncoding.tencoding) : "already checked in the caller, String#index"; - int p = 0; - final int e = stringRope.byteLength(); - final int pe = patternRope.byteLength(); - final int l = e - pe + 1; - - final byte[] stringBytes = stringBytesNode.execute(stringRope); - final byte[] patternBytes = patternBytesNode.execute(patternRope); - - final Encoding enc = stringRope.getEncoding(); - final CodeRange cr = stringRope.getCodeRange(); - int c = 0; - int index = 0; - - while (p < e && index < offset) { - c = calculateCharacterLengthNode.characterLength(enc, cr, Bytes.fromRange(stringBytes, p, e)); - if (StringSupport.MBCLEN_CHARFOUND_P(c)) { - p += c; - index++; - } else { - return nil; - } - } + int stringCodePointLength = codePointLengthNode.execute(string, stringEncoding.tencoding); + int found = indexOfStringNode.execute(string, pattern, codePointOffset, stringCodePointLength, + stringEncoding.tencoding); - for (; p < l; p += c, ++index) { - c = calculateCharacterLengthNode.characterLength(enc, cr, Bytes.fromRange(stringBytes, p, e)); - if (!StringSupport.MBCLEN_CHARFOUND_P(c)) { - return nil; - } - if (ArrayUtils.regionEquals(stringBytes, p, patternBytes, 0, pe)) { - return index; - } + if (foundProfile.profile(found >= 0)) { + return found; } return nil; @@ -4640,193 +3830,48 @@ protected Object multiByte(Rope stringRope, Rope patternRope, int offset, /** Search pattern in string starting after offset bytes, and return a byte index or nil */ @Primitive(name = "string_byte_index", lowerFixnum = 2) - @NodeChild(value = "string", type = RubyBaseNodeWithExecute.class) - @NodeChild(value = "pattern", type = RubyBaseNodeWithExecute.class) - @NodeChild(value = "offset", type = RubyNode.class) - public abstract static class StringByteIndexNode 
extends PrimitiveNode { + public abstract static class StringByteIndexNode extends PrimitiveArrayArgumentsNode { - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); - - @CreateCast("string") - protected RubyBaseNodeWithExecute coerceStringToRope(RubyBaseNodeWithExecute string) { - return ToRopeNodeGen.create(string); - } - - @CreateCast("pattern") - protected RubyBaseNodeWithExecute coercePatternToRope(RubyBaseNodeWithExecute pattern) { - return ToRopeNodeGen.create(pattern); - } - - @Specialization(guards = "!patternFits(stringRope, patternRope, offset)") - protected Object patternTooLarge(Rope stringRope, Rope patternRope, int offset) { - assert offset >= 0; - return nil; - } + @Specialization + protected Object stringByteIndex(Object rubyString, Object rubyPattern, int byteOffset, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libPattern, + @Cached TruffleString.ByteIndexOfStringNode byteIndexOfStringNode, + @Cached ConditionProfile indexOutOfBoundsProfile, + @Cached ConditionProfile foundProfile) { + assert byteOffset >= 0; - @Specialization( - guards = { - "singleByteOptimizableNode.execute(stringRope)", - "patternFits(stringRope, patternRope, offset)" }) - protected Object singleByteOptimizable(Rope stringRope, Rope patternRope, int offset, - @Cached @Shared("stringBytesNode") BytesNode stringBytesNode, - @Cached @Shared("patternBytesNode") BytesNode patternBytesNode, - @Cached LoopConditionProfile loopProfile) { - - assert offset >= 0; - int p = offset; - final int e = stringRope.byteLength(); - final int pe = patternRope.byteLength(); - final int l = e - pe + 1; + var string = libString.getTString(rubyString); + var stringEncoding = libString.getEncoding(rubyString).tencoding; + int stringByteLength = string.byteLength(stringEncoding); - final byte[] stringBytes = stringBytesNode.execute(stringRope); - final byte[] patternBytes = patternBytesNode.execute(patternRope); + var pattern = 
libPattern.getTString(rubyPattern); + var patternEncoding = libPattern.getEncoding(rubyPattern).tencoding; + int patternByteLength = pattern.byteLength(patternEncoding); - try { - for (; loopProfile.inject(p < l); p++) { - if (ArrayUtils.regionEquals(stringBytes, p, patternBytes, 0, pe)) { - return p; - } - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, p - offset); + if (indexOutOfBoundsProfile.profile(byteOffset + patternByteLength > stringByteLength)) { + return nil; } - return nil; - } - - @TruffleBoundary - @Specialization( - guards = { - "!singleByteOptimizableNode.execute(stringRope)", - "patternFits(stringRope, patternRope, offset)" }) - protected Object multiByte(Rope stringRope, Rope patternRope, int offset, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached @Shared("stringBytesNode") BytesNode stringBytesNode, - @Cached @Shared("patternBytesNode") BytesNode patternBytesNode) { - - assert offset >= 0; - int p = offset; - final int e = stringRope.byteLength(); - final int pe = patternRope.byteLength(); - final int l = e - pe + 1; - - final byte[] stringBytes = stringBytesNode.execute(stringRope); - final byte[] patternBytes = patternBytesNode.execute(patternRope); - - final Encoding enc = stringRope.getEncoding(); - final CodeRange cr = stringRope.getCodeRange(); - int c; - - for (; p < l; p += c) { - c = calculateCharacterLengthNode.characterLength(enc, cr, Bytes.fromRange(stringBytes, p, e)); - if (!StringSupport.MBCLEN_CHARFOUND_P(c)) { - return nil; - } - if (ArrayUtils.regionEquals(stringBytes, p, patternBytes, 0, pe)) { - return p; - } + int found = byteIndexOfStringNode.execute(string, pattern, byteOffset, stringByteLength, stringEncoding); + if (foundProfile.profile(found >= 0)) { + return found; } return nil; } - - protected boolean patternFits(Rope stringRope, Rope patternRope, int offset) { - return offset + patternRope.byteLength() <= stringRope.byteLength(); - } - } - - /** 
Calculates the byte offset of a character, indicated by a character index, starting from a provided byte offset - * into the rope. Providing a 0 starting offset simply finds the byte offset for the nth character into the rope, - * according to the rope's encoding. Providing a non-zero starting byte offset effectively allows for calculating a - * character's byte offset into a substring of the rope without having to creating a SubstringRope. - * - * @rope - The rope/string being indexed. - * @startByteOffset - Starting position in the rope for the calculation of the character's byte offset. - * @characterIndex - The character index into the rope, starting from the provided byte offset. */ - @ImportStatic({ RopeGuards.class, StringGuards.class, StringOperations.class }) - public abstract static class ByteIndexFromCharIndexNode extends RubyBaseNode { - - public static ByteIndexFromCharIndexNode create() { - return ByteIndexFromCharIndexNodeGen.create(); - } - - @Child protected SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode.create(); - - public abstract int execute(Rope rope, int startByteOffset, int characterIndex); - - @Specialization(guards = "isSingleByteOptimizable(rope)") - protected int singleByteOptimizable(Rope rope, int startByteOffset, int characterIndex) { - return startByteOffset + characterIndex; - } - - @Specialization(guards = { "!isSingleByteOptimizable(rope)", "isFixedWidthEncoding(rope)" }) - protected int fixedWidthEncoding(Rope rope, int startByteOffset, int characterIndex) { - final Encoding encoding = rope.getEncoding(); - return startByteOffset + characterIndex * encoding.minLength(); - } - - @Specialization( - guards = { "!isSingleByteOptimizable(rope)", "!isFixedWidthEncoding(rope)", "characterIndex == 0" }) - protected int multiByteZeroIndex(Rope rope, int startByteOffset, int characterIndex) { - return startByteOffset; - } - - @Specialization(guards = { "!isSingleByteOptimizable(rope)", 
"!isFixedWidthEncoding(rope)" }) - protected int multiBytes(Rope rope, int startByteOffset, int characterIndex, - @Cached ConditionProfile indexTooLargeProfile, - @Cached ConditionProfile invalidByteProfile, - @Cached BytesNode bytesNode, - @Cached CalculateCharacterLengthNode calculateCharacterLengthNode, - @Cached CodeRangeNode codeRangeNode) { - // Taken from Rubinius's String::byte_index. - - final Encoding enc = rope.getEncoding(); - final byte[] bytes = bytesNode.execute(rope); - final int e = rope.byteLength(); - int p = startByteOffset; - - int i, k = characterIndex; - - for (i = 0; i < k && p < e; i++) { - final int c = calculateCharacterLengthNode - .characterLength(enc, codeRangeNode.execute(rope), Bytes.fromRange(bytes, p, e)); - - // TODO (nirvdrum 22-Dec-16): Consider having a specialized version for CR_BROKEN strings to avoid these checks. - // If it's an invalid byte, just treat it as a single byte - if (invalidByteProfile.profile(!StringSupport.MBCLEN_CHARFOUND_P(c))) { - ++p; - } else { - p += StringSupport.MBCLEN_CHARFOUND_LEN(c); - } - } - - // TODO (nirvdrum 22-Dec-16): Since we specialize elsewhere on index being too large, do we need this? Can character boundary search in a CR_BROKEN string cause us to encounter this case? - if (indexTooLargeProfile.profile(i < k)) { - return -1; - } else { - return p; - } - } - - protected boolean isSingleByteOptimizable(Rope rope) { - return singleByteOptimizableNode.execute(rope); - } - } // Named 'string_byte_index' in Rubinius. 
@Primitive(name = "string_byte_index_from_char_index", lowerFixnum = 1) - @ImportStatic({ StringGuards.class, StringOperations.class }) public abstract static class StringByteIndexFromCharIndexNode extends PrimitiveArrayArgumentsNode { - @Specialization - protected Object singleByteOptimizable(Object string, int characterIndex, - @Cached ByteIndexFromCharIndexNode byteIndexFromCharIndexNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - return byteIndexFromCharIndexNode.execute(libString.getRope(string), 0, characterIndex); + protected Object byteIndexFromCharIndex(Object string, int characterIndex, + @Cached TruffleString.CodePointIndexToByteIndexNode codePointIndexToByteIndexNode, + @Cached RubyStringLibrary libString) { + return codePointIndexToByteIndexNode.execute(libString.getTString(string), 0, characterIndex, + libString.getTEncoding(string)); } - } // Port of Rubinius's String::previous_byte_index. @@ -4849,22 +3894,22 @@ protected Object zeroIndex(Object string, int index) { @Specialization(guards = { "index > 0", - "isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)" }) + "isSingleByteOptimizable(strings.getTString(string), strings.getEncoding(string), singleByteOptimizableNode)" }) protected int singleByteOptimizable(Object string, int index, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + @Cached RubyStringLibrary strings, @Cached SingleByteOptimizableNode singleByteOptimizableNode) { return index - 1; } @Specialization(guards = { "index > 0", - "!isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)", - "isFixedWidthEncoding(strings.getRope(string))" }) + "!isSingleByteOptimizable(strings.getTString(string), strings.getEncoding(string), singleByteOptimizableNode)", + "isFixedWidthEncoding(strings.getEncoding(string))" }) protected int fixedWidthEncoding(Object string, int index, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, + 
@Cached RubyStringLibrary strings, @Cached SingleByteOptimizableNode singleByteOptimizableNode, @Cached ConditionProfile firstCharacterProfile) { - final Encoding encoding = strings.getRope(string).getEncoding(); + final Encoding encoding = strings.getEncoding(string).jcoding; // TODO (nirvdrum 11-Apr-16) Determine whether we need to be bug-for-bug compatible with Rubinius. // Implement a bug in Rubinius. We already special-case the index == 0 by returning nil. For all indices @@ -4880,17 +3925,19 @@ protected int fixedWidthEncoding(Object string, int index, @Specialization(guards = { "index > 0", - "!isSingleByteOptimizable(strings.getRope(string), singleByteOptimizableNode)", - "!isFixedWidthEncoding(strings.getRope(string))" }) + "!isSingleByteOptimizable(strings.getTString(string), strings.getEncoding(string), singleByteOptimizableNode)", + "!isFixedWidthEncoding(strings.getEncoding(string))" }) @TruffleBoundary protected Object other(Object string, int index, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached SingleByteOptimizableNode singleByteOptimizableNode) { - final Rope rope = strings.getRope(string); - final int p = 0; - final int end = p + rope.byteLength(); + @Cached RubyStringLibrary strings, + @Cached SingleByteOptimizableNode singleByteOptimizableNode, + @Cached TruffleString.GetInternalByteArrayNode byteArrayNode) { + var encoding = strings.getEncoding(string); + var byteArray = byteArrayNode.execute(strings.getTString(string), encoding.tencoding); + final int p = byteArray.getOffset(); + final int end = byteArray.getEnd(); - final int b = rope.getEncoding().prevCharHead(rope.getBytes(), p, p + index, end); + final int b = encoding.jcoding.prevCharHead(byteArray.getArray(), p, p + index, end); if (b == -1) { return nil; @@ -4905,295 +3952,131 @@ protected Object other(Object string, int index, @ImportStatic(StringGuards.class) public abstract static class StringRindexPrimitiveNode extends CoreMethodArrayArgumentsNode 
{ - @Child private CheckEncodingNode checkEncodingNode; - @Child CodeRangeNode codeRangeNode = CodeRangeNode.create(); - @Child SingleByteOptimizableNode singleByteNode = SingleByteOptimizableNode.create(); - - @Specialization(guards = { "isEmpty(stringsPattern.getRope(pattern))" }) - protected Object stringRindexEmptyPattern(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsPattern) { - assert byteOffset >= 0; - return byteOffset; - } - - @Specialization(guards = { - "isSingleByteString(patternRope)", - "!isBrokenCodeRange(patternRope, codeRangeNode)", - "canMemcmp(libString.getRope(string), patternRope, singleByteNode)" }) - protected Object stringRindexSingleBytePattern(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern, - @Bind("libPattern.getRope(pattern)") Rope patternRope, - @Cached BytesNode bytesNode, - @Cached BranchProfile startTooLargeProfile, - @Cached BranchProfile matchFoundProfile, - @Cached BranchProfile noMatchProfile, - @Cached LoopConditionProfile loopProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - assert byteOffset >= 0; - - checkEncoding(string, pattern); - - final Rope sourceRope = libString.getRope(string); - final int end = sourceRope.byteLength(); - final byte[] sourceBytes = bytesNode.execute(sourceRope); - final byte searchByte = bytesNode.execute(patternRope)[0]; - int normalizedStart = byteOffset; - - if (normalizedStart >= end) { - startTooLargeProfile.enter(); - normalizedStart = end - 1; - } - - int i = normalizedStart; - try { - for (; loopProfile.inject(i >= 0); i--) { - if (sourceBytes[i] == searchByte) { - matchFoundProfile.enter(); - return i; - } - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, normalizedStart - i); - } - - noMatchProfile.enter(); - return nil; - } - - @Specialization(guards = { - 
"!isEmpty(patternRope)", - "!isSingleByteString(patternRope)", - "!isBrokenCodeRange(patternRope, codeRangeNode)", - "canMemcmp(libString.getRope(string), patternRope, singleByteNode)" }) - protected Object stringRindexMultiBytePattern(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern, - @Bind("libPattern.getRope(pattern)") Rope patternRope, - @Cached BytesNode bytesNode, + @Specialization + protected Object stringRindex(Object rubyString, Object rubyPattern, int byteOffset, + @Cached RubyStringLibrary libPattern, + @Cached RubyStringLibrary libString, + @Cached CheckEncodingNode checkEncodingNode, + @Cached TruffleString.LastByteIndexOfStringNode lastByteIndexOfStringNode, @Cached BranchProfile startOutOfBoundsProfile, @Cached BranchProfile startTooCloseToEndProfile, - @Cached BranchProfile matchFoundProfile, - @Cached BranchProfile noMatchProfile, - @Cached LoopConditionProfile loopProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached BranchProfile noMatchProfile) { assert byteOffset >= 0; - checkEncoding(string, pattern); + // Throw an exception if the encodings are not compatible. 
+ checkEncodingNode.executeCheckEncoding(rubyString, rubyPattern); + + var string = libString.getTString(rubyString); + var stringEncoding = libString.getEncoding(rubyString).tencoding; + int stringByteLength = string.byteLength(stringEncoding); + + var pattern = libPattern.getTString(rubyPattern); + var patternEncoding = libPattern.getEncoding(rubyPattern).tencoding; + int patternByteLength = pattern.byteLength(patternEncoding); - final Rope sourceRope = libString.getRope(string); - final int end = sourceRope.byteLength(); - final byte[] sourceBytes = bytesNode.execute(sourceRope); - final int matchSize = patternRope.byteLength(); - final byte[] searchBytes = bytesNode.execute(patternRope); int normalizedStart = byteOffset; - if (normalizedStart >= end) { + if (normalizedStart >= stringByteLength) { startOutOfBoundsProfile.enter(); - normalizedStart = end - 1; + normalizedStart = stringByteLength - 1; } - if (end - normalizedStart < matchSize) { + if (stringByteLength - normalizedStart < patternByteLength) { startTooCloseToEndProfile.enter(); - normalizedStart = end - matchSize; - } - - int i = normalizedStart; - try { - for (; loopProfile.inject(i >= 0); i--) { - if (sourceBytes[i] == searchBytes[0]) { - if (ArrayUtils.regionEquals(sourceBytes, i, searchBytes, 0, matchSize)) { - matchFoundProfile.enter(); - return i; - } - } - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, normalizedStart - i); - } - - noMatchProfile.enter(); - return nil; - } - - @Specialization(guards = { "isBrokenCodeRange(stringsPattern.getRope(pattern), codeRangeNode)" }) - protected Object stringRindexBrokenPattern(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsPattern) { - assert byteOffset >= 0; - return nil; - } - - @Specialization(guards = { - "!isBrokenCodeRange(patternRope, codeRangeNode)", - "!canMemcmp(libString.getRope(string), patternRope, singleByteNode)" }) - protected 
Object stringRindex(Object string, Object pattern, int byteOffset, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern, - @Bind("libPattern.getRope(pattern)") Rope patternRope, - @Cached BytesNode stringBytes, - @Cached BytesNode patternBytes, - @Cached GetByteNode patternGetByteNode, - @Cached GetByteNode stringGetByteNode, - @Cached LoopConditionProfile loopProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - // Taken from Rubinius's String::rindex. - assert byteOffset >= 0; - - int pos = byteOffset; - - final Rope stringRope = libString.getRope(string); - final int total = stringRope.byteLength(); - final int matchSize = patternRope.byteLength(); - - if (pos >= total) { - pos = total - 1; - } - - switch (matchSize) { - case 0: { - return byteOffset; - } - - case 1: { - final int matcher = patternGetByteNode.executeGetByte(patternRope, 0); - - while (pos >= 0) { - if (stringGetByteNode.executeGetByte(stringRope, pos) == matcher) { - return pos; - } - - pos--; - } - - return nil; - } - - default: { - if (total - pos < matchSize) { - pos = total - matchSize; - } - - int cur = pos; - - try { - while (loopProfile.inject(cur >= 0)) { - if (ArrayUtils.regionEquals( - stringBytes.execute(stringRope), - cur, - patternBytes.execute(patternRope), - 0, - matchSize)) { - return cur; - } - - cur--; - TruffleSafepoint.poll(this); - } - } finally { - profileAndReportLoopCount(loopProfile, pos - cur); - } - } + normalizedStart = stringByteLength - patternByteLength; } - return nil; - } + int result = lastByteIndexOfStringNode.execute(string, pattern, normalizedStart + patternByteLength, 0, + stringEncoding); - private void checkEncoding(Object string, Object pattern) { - if (checkEncodingNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - checkEncodingNode = insert(CheckEncodingNode.create()); + if (result < 0) { + noMatchProfile.enter(); + return nil; } - 
checkEncodingNode.executeCheckEncoding(string, pattern); + return result; } - } @Primitive(name = "string_splice", lowerFixnum = { 2, 3 }) @ImportStatic(StringGuards.class) public abstract static class StringSplicePrimitiveNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = { "libOther.isRubyString(other)", "indexAtStartBound(spliceByteIndex)" }) + @Specialization(guards = "spliceByteIndex == 0") protected Object splicePrepend( RubyString string, Object other, int spliceByteIndex, int byteCountToReplace, RubyEncoding rubyEncoding, - @Cached SubstringNode prependSubstringNode, - @Cached ConcatNode prependConcatNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libOther) { - final Encoding encoding = rubyEncoding.jcoding; - final Rope original = string.rope; - final Rope left = libOther.getRope(other); - final Rope right = prependSubstringNode - .executeSubstring(original, byteCountToReplace, original.byteLength() - byteCountToReplace); - - final Rope prependResult = prependConcatNode.executeConcat(left, right, encoding); - string.setRope(prependResult, rubyEncoding); + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther, + @Cached TruffleString.SubstringByteIndexNode prependSubstringNode, + @Cached TruffleString.ConcatNode prependConcatNode) { + var original = string.tstring; + var originalTEncoding = libString.getTEncoding(string); + var left = libOther.getTString(other); + var right = prependSubstringNode.execute(original, byteCountToReplace, + original.byteLength(originalTEncoding) - byteCountToReplace, originalTEncoding, true); + + var prependResult = prependConcatNode.execute(left, right, rubyEncoding.tencoding, true); + string.setTString(prependResult, rubyEncoding); return string; } - @Specialization(guards = { "libOther.isRubyString(other)", "indexAtEndBound(string, spliceByteIndex)" }) + @Specialization(guards = "spliceByteIndex == byteLength") protected Object spliceAppend( RubyString string, Object 
other, int spliceByteIndex, int byteCountToReplace, RubyEncoding rubyEncoding, - @Cached ConcatNode appendConcatNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libOther) { - final Encoding encoding = rubyEncoding.jcoding; - final Rope left = string.rope; - final Rope right = libOther.getRope(other); + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther, + @Cached TruffleString.ConcatNode appendConcatNode, + @Bind("libString.byteLength(string)") int byteLength) { + var left = string.tstring; + var right = libOther.getTString(other); - final Rope concatResult = appendConcatNode.executeConcat(left, right, encoding); - string.setRope(concatResult, rubyEncoding); + var concatResult = appendConcatNode.execute(left, right, rubyEncoding.tencoding, true); + string.setTString(concatResult, rubyEncoding); return string; } - @Specialization(guards = { "libOther.isRubyString(other)", "!indexAtEitherBounds(string, spliceByteIndex)" }) + @Specialization(guards = { "spliceByteIndex != 0", "spliceByteIndex != byteLength" }) protected RubyString splice( RubyString string, Object other, int spliceByteIndex, int byteCountToReplace, RubyEncoding rubyEncoding, + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther, @Cached ConditionProfile insertStringIsEmptyProfile, @Cached ConditionProfile splitRightIsEmptyProfile, - @Cached SubstringNode leftSubstringNode, - @Cached SubstringNode rightSubstringNode, - @Cached ConcatNode leftConcatNode, - @Cached ConcatNode rightConcatNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libOther) { - final Encoding encoding = rubyEncoding.jcoding; - final Rope source = string.rope; - final Rope insert = libOther.getRope(other); + @Cached TruffleString.SubstringByteIndexNode leftSubstringNode, + @Cached TruffleString.SubstringByteIndexNode rightSubstringNode, + @Cached TruffleString.ConcatNode leftConcatNode, + @Cached TruffleString.ConcatNode rightConcatNode, + @Cached 
TruffleString.ForceEncodingNode forceEncodingNode, + @Bind("libString.byteLength(string)") int byteLength) { + var sourceTEncoding = libString.getTEncoding(string); + var resultTEncoding = rubyEncoding.tencoding; + var source = string.tstring; + var insert = libOther.getTString(other); final int rightSideStartingIndex = spliceByteIndex + byteCountToReplace; - final Rope splitLeft = leftSubstringNode.executeSubstring(source, 0, spliceByteIndex); - final Rope splitRight = rightSubstringNode - .executeSubstring(source, rightSideStartingIndex, source.byteLength() - rightSideStartingIndex); + var splitLeft = leftSubstringNode.execute(source, 0, spliceByteIndex, sourceTEncoding, true); + var splitRight = rightSubstringNode.execute(source, rightSideStartingIndex, + source.byteLength(sourceTEncoding) - rightSideStartingIndex, sourceTEncoding, true); - final Rope joinedLeft; + final TruffleString joinedLeft; // always in resultTEncoding if (insertStringIsEmptyProfile.profile(insert.isEmpty())) { - joinedLeft = splitLeft; + joinedLeft = forceEncodingNode.execute(splitLeft, sourceTEncoding, resultTEncoding); } else { - joinedLeft = leftConcatNode.executeConcat(splitLeft, insert, encoding); + joinedLeft = leftConcatNode.execute(splitLeft, insert, resultTEncoding, true); } - final Rope joinedRight; + final TruffleString joinedRight; // always in resultTEncoding if (splitRightIsEmptyProfile.profile(splitRight.isEmpty())) { joinedRight = joinedLeft; } else { - joinedRight = rightConcatNode.executeConcat(joinedLeft, splitRight, encoding); + joinedRight = rightConcatNode.execute(joinedLeft, splitRight, resultTEncoding, true); } - string.setRope(joinedRight, rubyEncoding); + string.setTString(joinedRight, rubyEncoding); return string; } - - protected boolean indexAtStartBound(int index) { - return index == 0; - } - - protected boolean indexAtEndBound(RubyString string, int index) { - return index == string.rope.byteLength(); - } - - protected boolean indexAtEitherBounds(RubyString 
string, int index) { - return indexAtStartBound(index) || indexAtEndBound(string, index); - } - } @Primitive(name = "string_to_inum", lowerFixnum = 1) @@ -5203,29 +4086,78 @@ protected boolean indexAtEitherBounds(RubyString string, int index) { @NodeChild(value = "raiseOnError", type = RubyNode.class) public abstract static class StringToInumPrimitiveNode extends PrimitiveNode { - @CreateCast("string") - protected RubyBaseNodeWithExecute coerceStringToRope(RubyBaseNodeWithExecute string) { - return ToRopeNodeGen.create(string); - } + @Specialization(guards = "base == 10") + protected Object base10(Object string, int base, boolean strict, boolean raiseOnError, + @Cached RubyStringLibrary libString, + @Cached TruffleString.ParseLongNode parseLongNode, + @Cached BranchProfile notLazyLongProfile, + @Cached FixnumOrBignumNode fixnumOrBignumNode, + @Cached BranchProfile exceptionProfile) { + var tstring = libString.getTString(string); + try { + return parseLongNode.execute(tstring, 10); + } catch (TruffleString.NumberFormatException e) { + notLazyLongProfile.enter(); + var rope = libString.getTString(string); + var encoding = libString.getEncoding(string); + return bytesToInum(rope, encoding, base, strict, raiseOnError, fixnumOrBignumNode, exceptionProfile); + } + } + + @Specialization(guards = "base == 0") + protected Object base0(Object string, int base, boolean strict, boolean raiseOnError, + @Cached RubyStringLibrary libString, + @Cached TruffleString.ParseLongNode parseLongNode, + @Cached TruffleString.CodePointAtByteIndexNode codePointNode, + @Cached ConditionProfile notEmptyProfile, + @Cached BranchProfile notLazyLongProfile, + @Cached FixnumOrBignumNode fixnumOrBignumNode, + @Cached BranchProfile exceptionProfile) { + var tstring = libString.getTString(string); + var enc = libString.getEncoding(string); + var tenc = enc.tencoding; + var len = tstring.byteLength(tenc); + + if (notEmptyProfile.profile(enc.isAsciiCompatible && len >= 1)) { + int first = 
codePointNode.execute(tstring, 0, tenc, ErrorHandling.RETURN_NEGATIVE); + int second; + if ((first >= '1' && first <= '9') || (len >= 2 && (first == '-' || first == '+') && + (second = codePointNode.execute(tstring, 1, tenc, ErrorHandling.RETURN_NEGATIVE)) >= '1' && + second <= '9')) { + try { + return parseLongNode.execute(tstring, 10); + } catch (TruffleString.NumberFormatException e) { + notLazyLongProfile.enter(); + } + } + } - @Specialization(guards = "isLazyIntRopeOptimizable(rope, fixBase)") - protected int stringToInumIntRope(Rope rope, int fixBase, boolean strict, boolean raiseOnError) { - return ((LazyIntRope) rope).getValue(); + var rope = libString.getTString(string); + var encoding = libString.getEncoding(string); + return bytesToInum(rope, encoding, base, strict, raiseOnError, fixnumOrBignumNode, exceptionProfile); } - @Specialization(guards = "!isLazyIntRopeOptimizable(rope, fixBase)") - protected Object stringToInum(Rope rope, int fixBase, boolean strict, boolean raiseOnError, + @Specialization(guards = { "base != 10", "base != 0" }) + protected Object otherBase(Object string, int base, boolean strict, boolean raiseOnError, + @Cached RubyStringLibrary libString, @Cached FixnumOrBignumNode fixnumOrBignumNode, - @Cached BytesNode bytesNode, @Cached BranchProfile exceptionProfile) { + var rope = libString.getTString(string); + var encoding = libString.getEncoding(string); + return bytesToInum(rope, encoding, base, strict, raiseOnError, fixnumOrBignumNode, exceptionProfile); + } + + private Object bytesToInum(AbstractTruffleString rope, RubyEncoding encoding, int base, boolean strict, + boolean raiseOnError, FixnumOrBignumNode fixnumOrBignumNode, + BranchProfile exceptionProfile) { try { return ConvertBytes.bytesToInum( getContext(), this, fixnumOrBignumNode, - bytesNode, rope, - fixBase, + encoding, + base, strict); } catch (RaiseException e) { exceptionProfile.enter(); @@ -5235,266 +4167,62 @@ protected Object stringToInum(Rope rope, int fixBase, 
boolean strict, boolean ra throw e; } } - - protected boolean isLazyIntRopeOptimizable(Rope rope, int base) { - return (base == 0 || base == 10) && rope instanceof LazyIntRope; - } } @Primitive(name = "string_byte_append") public abstract static class StringByteAppendPrimitiveNode extends CoreMethodArrayArgumentsNode { - - @Child private ConcatNode concatNode = ConcatNode.create(); - - @Specialization(guards = "libOther.isRubyString(other)") + @Specialization(guards = "libOther.isRubyString(other)", limit = "1") protected RubyString stringByteAppend(RubyString string, Object other, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libOther) { - final Rope left = string.rope; - final Rope right = libOther.getRope(other); - + @Cached RubyStringLibrary libString, + @Cached RubyStringLibrary libOther, + @Cached TruffleString.ConcatNode concatNode, + @Cached TruffleString.ForceEncodingNode forceEncodingNode) { // The semantics of this primitive are such that the original string's byte[] should be extended without // negotiating the encoding. 
- string.setRope(concatNode.executeConcat(left, right, left.getEncoding())); + var leftEncoding = libString.getEncoding(string); + var left = string.tstring; + var right = forceEncodingNode.execute(libOther.getTString(other), libOther.getTEncoding(other), + leftEncoding.tencoding); + string.setTString(concatNode.execute(left, right, leftEncoding.tencoding, true), leftEncoding); return string; } - } @Primitive(name = "string_substring", lowerFixnum = { 1, 2 }) @ImportStatic(StringGuards.class) public abstract static class StringSubstringPrimitiveNode extends CoreMethodArrayArgumentsNode { - @Child private NormalizeIndexNode normalizeIndexNode = NormalizeIndexNode.create(); - @Child CharacterLengthNode characterLengthNode = CharacterLengthNode.create(); - @Child SingleByteOptimizableNode singleByteOptimizableNode = SingleByteOptimizableNode - .create(); - @Child private SubstringNode substringNode; - - public abstract Object execute(Object string, int index, int length); + public abstract Object execute(Object string, int codePointOffset, int codePointLength); - @Specialization(guards = { - "!indexTriviallyOutOfBounds(libString.getRope(string), characterLengthNode, index, length)", - "noCharacterSearch(libString.getRope(string), singleByteOptimizableNode)" }) - protected Object stringSubstringSingleByte(Object string, int index, int length, - @Cached @Shared("negativeIndexProfile") ConditionProfile negativeIndexProfile, - @Cached @Shared("tooLargeTotalProfile") ConditionProfile tooLargeTotalProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope rope = libString.getRope(string); - final RubyEncoding encoding = libString.getEncoding(string); - final int ropeCharacterLength = characterLengthNode.execute(rope); - final int normalizedIndex = normalizeIndexNode.executeNormalize(index, ropeCharacterLength); - int characterLength = length; - - if (negativeIndexProfile.profile(normalizedIndex < 0)) { + @Specialization + protected 
Object stringSubstringGeneric(Object string, int codePointOffset, int codePointLength, + @Cached RubyStringLibrary libString, + @Bind("libString.getTString(string)") AbstractTruffleString tstring, + @Bind("libString.getEncoding(string)") RubyEncoding encoding, + @Cached StringHelperNodes.NormalizeIndexNode normalizeIndexNode, + @Cached CodePointLengthNode codePointLengthNode, + @Cached TruffleString.SubstringNode substringNode, + @Cached ConditionProfile negativeIndexProfile, + @Cached ConditionProfile tooLargeTotalProfile, + @Cached ConditionProfile triviallyOutOfBoundsProfile) { + int stringCodePointLength = codePointLengthNode.execute(tstring, encoding.tencoding); + if (triviallyOutOfBoundsProfile.profile(codePointLength < 0 || codePointOffset > stringCodePointLength)) { return nil; } - if (tooLargeTotalProfile.profile(normalizedIndex + characterLength > ropeCharacterLength)) { - characterLength = ropeCharacterLength - normalizedIndex; - } - - return makeRope(string, encoding, rope, normalizedIndex, characterLength); - } - - @Specialization(guards = { - "!indexTriviallyOutOfBounds(libString.getRope(string), characterLengthNode, index, length)", - "!noCharacterSearch(libString.getRope(string), singleByteOptimizableNode)" }) - protected Object stringSubstringGeneric(Object string, int index, int length, - @Cached @Shared("negativeIndexProfile") ConditionProfile negativeIndexProfile, - @Cached @Shared("tooLargeTotalProfile") ConditionProfile tooLargeTotalProfile, - @Cached @Exclusive ConditionProfile foundSingleByteOptimizableDescendentProfile, - @Cached BranchProfile singleByteOptimizableBaseProfile, - @Cached BranchProfile leafBaseProfile, - @Cached BranchProfile slowSearchProfile, - @Cached ByteIndexFromCharIndexNode byteIndexFromCharIndexNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Rope rope = libString.getRope(string); - final RubyEncoding encoding = libString.getEncoding(string); - final int ropeCharacterLength = 
characterLengthNode.execute(rope); - final int normalizedIndex = normalizeIndexNode.executeNormalize(index, ropeCharacterLength); - int characterLength = length; - - if (negativeIndexProfile.profile(normalizedIndex < 0)) { + int normalizedCodePointOffset = normalizeIndexNode.executeNormalize(codePointOffset, stringCodePointLength); + if (negativeIndexProfile.profile(normalizedCodePointOffset < 0)) { return nil; } - if (tooLargeTotalProfile.profile(normalizedIndex + characterLength > ropeCharacterLength)) { - characterLength = ropeCharacterLength - normalizedIndex; - } - - final SearchResult searchResult = searchForSingleByteOptimizableDescendant( - rope, - normalizedIndex, - characterLength, - singleByteOptimizableBaseProfile, - leafBaseProfile, - slowSearchProfile); - - if (foundSingleByteOptimizableDescendentProfile - .profile(singleByteOptimizableNode.execute(searchResult.rope))) { - return makeRope( - string, - encoding, - searchResult.rope, - searchResult.index, - characterLength); - } - - return stringSubstringMultiByte( - string, - libString, - normalizedIndex, - characterLength, - byteIndexFromCharIndexNode); - } - - @Specialization(guards = { - "indexTriviallyOutOfBounds(strings.getRope(string), characterLengthNode, index, length)" }) - protected Object stringSubstringNegativeLength(Object string, int index, int length, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return nil; - } - - private SearchResult searchForSingleByteOptimizableDescendant(Rope base, int index, int characterLength, - BranchProfile singleByteOptimizableBaseProfile, - BranchProfile leafBaseProfile, - BranchProfile slowSearchProfile) { - - if (singleByteOptimizableNode.execute(base)) { - singleByteOptimizableBaseProfile.enter(); - return new SearchResult(index, base); - } - - if (base instanceof LeafRope) { - leafBaseProfile.enter(); - return new SearchResult(index, base); - } - - slowSearchProfile.enter(); - return 
searchForSingleByteOptimizableDescendantSlow(base, index, characterLength); - } - - @TruffleBoundary - private SearchResult searchForSingleByteOptimizableDescendantSlow(Rope base, int index, int characterLength) { - // If we've found something that's single-byte optimizable, we can halt the search. Taking a substring of - // a single byte optimizable rope is a fast operation. - if (base.isSingleByteOptimizable()) { - return new SearchResult(index, base); - } - - if (base instanceof LeafRope) { - return new SearchResult(index, base); - } else if (base instanceof SubstringRope) { - final SubstringRope substringRope = (SubstringRope) base; - if (substringRope.isSingleByteOptimizable()) { - // the substring byte offset is also a character offset - return searchForSingleByteOptimizableDescendantSlow( - substringRope.getChild(), - index + substringRope.getByteOffset(), - characterLength); - } else { - return new SearchResult(index, substringRope); - } - } else if (base instanceof ConcatRope) { - final ConcatRope concatRope = (ConcatRope) base; - - final ConcatState state = concatRope.getState(); - if (state.isFlattened()) { - return new SearchResult(index, base); - } else { - final Rope left = state.left; - final Rope right = state.right; - if (index + characterLength <= left.characterLength()) { - return searchForSingleByteOptimizableDescendantSlow(left, index, characterLength); - } else if (index >= left.characterLength()) { - return searchForSingleByteOptimizableDescendantSlow( - right, - index - left.characterLength(), - characterLength); - } else { - return new SearchResult(index, concatRope); - } - } - } else if (base instanceof RepeatingRope) { - final RepeatingRope repeatingRope = (RepeatingRope) base; - - if (index + characterLength <= repeatingRope.getChild().characterLength()) { - return searchForSingleByteOptimizableDescendantSlow( - repeatingRope.getChild(), - index, - characterLength); - } else { - return new SearchResult(index, repeatingRope); - } - } else 
if (base instanceof NativeRope) { - final NativeRope nativeRope = (NativeRope) base; - return new SearchResult(index, nativeRope.toLeafRope()); - } else { - throw new UnsupportedOperationException( - "Don't know how to traverse rope type: " + base.getClass().getName()); - } - } - - private Object stringSubstringMultiByte(Object string, RubyStringLibrary libString, int beg, int characterLen, - ByteIndexFromCharIndexNode byteIndexFromCharIndexNode) { - // Taken from org.jruby.RubyString#substr19 & org.jruby.RubyString#multibyteSubstr19. - - final Rope rope = libString.getRope(string); - final RubyEncoding encoding = libString.getEncoding(string); - final int length = rope.byteLength(); - - int p; - final int end = length; - int substringByteLength; - - p = byteIndexFromCharIndexNode.execute(rope, 0, beg); - if (p == end) { - substringByteLength = 0; - } else { - int pp = byteIndexFromCharIndexNode.execute(rope, p, characterLen); - substringByteLength = StringSupport.offset(p, end, pp); - } - - return makeRope(string, encoding, rope, p, substringByteLength); - } - - private RubyString makeRope(Object string, RubyEncoding encoding, Rope rope, int beg, int byteLength) { - if (substringNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - substringNode = insert(SubstringNode.create()); + int normalizedCodePointLength = codePointLength; + if (tooLargeTotalProfile + .profile(normalizedCodePointOffset + normalizedCodePointLength > stringCodePointLength)) { + normalizedCodePointLength = stringCodePointLength - normalizedCodePointOffset; } - final Rope substringRope = substringNode.executeSubstring(rope, beg, byteLength); - final RubyString ret = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - substringRope, - encoding); - AllocationTracing.trace(ret, this); - return ret; - } - - protected static boolean indexTriviallyOutOfBounds(Rope rope, - CharacterLengthNode characterLengthNode, - int index, int length) { - 
return (length < 0) || - (index > characterLengthNode.execute(rope)); - } - - protected static boolean noCharacterSearch(Rope rope, - SingleByteOptimizableNode singleByteOptimizableNode) { - return rope.isEmpty() || singleByteOptimizableNode.execute(rope); - } - - private static final class SearchResult { - public final int index; - public final Rope rope; - - public SearchResult(final int index, final Rope rope) { - this.index = index; - this.rope = rope; - } + return createSubString(substringNode, tstring, encoding, normalizedCodePointOffset, + normalizedCodePointLength); } } @@ -5506,73 +4234,30 @@ public abstract static class StringFromByteArrayPrimitiveNode extends CoreMethod @Specialization protected RubyString stringFromByteArray( RubyByteArray byteArray, int start, int count, RubyEncoding rubyEncoding, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final byte[] bytes = byteArray.bytes; final byte[] array = ArrayUtils.extractRange(bytes, start, start + count); - return makeStringNode.executeMake(array, rubyEncoding, CR_UNKNOWN); - } - - } - - public abstract static class StringAppendNode extends RubyBaseNode { - - @Child private EncodingNodes.CheckStringEncodingNode checkEncodingNode; - @Child private ConcatNode concatNode; - - public static StringAppendNode create() { - return StringAppendNodeGen.create(); - } - - public abstract RopeWithEncoding executeStringAppend(Object string, Object other); - - @Specialization(guards = "libOther.isRubyString(other)") - protected RopeWithEncoding stringAppend(Object string, Object other, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libOther) { - final Rope left = libString.getRope(string); - final Rope right = libOther.getRope(other); - - final RubyEncoding compatibleEncoding = executeCheckEncoding( - stringToRopeWithEncoding(libString, string), - 
stringToRopeWithEncoding(libOther, other)); - - final Rope result = executeConcat(left, right, compatibleEncoding); - return new RopeWithEncoding(result, compatibleEncoding); - } - - private Rope executeConcat(Rope left, Rope right, RubyEncoding compatibleEncoding) { - if (concatNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - concatNode = insert(ConcatNode.create()); - } - return concatNode.executeConcat(left, right, compatibleEncoding.jcoding); - } - - private RubyEncoding executeCheckEncoding(RopeWithEncoding string, RopeWithEncoding other) { - if (checkEncodingNode == null) { - CompilerDirectives.transferToInterpreterAndInvalidate(); - checkEncodingNode = insert(EncodingNodes.CheckStringEncodingNode.create()); - } - return checkEncodingNode.executeCheckEncoding(string, other); + return createString(fromByteArrayNode, array, rubyEncoding); } - protected RopeWithEncoding stringToRopeWithEncoding(RubyStringLibrary strings, Object string) { - return new RopeWithEncoding(strings.getRope(string), strings.getEncoding(string)); - } } @Primitive(name = "string_to_null_terminated_byte_array") public abstract static class StringToNullTerminatedByteArrayNode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "libString.isRubyString(string)") + @Specialization(guards = "libString.isRubyString(string)", limit = "1") protected Object stringToNullTerminatedByteArray(Object string, - @Cached BytesNode bytesNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { + @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode, + @Cached RubyStringLibrary libString) { + final var encoding = libString.getEncoding(string); + final var tstring = libString.getTString(string); + final int bytesToCopy = tstring.byteLength(encoding.tencoding); + final var bytesWithNull = new byte[bytesToCopy + 1]; + // NOTE: we always need one copy here, as native code could modify the passed byte[] - final byte[] bytes = 
bytesNode.execute(libString.getRope(string)); - final byte[] bytesWithNull = new byte[bytes.length + 1]; - System.arraycopy(bytes, 0, bytesWithNull, 0, bytes.length); + copyToByteArrayNode.execute(tstring, 0, + bytesWithNull, 0, bytesToCopy, encoding.tencoding); return getContext().getEnv().asGuestValue(bytesWithNull); } @@ -5599,12 +4284,57 @@ protected boolean isInterned(RubyString string) { @Primitive(name = "string_intern") public abstract static class InternNode extends PrimitiveArrayArgumentsNode { - @Specialization protected ImmutableRubyString internString(RubyString string, - @Cached FlattenNode flattenNode) { - final Rope flattened = flattenNode.executeFlatten(string.rope); - return getLanguage().getFrozenStringLiteral(flattened); + @Cached RubyStringLibrary libString, + @Cached TruffleString.AsManagedNode asManagedNode) { + var encoding = libString.getEncoding(string); + TruffleString immutableManagedString = asManagedNode.execute(string.tstring, encoding.tencoding); + return getLanguage().getFrozenStringLiteral(immutableManagedString, encoding); + } + } + + @Primitive(name = "string_truncate", lowerFixnum = 1) + public abstract static class TruncateNode extends PrimitiveArrayArgumentsNode { + + @TruffleBoundary + @Specialization(guards = "newByteLength < 0") + protected RubyString truncateLengthNegative(RubyString string, int newByteLength) { + throw new RaiseException( + getContext(), + getContext().getCoreExceptions().argumentError(formatNegativeError(newByteLength), this)); + } + + @TruffleBoundary + @Specialization(guards = { "newByteLength >= 0", "newByteLength > byteLength" }) + protected RubyString truncateLengthTooLong(RubyString string, int newByteLength, + @Cached RubyStringLibrary libString, + @Bind("libString.byteLength(string)") int byteLength) { + throw new RaiseException( + getContext(), + coreExceptions().argumentError(formatTooLongError(newByteLength, string), this)); + } + + @Specialization(guards = { "newByteLength >= 0", 
"newByteLength <= byteLength" }) + protected RubyString tuncate(RubyString string, int newByteLength, + @Cached RubyStringLibrary libString, + @Cached TruffleString.SubstringByteIndexNode substringNode, + @Bind("libString.byteLength(string)") int byteLength) { + var tencoding = libString.getTEncoding(string); + string.setTString(substringNode.execute(string.tstring, 0, newByteLength, tencoding, true)); + return string; + } + + @TruffleBoundary + private String formatNegativeError(int count) { + return StringUtils.format("Invalid byte count: %d is negative", count); + } + + @TruffleBoundary + private String formatTooLongError(int count, RubyString string) { + return StringUtils + .format("Invalid byte count: %d exceeds string size of %d bytes", count, + string.byteLengthUncached()); } } diff --git a/src/main/java/org/truffleruby/core/string/StringOperations.java b/src/main/java/org/truffleruby/core/string/StringOperations.java index 43b6556293f6..b6e236d6baab 100644 --- a/src/main/java/org/truffleruby/core/string/StringOperations.java +++ b/src/main/java/org/truffleruby/core/string/StringOperations.java @@ -28,54 +28,39 @@ import java.nio.CharBuffer; import java.nio.charset.Charset; -import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.AbstractTruffleString; import org.jcodings.Encoding; import org.jcodings.specific.ASCIIEncoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.array.ArrayOperations; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.TStringUtils; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import org.truffleruby.language.objects.AllocationTracing; public abstract class StringOperations 
{ - public static RubyString createUTF8String(RubyContext context, RubyLanguage language, Rope rope) { + public static RubyString createUTF8String(RubyContext context, RubyLanguage language, String string) { final RubyString instance = new RubyString( context.getCoreLibrary().stringClass, language.stringShape, false, - rope, + TStringUtils.utf8TString(string), Encodings.UTF_8); - return instance; - } - public static RubyString createString(Node node, Rope rope, RubyEncoding encoding) { - final RubyString instance = new RubyString( - RubyContext.get(node).getCoreLibrary().stringClass, - RubyLanguage.get(node).stringShape, - false, - rope, - encoding); - AllocationTracing.trace(instance, node); return instance; } - /** Only use when there is no Node to report the allocation */ - public static RubyString createString(RubyContext context, RubyLanguage language, Rope rope, - RubyEncoding encoding) { + public static RubyString createUTF8String(RubyContext context, RubyLanguage language, + AbstractTruffleString string) { final RubyString instance = new RubyString( context.getCoreLibrary().stringClass, language.stringShape, false, - rope, - encoding); + string, + Encodings.UTF_8); + return instance; } @@ -107,20 +92,6 @@ public static byte[] encodeBytes(String value, Encoding encoding) { return bytes; } - public static LeafRope encodeRope(String value, Encoding encoding, CodeRange codeRange) { - if (codeRange == CodeRange.CR_7BIT) { - return RopeOperations.encodeAscii(value, encoding); - } - - final byte[] bytes = encodeBytes(value, encoding); - - return RopeOperations.create(bytes, encoding, codeRange); - } - - public static LeafRope encodeRope(String value, Encoding encoding) { - return encodeRope(value, encoding, CodeRange.CR_UNKNOWN); - } - public static boolean isAsciiOnly(String string) { for (int i = 0; i < string.length(); i++) { int c = string.charAt(i); @@ -130,4 +101,18 @@ public static boolean isAsciiOnly(String string) { } return true; } + + /** Prefer this to 
{@code getBytes(StandardCharsets.US_ASCII)} */ + public static byte[] encodeAsciiBytes(String value) { + assert isAsciiOnly(value) : "String contained non ascii characters \"" + value + "\""; + + final byte[] bytes = new byte[value.length()]; + + for (int i = 0; i < bytes.length; i++) { + bytes[i] = (byte) value.charAt(i); + } + + return bytes; + } + } diff --git a/src/main/java/org/truffleruby/core/string/StringSupport.java b/src/main/java/org/truffleruby/core/string/StringSupport.java index 8ead606da7d0..74efd38f9f7b 100644 --- a/src/main/java/org/truffleruby/core/string/StringSupport.java +++ b/src/main/java/org/truffleruby/core/string/StringSupport.java @@ -26,14 +26,20 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.core.string; -import static org.truffleruby.core.rope.CodeRange.CR_7BIT; -import static org.truffleruby.core.rope.CodeRange.CR_BROKEN; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; -import static org.truffleruby.core.rope.CodeRange.CR_VALID; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.ASCII; +import static com.oracle.truffle.api.strings.TruffleString.CodeRange.VALID; import java.util.Arrays; +import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.nodes.Node; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.CreateCodePointIteratorNode; +import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; +import com.oracle.truffle.api.strings.TruffleString.FromByteArrayNode; import org.graalvm.collections.Pair; import org.jcodings.Config; import org.jcodings.Encoding; @@ -44,17 +50,12 @@ import org.jcodings.specific.UTF8Encoding; import org.jcodings.util.IntHash; import org.truffleruby.RubyContext; +import 
org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.collections.IntHashMap; import org.truffleruby.core.array.ArrayUtils; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.Bytes; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeOperations; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.language.control.RaiseException; import org.truffleruby.utils.Utils; @@ -66,35 +67,36 @@ public final class StringSupport { // exceeding the buffer size. private static final int CASE_MAP_BUFFER_SIZE = 32; - public static int characterLength(Encoding encoding, CodeRange codeRange, byte[] bytes, - int byteOffset, int byteEnd, boolean recoverIfBroken) { + /** codeRange==null means unknown. recoverIfBroken=false so can return negative values. 
*/ + private static int characterLength(Encoding encoding, TruffleString.CodeRange codeRange, byte[] bytes, + int byteOffset, int byteEnd) { assert byteOffset >= 0 && byteOffset < byteEnd && byteEnd <= bytes.length; + if (codeRange == null) { + return preciseLength(encoding, bytes, byteOffset, byteEnd); + } + switch (codeRange) { - case CR_7BIT: + case ASCII: return 1; - case CR_VALID: + case VALID: return characterLengthValid(encoding, bytes, byteOffset, byteEnd); - case CR_BROKEN: - case CR_UNKNOWN: - if (recoverIfBroken) { - return length(encoding, bytes, byteOffset, byteEnd); - } else { - return preciseLength(encoding, bytes, byteOffset, byteEnd); - } + case BROKEN: + return preciseLength(encoding, bytes, byteOffset, byteEnd); default: throw Utils.unsupportedOperation("unknown code range value: ", codeRange); } } - public static int characterLength(Encoding encoding, CodeRange codeRange, byte[] bytes, int byteOffset, - int byteEnd) { - return characterLength(encoding, codeRange, bytes, byteOffset, byteEnd, false); + /** recoverIfBroken=false so can return negative values */ + public static int characterLength(RubyEncoding encoding, byte[] bytes, int byteOffset, int byteEnd) { + assert byteOffset >= 0 && byteOffset < byteEnd && byteEnd <= bytes.length; + return preciseLength(encoding.jcoding, bytes, byteOffset, byteEnd); } private static int characterLengthValid(Encoding encoding, byte[] bytes, int byteOffset, int byteEnd) { if (encoding.isUTF8()) { - return UTF8Operations.charWidth(bytes[byteOffset]); + return utf8CharWidth(bytes[byteOffset]); } else if (encoding.isAsciiCompatible()) { if (bytes[byteOffset] >= 0) { return 1; @@ -110,6 +112,21 @@ private static int characterLengthValid(Encoding encoding, byte[] bytes, int byt } } + public static int utf8CharWidth(byte b) { + if (b >= 0) { + return 1; + } else { + switch (b & 0xf0) { + case 0xe0: + return 3; + case 0xf0: + return 4; + default: + return 2; + } + } + } + /** This method returns the byte length of 
the first encountered character in `bytes`, starting at offset `p` and * ending at byte position `e`. The `Encoding` implementation will perform character validation and return a @@ -194,6 +211,7 @@ public static boolean MBCLEN_INVALID_P(int r) { // MBCLEN_CHARFOUND_LEN, ONIGENC_MBCLEN_CHARFOUND_LEN public static int MBCLEN_CHARFOUND_LEN(int r) { + assert MBCLEN_CHARFOUND_P(r); return r; } @@ -202,129 +220,35 @@ public static boolean MBCLEN_CHARFOUND_P(int r) { return 0 < r; } - // MRI: search_nonascii - public static int searchNonAscii(byte[] bytes, int p, int end) { - while (p < end) { - if (!Encoding.isAscii(bytes[p])) { - return p; - } - p++; - } - return -1; - } + @CompilationFinal(dimensions = 1) private static final byte[] NON_ASCII_NEEDLE = { (byte) 0b1111_1111 }; + @CompilationFinal(dimensions = 1) private static final byte[] NON_ASCII_MASK = { 0b0111_1111 }; // MRI: search_nonascii - public static int searchNonAscii(Bytes bytes) { - for (int p = 0; p < bytes.length; ++p) { - if (!Encoding.isAscii(bytes.get(p))) { - return p; - } - } - return -1; + /** NOTE: this returns a logical offset, not the offset in the byteArray. 
*/ + public static int searchNonAscii(InternalByteArray byteArray, int start) { + final int offset = byteArray.getOffset(); + return searchNonAscii(byteArray.getArray(), offset + start, byteArray.getEnd()) - offset; } - // MRI: rb_enc_strlen - public static int strLength(Encoding enc, byte[] bytes, int p, int end) { - return strLength(enc, bytes, p, end, CR_UNKNOWN); + // MRI: search_nonascii + public static int searchNonAscii(byte[] bytes, int p, int end) { + return com.oracle.truffle.api.ArrayUtils.indexOfWithOrMask(bytes, p, end - p, NON_ASCII_NEEDLE, NON_ASCII_MASK); } - // MRI: enc_strlen + // MRI: rb_enc_strlen / enc_strlen @TruffleBoundary - public static int strLength(Encoding enc, byte[] bytes, int p, int e, CodeRange cr) { - int c; - if (enc.isFixedWidth()) { - return (e - p + enc.minLength() - 1) / enc.minLength(); - } else if (enc.isAsciiCompatible()) { - c = 0; - if (cr == CR_7BIT || cr == CR_VALID) { - while (p < e) { - if (Encoding.isAscii(bytes[p])) { - int q = searchNonAscii(bytes, p, e); - if (q == -1) { - return c + (e - p); - } - c += q - p; - p = q; - } - p += characterLength(enc, cr, bytes, p, e); - c++; - } - } else { - while (p < e) { - if (Encoding.isAscii(bytes[p])) { - int q = searchNonAscii(bytes, p, e); - if (q == -1) { - return c + (e - p); - } - c += q - p; - p = q; - } - p += characterLength(enc, cr, bytes, p, e, true); - c++; - } - } - return c; - } - - for (c = 0; p < e; c++) { - p += characterLength(enc, cr, bytes, p, e, true); - } - return c; + public static int strLength(RubyEncoding encoding, byte[] bytes, int p, int e) { + var tstring = FromByteArrayNode.getUncached().execute(bytes, p, e - p, encoding.tencoding, false); + return tstring.codePointLengthUncached(encoding.tencoding); } - /** See {@link RopeNodes.CalculateAttributesNode#calculateAttributesAsciiCompatibleGeneric} */ - // MRI: rb_enc_strlen_cr - public static StringAttributes strLengthWithCodeRangeAsciiCompatible(Encoding enc, byte[] bytes, int p, int end) { - 
CodeRange cr = CR_UNKNOWN; - int c = 0; - while (p < end) { - if (Encoding.isAscii(bytes[p])) { - int q = searchNonAscii(bytes, p, end); - if (q == -1) { - return new StringAttributes(c + (end - p), cr == CR_UNKNOWN ? CR_7BIT : cr); - } - c += q - p; - p = q; - } - int cl = preciseLength(enc, bytes, p, end); - if (cl > 0) { - if (cr != CR_BROKEN) { - cr = CR_VALID; - } - p += cl; - } else { - cr = CR_BROKEN; - p++; - } - c++; - } - return new StringAttributes(c, cr == CR_UNKNOWN ? CR_7BIT : cr); - } - - /** See {@link RopeNodes.CalculateAttributesNode#calculateAttributesNonAsciiCompatible} */ - // MRI: rb_enc_strlen_cr - public static StringAttributes strLengthWithCodeRangeNonAsciiCompatible(Encoding enc, byte[] bytes, int p, - int end) { - CodeRange cr = CR_UNKNOWN; - int c; - for (c = 0; p < end; c++) { - int cl = preciseLength(enc, bytes, p, end); - if (cl > 0) { - if (cr != CR_BROKEN) { - cr = CR_VALID; - } - p += cl; - } else { - cr = CR_BROKEN; - p += enc.minLength(); - } - } - - return new StringAttributes(c, cr == CR_UNKNOWN ? 
CR_7BIT : cr); + public static int codePoint(Encoding enc, byte[] bytes, int p, int end, Node node) { + return codePoint(enc, null, bytes, p, end, node); } @TruffleBoundary - public static int codePoint(Encoding enc, CodeRange codeRange, byte[] bytes, int p, int end, Node node) { + public static int codePoint(Encoding enc, TruffleString.CodeRange codeRange, byte[] bytes, int p, int end, + Node node) { if (p >= end) { final RubyContext context = RubyContext.get(node); throw new RaiseException(context, context.getCoreExceptions().argumentError("empty string", node)); @@ -347,38 +271,14 @@ public static int codeLength(Encoding enc, int c) { } @TruffleBoundary - public static int codeToMbc(Encoding encoding, int code, byte[] bytes, int p) { - return encoding.codeToMbc(code, bytes, p); - } - - @TruffleBoundary - public static int preciseCodePoint(Encoding enc, CodeRange codeRange, byte[] bytes, int p, int end) { - int l = characterLength(enc, codeRange, bytes, p, end); - if (l > 0) { - return enc.mbcToCode(bytes, p, end); - } - return -1; - } - - @TruffleBoundary - public static int mbcToCode(Encoding encoding, Rope rope, int p, int end) { - return encoding.mbcToCode(rope.getBytes(), p, end); + public static int mbcToCode(Encoding encoding, byte[] bytes, int p, int end) { + return encoding.mbcToCode(bytes, p, end); } public static int offset(int start, int end, int charEnd) { return charEnd == -1 ? end - start : Math.min(end, charEnd) - start; } - public static int caseCmp(byte[] bytes1, int p1, byte[] bytes2, int p2, int len) { - int i = -1; - for (; ++i < len && bytes1[p1 + i] == bytes2[p2 + i];) { - } - if (i < len) { - return (bytes1[p1 + i] & 0xff) > (bytes2[p2 + i] & 0xff) ? 
1 : -1; - } - return 0; - } - public static int scanHex(byte[] bytes, int p, int len) { return scanHex(bytes, p, len, ASCIIEncoding.INSTANCE); } @@ -458,10 +358,11 @@ public static String escapedCharFormat(int c, boolean isUnicode) { /** rb_str_count */ @TruffleBoundary - public static int strCount(Rope str, boolean[] table, TrTables tables, Encoding enc, Node node) { - final byte[] bytes = str.getBytes(); - int p = 0; - final int end = str.byteLength(); + public static int strCount(InternalByteArray byteArray, TruffleString.CodeRange codeRange, boolean[] table, + TrTables tables, Encoding enc, Node node) { + final byte[] bytes = byteArray.getArray(); + int p = byteArray.getOffset(); + final int end = byteArray.getEnd(); final boolean asciiCompat = enc.isAsciiCompatible(); int count = 0; @@ -473,7 +374,7 @@ public static int strCount(Rope str, boolean[] table, TrTables tables, Encoding } p++; } else { - c = codePoint(enc, str.getCodeRange(), bytes, p, end, node); + c = codePoint(enc, codeRange, bytes, p, end, node); int cl = codeLength(enc, c); if (trFind(c, table, tables)) { count++; @@ -485,12 +386,80 @@ public static int strCount(Rope str, boolean[] table, TrTables tables, Encoding return count; } + public static char[] bytesToChars(InternalByteArray byteArray) { + final int byteLength = byteArray.getLength(); + final char[] chars = new char[byteLength]; + + for (int n = 0; n < byteLength; n++) { + chars[n] = (char) byteArray.get(n); + } + + return chars; + } + + // rb_enc_ascget + private static int encAscget(byte[] pBytes, int p, int e, int[] len, Encoding enc, + TruffleString.CodeRange codeRange) { + int c; + int l; + + if (e <= p) { + return -1; + } + + if (EncodingUtils.encAsciicompat(enc)) { + c = pBytes[p] & 0xFF; + if (!Encoding.isAscii((byte) c)) { + return -1; + } + if (len != null) { + len[0] = 1; + } + return c; + } + l = characterLength(enc, codeRange, pBytes, p, e); + if (!MBCLEN_CHARFOUND_P(l)) { + return -1; + } + c = enc.mbcToCode(pBytes, p, e); 
+ if (!Encoding.isAscii(c)) { + return -1; + } + if (len != null) { + len[0] = l; + } + return c; + } + + // rb_enc_codepoint_len + @TruffleBoundary + private static int encCodepointLength(byte[] pBytes, int p, int e, int[] len_p, Encoding enc, + TruffleString.CodeRange codeRange, Node node) { + int r; + if (e <= p) { + final RubyContext context = RubyContext.get(node); + throw new RaiseException(context, context.getCoreExceptions().argumentError("empty string", node)); + } + r = characterLength(enc, codeRange, pBytes, p, e); + if (!MBCLEN_CHARFOUND_P(r)) { + final RubyContext context = RubyContext.get(node); + throw new RaiseException( + context, + context.getCoreExceptions().argumentError("invalid byte sequence in " + enc, node)); + } + if (len_p != null) { + len_p[0] = MBCLEN_CHARFOUND_LEN(r); + } + return codePoint(enc, codeRange, pBytes, p, e, node); + } + /** rb_str_tr / rb_str_tr_bang */ public static final class TR { - public TR(Rope bytes) { - p = 0; - pend = bytes.byteLength() + p; - buf = bytes.getBytes(); + public TR(AbstractTruffleString string, RubyEncoding encoding) { + var bytes = string.getInternalByteArrayUncached(encoding.tencoding); + p = bytes.getOffset(); + pend = bytes.getEnd(); + buf = bytes.getArray(); now = max = 0; gen = false; } @@ -508,14 +477,16 @@ public static final class TrTables { private static final Object DUMMY_VALUE = ""; @TruffleBoundary - public static TrTables trSetupTable(Rope str, boolean[] stable, TrTables tables, boolean first, Encoding enc, - Node node) { + public static TrTables trSetupTable(AbstractTruffleString str, RubyEncoding encoding, boolean[] stable, + TrTables tables, boolean first, Encoding enc, Node node) { int i, l[] = { 0 }; final boolean cflag; - final TR tr = new TR(str); + final TR tr = new TR(str, encoding); - if (str.byteLength() > 1 && EncodingUtils.encAscget(tr.buf, tr.p, tr.pend, l, enc, str.getCodeRange()) == '^') { + var codeRange = str.getByteCodeRangeUncached(encoding.tencoding); + if 
(str.byteLength(encoding.tencoding) > 1 && + encAscget(tr.buf, tr.p, tr.pend, l, enc, codeRange) == '^') { cflag = true; tr.p += l[0]; } else { @@ -539,7 +510,7 @@ public static TrTables trSetupTable(Rope str, boolean[] stable, TrTables tables, IntHashMap table = null, ptable = null; int c; - while ((c = trNext(tr, enc, str.getCodeRange(), node)) != -1) { + while ((c = trNext(tr, enc, codeRange, node)) != -1) { if (c < TRANS_SIZE) { if (buf == null) { // initialize buf buf = new byte[TRANS_SIZE]; @@ -614,7 +585,7 @@ public static boolean trFind(final int c, final boolean[] table, final TrTables } @TruffleBoundary - public static int trNext(TR tr, Encoding enc, CodeRange codeRange, Node node) { + public static int trNext(TR tr, Encoding enc, TruffleString.CodeRange codeRange, Node node) { for (;;) { if (!tr.gen) { return trNext_nextpart(tr, enc, codeRange, node); @@ -635,21 +606,21 @@ public static int trNext(TR tr, Encoding enc, CodeRange codeRange, Node node) { } } - private static int trNext_nextpart(TR tr, Encoding enc, CodeRange codeRange, Node node) { + private static int trNext_nextpart(TR tr, Encoding enc, TruffleString.CodeRange codeRange, Node node) { final int[] n = { 0 }; if (tr.p == tr.pend) { return -1; } - if (EncodingUtils.encAscget(tr.buf, tr.p, tr.pend, n, enc, codeRange) == '\\' && tr.p + n[0] < tr.pend) { + if (encAscget(tr.buf, tr.p, tr.pend, n, enc, codeRange) == '\\' && tr.p + n[0] < tr.pend) { tr.p += n[0]; } - tr.now = EncodingUtils.encCodepointLength(tr.buf, tr.p, tr.pend, n, enc, codeRange, node); + tr.now = encCodepointLength(tr.buf, tr.p, tr.pend, n, enc, codeRange, node); tr.p += n[0]; - if (EncodingUtils.encAscget(tr.buf, tr.p, tr.pend, n, enc, codeRange) == '-' && tr.p + n[0] < tr.pend) { + if (encAscget(tr.buf, tr.p, tr.pend, n, enc, codeRange) == '-' && tr.p + n[0] < tr.pend) { tr.p += n[0]; if (tr.p < tr.pend) { - int c = EncodingUtils.encCodepointLength(tr.buf, tr.p, tr.pend, n, enc, codeRange, node); + int c = 
encCodepointLength(tr.buf, tr.p, tr.pend, n, enc, codeRange, node); tr.p += n[0]; if (tr.now > c) { final RubyContext context = RubyContext.get(node); @@ -678,14 +649,15 @@ public enum NeighborChar { // MRI: str_succ @TruffleBoundary - public static RopeBuilder succCommon(Rope original, Node node) { + public static TStringBuilder succCommon(RubyString original, Node node) { byte carry[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN]; int carryP = 0; carry[0] = 1; int carryLen = 1; - Encoding enc = original.getEncoding(); - RopeBuilder valueCopy = RopeBuilder.createRopeBuilder(original.getBytes(), enc); + final RubyEncoding encoding = original.getEncodingUncached(); + final Encoding enc = encoding.jcoding; + TStringBuilder valueCopy = TStringBuilder.create(original); int p = 0; int end = p + valueCopy.getLength(); int s = end; @@ -705,11 +677,11 @@ public static RopeBuilder succCommon(Rope original, Node node) { } } - int cl = characterLength(enc, CR_UNKNOWN, bytes, s, end); + int cl = characterLength(encoding, bytes, s, end); if (cl <= 0) { continue; } - switch (neighbor = succAlnumChar(enc, bytes, s, cl, carry, 0, node)) { + switch (neighbor = succAlnumChar(encoding, bytes, s, cl, carry, 0, node)) { case NOT_CHAR: continue; case FOUND: @@ -725,18 +697,18 @@ public static RopeBuilder succCommon(Rope original, Node node) { if (!alnumSeen) { s = end; while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) { - int cl = characterLength(enc, CR_UNKNOWN, bytes, s, end); + int cl = characterLength(encoding, bytes, s, end); if (cl <= 0) { continue; } - neighbor = succChar(enc, bytes, s, cl, node); + neighbor = succChar(encoding, bytes, s, cl, node); if (neighbor == NeighborChar.FOUND) { return valueCopy; } - if (characterLength(enc, CR_UNKNOWN, bytes, s, s + 1) != cl) { - succChar(enc, bytes, s, cl, node); /* wrapped to \0...\0. search next valid char. 
*/ + if (characterLength(encoding, bytes, s, s + 1) != cl) { + succChar(encoding, bytes, s, cl, node); /* wrapped to \0...\0. search next valid char. */ } - if (!enc.isAsciiCompatible()) { + if (!encoding.isAsciiCompatible) { System.arraycopy(bytes, s, carry, 0, cl); carryLen = cl; } @@ -757,15 +729,16 @@ public static RopeBuilder succCommon(Rope original, Node node) { } // MRI: enc_succ_char - public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len, Node node) { + public static NeighborChar succChar(RubyEncoding encoding, byte[] bytes, int p, int len, Node node) { + Encoding enc = encoding.jcoding; int l; if (enc.minLength() > 1) { /* wchar, trivial case */ - int r = characterLength(enc, CR_UNKNOWN, bytes, p, p + len), c; + int r = characterLength(encoding, bytes, p, p + len), c; if (!MBCLEN_CHARFOUND_P(r)) { return NeighborChar.NOT_CHAR; } - c = codePoint(enc, CR_UNKNOWN, bytes, p, p + len, node) + 1; + c = codePoint(enc, bytes, p, p + len, node) + 1; l = codeLength(enc, c); if (l == 0) { return NeighborChar.NOT_CHAR; @@ -773,8 +746,8 @@ public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len, if (l != len) { return NeighborChar.WRAPPED; } - EncodingUtils.encMbcput(c, bytes, p, enc); - r = characterLength(enc, CR_UNKNOWN, bytes, p, p + len); + enc.codeToMbc(c, bytes, p); + r = characterLength(encoding, bytes, p, p + len); if (!MBCLEN_CHARFOUND_P(r)) { return NeighborChar.NOT_CHAR; } @@ -790,7 +763,7 @@ public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len, return NeighborChar.WRAPPED; } bytes[p + i] = (byte) ((bytes[p + i] & 0xff) + 1); - l = characterLength(enc, CR_UNKNOWN, bytes, p, p + len); + l = characterLength(encoding, bytes, p, p + len); if (MBCLEN_CHARFOUND_P(l)) { l = MBCLEN_CHARFOUND_LEN(l); if (l == len) { @@ -805,7 +778,7 @@ public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len, int len2; int l2; for (len2 = len - 1; 0 < len2; len2--) { - l2 = 
characterLength(enc, CR_UNKNOWN, bytes, p, p + len2); + l2 = characterLength(encoding, bytes, p, p + len2); if (!MBCLEN_INVALID_P(l2)) { break; } @@ -818,8 +791,9 @@ public static NeighborChar succChar(Encoding enc, byte[] bytes, int p, int len, } // MRI: enc_succ_alnum_char - private static NeighborChar succAlnumChar(Encoding enc, byte[] bytes, int p, int len, byte[] carry, int carryP, - Node node) { + private static NeighborChar succAlnumChar(RubyEncoding encoding, byte[] bytes, int p, int len, byte[] carry, + int carryP, Node node) { + Encoding enc = encoding.jcoding; byte save[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN]; int c = enc.mbcToCode(bytes, p, p + len); @@ -833,7 +807,7 @@ private static NeighborChar succAlnumChar(Encoding enc, byte[] bytes, int p, int } System.arraycopy(bytes, p, save, 0, len); - NeighborChar ret = succChar(enc, bytes, p, len, node); + NeighborChar ret = succChar(encoding, bytes, p, len, node); if (ret == NeighborChar.FOUND) { c = enc.mbcToCode(bytes, p, p + len); if (enc.isCodeCType(c, cType)) { @@ -846,7 +820,7 @@ private static NeighborChar succAlnumChar(Encoding enc, byte[] bytes, int p, int while (true) { System.arraycopy(bytes, p, save, 0, len); - ret = predChar(enc, bytes, p, len, node); + ret = predChar(encoding, bytes, p, len, node); if (ret == NeighborChar.FOUND) { c = enc.mbcToCode(bytes, p, p + len); if (!enc.isCodeCType(c, cType)) { @@ -870,19 +844,20 @@ private static NeighborChar succAlnumChar(Encoding enc, byte[] bytes, int p, int } System.arraycopy(bytes, p, carry, carryP, len); - succChar(enc, carry, carryP, len, node); + succChar(encoding, carry, carryP, len, node); return NeighborChar.WRAPPED; } - private static NeighborChar predChar(Encoding enc, byte[] bytes, int p, int len, Node node) { + private static NeighborChar predChar(RubyEncoding encoding, byte[] bytes, int p, int len, Node node) { + Encoding enc = encoding.jcoding; int l; if (enc.minLength() > 1) { /* wchar, trivial case */ - int r = 
characterLength(enc, CR_UNKNOWN, bytes, p, p + len), c; + int r = characterLength(encoding, bytes, p, p + len), c; if (!MBCLEN_CHARFOUND_P(r)) { return NeighborChar.NOT_CHAR; } - c = codePoint(enc, CR_UNKNOWN, bytes, p, p + len, node); + c = codePoint(enc, bytes, p, p + len, node); if (c == 0) { return NeighborChar.NOT_CHAR; } @@ -894,8 +869,8 @@ private static NeighborChar predChar(Encoding enc, byte[] bytes, int p, int len, if (l != len) { return NeighborChar.WRAPPED; } - EncodingUtils.encMbcput(c, bytes, p, enc); - r = characterLength(enc, CR_UNKNOWN, bytes, p, p + len); + enc.codeToMbc(c, bytes, p); + r = characterLength(encoding, bytes, p, p + len); if (!MBCLEN_CHARFOUND_P(r)) { return NeighborChar.NOT_CHAR; } @@ -910,7 +885,7 @@ private static NeighborChar predChar(Encoding enc, byte[] bytes, int p, int len, return NeighborChar.WRAPPED; } bytes[p + i] = (byte) ((bytes[p + i] & 0xff) - 1); - l = characterLength(enc, CR_UNKNOWN, bytes, p, p + len); + l = characterLength(encoding, bytes, p, p + len); if (MBCLEN_CHARFOUND_P(l)) { l = MBCLEN_CHARFOUND_LEN(l); if (l == len) { @@ -925,7 +900,7 @@ private static NeighborChar predChar(Encoding enc, byte[] bytes, int p, int len, int len2; int l2; for (len2 = len - 1; 0 < len2; len2--) { - l2 = characterLength(enc, CR_UNKNOWN, bytes, p, p + len2); + l2 = characterLength(encoding, bytes, p, p + len2); if (!MBCLEN_INVALID_P(l2)) { break; } @@ -939,15 +914,16 @@ private static NeighborChar predChar(Encoding enc, byte[] bytes, int p, int len, /** rb_str_delete_bang */ @TruffleBoundary - public static Rope delete_bangCommon19(Rope rubyString, boolean[] squeeze, TrTables tables, Encoding enc, - Node node) { + public static TruffleString delete_bangCommon19(ATStringWithEncoding rubyString, boolean[] squeeze, TrTables tables, + RubyEncoding encoding, Node node) { + Encoding enc = encoding.jcoding; int s = 0; int t = s; int send = s + rubyString.byteLength(); byte[] bytes = rubyString.getBytesCopy(); boolean modified = false; - 
boolean asciiCompatible = enc.isAsciiCompatible(); - CodeRange cr = asciiCompatible ? CR_7BIT : CR_VALID; + boolean asciiCompatible = encoding.isAsciiCompatible; + var cr = asciiCompatible ? ASCII : VALID; while (s < send) { int c; if (asciiCompatible && Encoding.isAscii(c = bytes[s] & 0xff)) { @@ -970,41 +946,45 @@ public static Rope delete_bangCommon19(Rope rubyString, boolean[] squeeze, TrTab enc.codeToMbc(c, bytes, t); } t += cl; - if (cr == CR_7BIT) { - cr = CR_VALID; + if (cr == ASCII) { + cr = VALID; } } s += cl; } } - return modified ? RopeOperations.create(ArrayUtils.extractRange(bytes, 0, t), enc, cr) : null; + return modified + ? TStringUtils.fromByteArray(ArrayUtils.extractRange(bytes, 0, t), encoding) + /* cr */ : null; } /** rb_str_tr / rb_str_tr_bang */ - private static CodeRange CHECK_IF_ASCII(int c, CodeRange currentCodeRange) { - if (currentCodeRange == CR_7BIT && !Encoding.isAscii(c)) { - return CR_VALID; + private static TruffleString.CodeRange CHECK_IF_ASCII(int c, TruffleString.CodeRange currentCodeRange) { + if (currentCodeRange == ASCII && !Encoding.isAscii(c)) { + return VALID; } return currentCodeRange; } @TruffleBoundary - public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding e1, Encoding enc, boolean sflag, - Node node) { + public static TruffleString trTransHelper(ATStringWithEncoding self, ATStringWithEncoding srcStr, + ATStringWithEncoding replStr, Encoding e1, RubyEncoding rubyEncoding, + boolean sflag, Node node) { // This method does not handle the cases where either srcStr or replStr are empty. It is the responsibility // of the caller to take the appropriate action in those cases. 
- CodeRange cr = self.getCodeRange(); + final Encoding enc = rubyEncoding.jcoding; + var cr = self.getCodeRange(); - final StringSupport.TR trSrc = new StringSupport.TR(srcStr); + final StringSupport.TR trSrc = new StringSupport.TR(srcStr.tstring, srcStr.encoding); boolean cflag = false; int[] l = { 0 }; if (srcStr.byteLength() > 1 && - EncodingUtils.encAscget(trSrc.buf, trSrc.p, trSrc.pend, l, enc, srcStr.getCodeRange()) == '^' && + encAscget(trSrc.buf, trSrc.p, trSrc.pend, l, enc, srcStr.getCodeRange()) == '^' && trSrc.p + 1 < trSrc.pend) { cflag = true; trSrc.p++; @@ -1012,7 +992,7 @@ public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding int c, c0, last = 0; final int[] trans = new int[StringSupport.TRANS_SIZE]; - final StringSupport.TR trRepl = new StringSupport.TR(replStr); + final StringSupport.TR trRepl = new StringSupport.TR(replStr.tstring, replStr.encoding); boolean modified = false; IntHash hash = null; boolean singlebyte = self.isSingleByteOptimizable(); @@ -1065,16 +1045,16 @@ public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding } } - if (cr == CR_VALID && enc.isAsciiCompatible()) { - cr = CR_7BIT; + if (cr == VALID && rubyEncoding.isAsciiCompatible) { + cr = ASCII; } int s = 0; int send = self.byteLength(); - final Rope ret; + final TruffleString ret; if (sflag) { - byte sbytes[] = self.getBytes(); + byte[] sbytes = self.getBytesOrCopy(); int clen, tlen; int max = self.byteLength(); int save = -1; @@ -1082,7 +1062,7 @@ public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding int t = 0; while (s < send) { boolean mayModify = false; - c0 = c = codePoint(e1, CR_UNKNOWN, sbytes, s, send, node); + c0 = c = codePoint(e1, sbytes, s, send, node); clen = codeLength(e1, c); tlen = enc == e1 ? 
clen : codeLength(enc, c); s += clen; @@ -1135,9 +1115,9 @@ public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding t += tlen; } - ret = RopeOperations.create(ArrayUtils.extractRange(buf, 0, t), enc, cr); - } else if (enc.isSingleByte() || (singlebyte && hash == null)) { - byte sbytes[] = self.getBytesCopy(); + ret = TStringUtils.fromByteArray(ArrayUtils.extractRange(buf, 0, t), rubyEncoding); // cr + } else if (rubyEncoding.isSingleByte || (singlebyte && hash == null)) { + byte[] sbytes = self.getBytesCopy(); while (s < send) { c = sbytes[s] & 0xff; if (trans[c] != -1) { @@ -1153,16 +1133,16 @@ public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding s++; } - ret = RopeOperations.create(sbytes, enc, cr); + ret = TStringUtils.fromByteArray(sbytes, rubyEncoding); // cr } else { - byte sbytes[] = self.getBytes(); + byte[] sbytes = self.getBytesOrCopy(); int clen, tlen, max = (int) (self.byteLength() * 1.2); byte[] buf = new byte[max]; int t = 0; while (s < send) { boolean mayModify = false; - c0 = c = codePoint(e1, CR_UNKNOWN, sbytes, s, send, node); + c0 = c = codePoint(e1, sbytes, s, send, node); clen = codeLength(e1, c); tlen = enc == e1 ? 
clen : codeLength(enc, c); @@ -1210,7 +1190,7 @@ public static Rope trTransHelper(Rope self, Rope srcStr, Rope replStr, Encoding t += tlen; } - ret = RopeOperations.create(ArrayUtils.extractRange(buf, 0, t), enc, cr); + ret = TStringUtils.fromByteArray(ArrayUtils.extractRange(buf, 0, t), rubyEncoding); // cr } if (modified) { @@ -1236,50 +1216,34 @@ private static int trCode(int c, int[] trans, IntHash hash, boolean cfl } @TruffleBoundary - public static int multiByteCasecmp(Encoding enc, Rope value, Rope otherValue) { - byte[] bytes = value.getBytes(); - int p = 0; - int end = value.byteLength(); - - byte[] obytes = otherValue.getBytes(); - int op = 0; - int oend = otherValue.byteLength(); - - while (p < end && op < oend) { - final int c, oc; - if (enc.isAsciiCompatible()) { - c = bytes[p] & 0xff; - oc = obytes[op] & 0xff; - } else { - c = preciseCodePoint(enc, value.getCodeRange(), bytes, p, end); - oc = preciseCodePoint(enc, otherValue.getCodeRange(), obytes, op, oend); - } - - int cl, ocl; - if (enc.isAsciiCompatible() && Encoding.isAscii(c) && Encoding.isAscii(oc)) { + public static int multiByteCasecmp(RubyEncoding enc, AbstractTruffleString selfTString, + TruffleString.Encoding selfEncoding, AbstractTruffleString otherTString, + TruffleString.Encoding otherEncoding) { + var selfIterator = CreateCodePointIteratorNode.getUncached().execute(selfTString, selfEncoding, + ErrorHandling.RETURN_NEGATIVE); + var otherIterator = CreateCodePointIteratorNode.getUncached().execute(otherTString, otherEncoding, + ErrorHandling.RETURN_NEGATIVE); + + while (selfIterator.hasNext() && otherIterator.hasNext()) { + final int selfPos = selfIterator.getByteIndex(); + final int c = selfIterator.nextUncached(); + + final int otherPos = otherIterator.getByteIndex(); + final int oc = otherIterator.nextUncached(); + + if (enc.isAsciiCompatible && (c >= 0 && Encoding.isAscii(c)) && (oc >= 0 && Encoding.isAscii(oc))) { byte uc = AsciiTables.ToUpperCaseTable[c]; byte uoc = 
AsciiTables.ToUpperCaseTable[oc]; if (uc != uoc) { return uc < uoc ? -1 : 1; } - cl = ocl = 1; } else { - cl = characterLength( - enc, - enc == value.getEncoding() ? value.getCodeRange() : CR_UNKNOWN, - bytes, - p, - end, - true); - ocl = characterLength( - enc, - enc == otherValue.getEncoding() ? otherValue.getCodeRange() : CR_UNKNOWN, - obytes, - op, - oend, - true); + final int cl = selfIterator.getByteIndex() - selfPos; + final int ocl = otherIterator.getByteIndex() - otherPos; + // TODO: opt for 2 and 3 ? - int ret = caseCmp(bytes, p, obytes, op, cl < ocl ? cl : ocl); + int ret = caseCmp(selfTString, selfEncoding, otherTString, otherEncoding, selfPos, otherPos, + Math.min(cl, ocl)); if (ret != 0) { return ret < 0 ? -1 : 1; } @@ -1288,16 +1252,28 @@ public static int multiByteCasecmp(Encoding enc, Rope value, Rope otherValue) { } } - p += cl; - op += ocl; } - if (end - p == oend - op) { + + if (!selfIterator.hasNext() && !otherIterator.hasNext()) { return 0; } - return end - p > oend - op ? 1 : -1; + return selfIterator.hasNext() ? 1 : -1; } - public static boolean singleByteSqueeze(RopeBuilder value, boolean squeeze[]) { + private static int caseCmp(AbstractTruffleString a, TruffleString.Encoding aEncoding, + AbstractTruffleString b, TruffleString.Encoding bEncoding, int aPos, int bPos, int len) { + int i = 0; + while (i < len && a.readByteUncached(aPos + i, aEncoding) == b.readByteUncached(bPos + i, bEncoding)) { + i++; + } + if (i < len) { + return a.readByteUncached(aPos + i, aEncoding) > b.readByteUncached(bPos + i, + bEncoding) ? 
1 : -1; + } + return 0; + } + + public static boolean singleByteSqueeze(TStringBuilder value, boolean squeeze[]) { int s = 0; int t = s; int send = s + value.getLength(); @@ -1320,8 +1296,8 @@ public static boolean singleByteSqueeze(RopeBuilder value, boolean squeeze[]) { } @TruffleBoundary - public static boolean multiByteSqueeze(RopeBuilder value, CodeRange originalCodeRange, boolean[] squeeze, - TrTables tables, Encoding enc, boolean isArg, Node node) { + public static boolean multiByteSqueeze(TStringBuilder value, TruffleString.CodeRange originalCodeRange, + boolean[] squeeze, TrTables tables, Encoding enc, boolean isArg, Node node) { int s = 0; int t = s; int send = s + value.getLength(); @@ -1361,7 +1337,7 @@ public static boolean multiByteSqueeze(RopeBuilder value, CodeRange originalCode @TruffleBoundary private static int caseMapChar(int codePoint, Encoding enc, byte[] stringBytes, int stringByteOffset, - RopeBuilder builder, IntHolder flags, byte[] workBuffer) { + ByteArrayBuilder builder, IntHolder flags, byte[] workBuffer) { final IntHolder fromP = new IntHolder(); fromP.value = stringByteOffset; @@ -1391,35 +1367,9 @@ private static int caseMapChar(int codePoint, Encoding enc, byte[] stringBytes, return newByteLength; } - /** Returns a copy of {@code bytes} but with ASCII characters' case swapped, or {@code bytes} itself if the string - * doesn't require changes. The encoding must be ASCII-compatible (i.e. represent each ASCII character as a single - * byte ({@link Encoding#isAsciiCompatible()}). 
*/ - @TruffleBoundary - public static byte[] swapcaseMultiByteAsciiSimple(Encoding enc, CodeRange codeRange, byte[] bytes) { - assert enc.isAsciiCompatible(); - boolean modified = false; - int s = 0; - final int end = bytes.length; - - while (s < end) { - if (isAsciiAlpha(bytes[s])) { - if (!modified) { - bytes = bytes.clone(); - modified = true; - } - bytes[s] ^= 0x20; - s++; - } else { - s += characterLength(enc, codeRange, bytes, s, end); - } - } - - return bytes; - } - @TruffleBoundary - public static boolean swapCaseMultiByteComplex(Encoding enc, CodeRange originalCodeRange, RopeBuilder builder, - int caseMappingOptions, Node node) { + public static boolean swapCaseMultiByteComplex(Encoding enc, TruffleString.CodeRange originalCodeRange, + ByteArrayBuilder builder, int caseMappingOptions, Node node) { byte[] buf = new byte[CASE_MAP_BUFFER_SIZE]; final IntHolder flagP = new IntHolder(); @@ -1446,35 +1396,9 @@ public static boolean swapCaseMultiByteComplex(Encoding enc, CodeRange originalC return modified; } - /** Returns a copy of {@code bytes} but with ASCII characters downcased, or {@code bytes} itself if no ASCII - * characters need upcasing. The encoding must be ASCII-compatible (i.e. represent each ASCII character as a single - * byte ({@link Encoding#isAsciiCompatible()}). 
*/ @TruffleBoundary - public static byte[] downcaseMultiByteAsciiSimple(Encoding enc, CodeRange codeRange, byte[] bytes) { - assert enc.isAsciiCompatible(); - boolean modified = false; - int s = 0; - final int end = bytes.length; - - while (s < end) { - if (isAsciiUppercase(bytes[s])) { - if (!modified) { - bytes = bytes.clone(); - modified = true; - } - bytes[s] ^= 0x20; - s++; - } else { - s += characterLength(enc, codeRange, bytes, s, end); - } - } - - return bytes; - } - - @TruffleBoundary - public static boolean downcaseMultiByteComplex(Encoding enc, CodeRange originalCodeRange, RopeBuilder builder, - int caseMappingOptions, Node node) { + public static boolean downcaseMultiByteComplex(Encoding enc, TruffleString.CodeRange originalCodeRange, + ByteArrayBuilder builder, int caseMappingOptions, Node node) { byte[] buf = new byte[CASE_MAP_BUFFER_SIZE]; final IntHolder flagP = new IntHolder(); @@ -1511,35 +1435,9 @@ public static boolean downcaseMultiByteComplex(Encoding enc, CodeRange originalC return modified; } - /** Returns a copy of {@code bytes} but with ASCII characters upcased, or {@code bytes} itself if no ASCII - * characters need upcasing. The encoding must be ASCII-compatible (i.e. represent each ASCII character as a single - * byte ( {@link Encoding#isAsciiCompatible()}). 
*/ - @TruffleBoundary - public static byte[] upcaseMultiByteAsciiSimple(Encoding enc, CodeRange codeRange, byte[] bytes) { - assert enc.isAsciiCompatible(); - boolean modified = false; - int s = 0; - final int end = bytes.length; - - while (s < end) { - if (isAsciiLowercase(bytes[s])) { - if (!modified) { - bytes = bytes.clone(); - modified = true; - } - bytes[s] ^= 0x20; - s++; - } else { - s += characterLength(enc, codeRange, bytes, s, end); - } - } - - return bytes; - } - @TruffleBoundary - public static boolean upcaseMultiByteComplex(Encoding enc, CodeRange originalCodeRange, RopeBuilder builder, - int caseMappingOptions, Node node) { + public static boolean upcaseMultiByteComplex(Encoding enc, TruffleString.CodeRange originalCodeRange, + ByteArrayBuilder builder, int caseMappingOptions, Node node) { byte[] buf = new byte[CASE_MAP_BUFFER_SIZE]; final IntHolder flagP = new IntHolder(); @@ -1574,44 +1472,9 @@ public static boolean upcaseMultiByteComplex(Encoding enc, CodeRange originalCod return modified; } - /** Returns a copy of {@code bytes} but capitalized (affecting only ASCII characters), or {@code bytes} itself if - * the string doesn't require changes. The encoding must be ASCII-compatible (i.e. represent each ASCII character as - * a single byte ({@link Encoding#isAsciiCompatible()}). 
*/ @TruffleBoundary - public static byte[] capitalizeMultiByteAsciiSimple(Encoding enc, CodeRange codeRange, byte[] bytes) { - assert enc.isAsciiCompatible(); - boolean modified = false; - final int end = bytes.length; - - if (end == 0) { - return bytes; - } - - if (StringSupport.isAsciiLowercase(bytes[0])) { - bytes = bytes.clone(); - bytes[0] ^= 0x20; - modified = true; - } - - int s = 1; - while (s < end) { - if (StringSupport.isAsciiUppercase(bytes[s])) { - if (!modified) { - bytes = bytes.clone(); - modified = true; - } - bytes[s] ^= 0x20; - s++; - } else { - s += StringSupport.characterLength(enc, codeRange, bytes, s, end); - } - } - - return bytes; - } - - @TruffleBoundary - public static boolean capitalizeMultiByteComplex(Encoding enc, CodeRange originalCodeRange, RopeBuilder builder, + public static boolean capitalizeMultiByteComplex(Encoding enc, TruffleString.CodeRange originalCodeRange, + ByteArrayBuilder builder, int caseMappingOptions, Node node) { byte[] buf = new byte[CASE_MAP_BUFFER_SIZE]; @@ -1657,10 +1520,23 @@ public static boolean capitalizeMultiByteComplex(Encoding enc, CodeRange origina //endregion //region Predicates + /** Like {@link Encoding#isAscii(int)} but correct */ + public static boolean isAscii(int c) { + return c >= 0 && c < 128; + } + + public static boolean isAsciiLowercase(int c) { + return c >= 'a' && c <= 'z'; + } + public static boolean isAsciiLowercase(byte c) { return c >= 'a' && c <= 'z'; } + public static boolean isAsciiUppercase(int c) { + return c >= 'A' && c <= 'Z'; + } + public static boolean isAsciiUppercase(byte c) { return c >= 'A' && c <= 'Z'; } @@ -1675,55 +1551,44 @@ static boolean isAsciiSpaceOrNull(int c) { } public static boolean isAsciiPrintable(int c) { - return c == ' ' || (c >= '!' 
&& c <= '~'); - } - - public static boolean isAsciiAlpha(byte c) { - return isAsciiUppercase(c) || isAsciiLowercase(c); - } - - @TruffleBoundary - public static boolean isSpace(Encoding encoding, int c) { - return encoding.isSpace(c); - } - - public static boolean isAsciiCodepoint(int value) { - return value >= 0 && value < 128; + return c >= ' ' && c <= '~'; } //endregion //region undump helpers - private static final byte[] FORCE_ENCODING_BYTES = RopeOperations.encodeAsciiBytes(".force_encoding(\""); - private static final byte[] HEXDIGIT = RopeOperations.encodeAsciiBytes("0123456789abcdef0123456789ABCDEF"); + private static final byte[] FORCE_ENCODING_BYTES = StringOperations.encodeAsciiBytes(".force_encoding(\""); + private static final byte[] HEXDIGIT = StringOperations.encodeAsciiBytes("0123456789abcdef0123456789ABCDEF"); private static final String INVALID_FORMAT_MESSAGE = "invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form"; @TruffleBoundary - public static Pair undump(Rope rope, RubyEncoding encoding, RubyContext context, + public static Pair undump(ATStringWithEncoding rope, RubyEncoding encoding, + RubyContext context, Node currentNode) { - byte[] bytes = rope.getBytes(); - int start = 0; - int length = bytes.length; + var byteArray = rope.getInternalByteArray(); + byte[] bytes = byteArray.getArray(); + int start = byteArray.getOffset(); + final int end = byteArray.getEnd(); RubyEncoding resultEncoding = encoding; Encoding[] enc = { encoding.jcoding }; boolean[] utf8 = { false }; boolean[] binary = { false }; - RopeBuilder undumped = new RopeBuilder(); - undumped.setEncoding(enc[0]); + TStringBuilder undumped = new TStringBuilder(); + undumped.setEncoding(encoding); - CodeRange cr = rope.getCodeRange(); - if (cr != CR_7BIT) { + var cr = rope.getCodeRange(); + if (cr != ASCII) { throw new RaiseException( context, context.getCoreExceptions().runtimeError("non-ASCII character detected", currentNode)); } - if 
(ArrayUtils.memchr(bytes, start, bytes.length, (byte) '\0') != -1) { + if (ArrayUtils.memchr(bytes, start, byteArray.getLength(), (byte) '\0') != -1) { throw new RaiseException( context, context.getCoreExceptions().runtimeError("string contains null byte", currentNode)); } - if (length < 2) { + if (end - start < 2) { throw new RaiseException( context, context.getCoreExceptions().runtimeError(INVALID_FORMAT_MESSAGE, currentNode)); @@ -1737,7 +1602,7 @@ public static Pair undump(Rope rope, RubyEncoding enc start++; for (;;) { - if (start >= length) { + if (start >= end) { throw new RaiseException( context, context.getCoreExceptions().runtimeError("unterminated dumped string", currentNode)); @@ -1746,7 +1611,7 @@ public static Pair undump(Rope rope, RubyEncoding enc if (bytes[start] == '"') { /* epilogue */ start++; - if (start == length) { + if (start == end) { /* ascii compatible dumped string */ break; } else { @@ -1761,7 +1626,7 @@ public static Pair undump(Rope rope, RubyEncoding enc } size = FORCE_ENCODING_BYTES.length; - if (length - start <= size) { + if (end - start <= size) { throw new RaiseException( context, context.getCoreExceptions().runtimeError(INVALID_FORMAT_MESSAGE, currentNode)); @@ -1774,14 +1639,14 @@ public static Pair undump(Rope rope, RubyEncoding enc start += size; int encname = start; - start = ArrayUtils.memchr(bytes, start, length - start, (byte) '"'); + start = ArrayUtils.memchr(bytes, start, end - start, (byte) '"'); size = start - encname; if (start == -1) { throw new RaiseException( context, context.getCoreExceptions().runtimeError(INVALID_FORMAT_MESSAGE, currentNode)); } - if (length - start != 2) { + if (end - start != 2) { throw new RaiseException( context, context.getCoreExceptions().runtimeError(INVALID_FORMAT_MESSAGE, currentNode)); @@ -1791,7 +1656,7 @@ public static Pair undump(Rope rope, RubyEncoding enc context, context.getCoreExceptions().runtimeError(INVALID_FORMAT_MESSAGE, currentNode)); } - String encnameString = new 
String(bytes, encname, size, rope.encoding.getCharset()); + String encnameString = new String(bytes, encname, size, encoding.jcoding.getCharset()); RubyEncoding enc2 = context.getEncodingManager().getRubyEncoding(encnameString); if (enc2 == null) { throw new RaiseException( @@ -1800,7 +1665,7 @@ public static Pair undump(Rope rope, RubyEncoding enc "dumped string has unknown encoding name", currentNode)); } - undumped.setEncoding(enc2.jcoding); + undumped.setEncoding(enc2); resultEncoding = enc2; } break; @@ -1808,7 +1673,7 @@ public static Pair undump(Rope rope, RubyEncoding enc if (bytes[start] == '\\') { start++; - if (start >= length) { + if (start >= end) { throw new RaiseException( context, context.getCoreExceptions().runtimeError("invalid escape", currentNode)); @@ -1818,7 +1683,7 @@ public static Pair undump(Rope rope, RubyEncoding enc resultEncoding, bytes, start, - length, + end, enc, utf8, binary, @@ -1834,8 +1699,8 @@ public static Pair undump(Rope rope, RubyEncoding enc return Pair.create(undumped, resultEncoding); } - private static Pair undumpAfterBackslash(RopeBuilder out, RubyEncoding encoding, - byte[] bytes, int start, int length, Encoding[] enc, + private static Pair undumpAfterBackslash(TStringBuilder out, RubyEncoding encoding, + byte[] bytes, int start, int end, Encoding[] enc, boolean[] utf8, boolean[] binary, RubyContext context, Node currentNode) { long c; int codelen; @@ -1871,20 +1736,20 @@ private static Pair undumpAfterBackslash(RopeBuilder out, currentNode)); } utf8[0] = true; - if (++start >= length) { + if (++start >= end) { throw new RaiseException( context, context.getCoreExceptions().runtimeError("invalid Unicode escape", currentNode)); } if (enc[0] != UTF8Encoding.INSTANCE) { enc[0] = UTF8Encoding.INSTANCE; - out.setEncoding(UTF8Encoding.INSTANCE); + out.setEncoding(Encodings.UTF_8); resultEncoding = Encodings.UTF_8; } if (bytes[start] == '{') { /* handle u{...} form */ start++; for (;;) { - if (start >= length) { + if (start >= 
end) { throw new RaiseException( context, context.getCoreExceptions().runtimeError( @@ -1899,7 +1764,7 @@ private static Pair undumpAfterBackslash(RopeBuilder out, start++; continue; } - c = scanHex(bytes, start, length - start, hexlen); + c = scanHex(bytes, start, end - start, hexlen); if (hexlen[0] == 0 || hexlen[0] > 6) { throw new RaiseException( context, @@ -1917,7 +1782,7 @@ private static Pair undumpAfterBackslash(RopeBuilder out, context, context.getCoreExceptions().runtimeError("invalid Unicode codepoint", currentNode)); } - codelen = EncodingUtils.encMbcput((int) c, buf, 0, enc[0]); + codelen = enc[0].codeToMbc((int) c, buf, 0); out.append(buf, 0, codelen); start += hexlen[0]; } @@ -1933,7 +1798,7 @@ private static Pair undumpAfterBackslash(RopeBuilder out, context, context.getCoreExceptions().runtimeError("invalid Unicode codepoint", currentNode)); } - codelen = EncodingUtils.encMbcput((int) c, buf, 0, enc[0]); + codelen = enc[0].codeToMbc((int) c, buf, 0); out.append(buf, 0, codelen); start += hexlen[0]; } @@ -1947,7 +1812,7 @@ private static Pair undumpAfterBackslash(RopeBuilder out, currentNode)); } binary[0] = true; - if (++start >= length) { + if (++start >= end) { throw new RaiseException( context, context.getCoreExceptions().runtimeError("invalid hex escape", currentNode)); @@ -2007,4 +1872,5 @@ private static byte unescapeAscii(byte c) { return -1; } } + // endregion } diff --git a/src/main/java/org/truffleruby/core/string/StringUtils.java b/src/main/java/org/truffleruby/core/string/StringUtils.java index 6f8a746f3e1b..3ffa570e2d9a 100644 --- a/src/main/java/org/truffleruby/core/string/StringUtils.java +++ b/src/main/java/org/truffleruby/core/string/StringUtils.java @@ -11,8 +11,6 @@ import java.util.Locale; -import org.truffleruby.core.rope.RopeOperations; - import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; public abstract class StringUtils { @@ -35,7 +33,7 @@ public static String format(String format, Object... 
args) { } public static byte[] formatASCIIBytes(String format, Object... args) { - return RopeOperations.encodeAsciiBytes(format(format, args)); + return StringOperations.encodeAsciiBytes(format(format, args)); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/string/TBytesKey.java b/src/main/java/org/truffleruby/core/string/TBytesKey.java new file mode 100644 index 000000000000..656d5b0d3b0b --- /dev/null +++ b/src/main/java/org/truffleruby/core/string/TBytesKey.java @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2013, 2020 Oracle and/or its affiliates. All rights reserved. This + * code is released under a tri EPL/GPL/LGPL license. You can use it, + * redistribute it and/or modify it under the terms of the: + * + * Eclipse Public License version 2.0, or + * GNU General Public License version 2, or + * GNU Lesser General Public License version 2.1. + */ +package org.truffleruby.core.string; + +import java.util.Arrays; +import java.util.Objects; + +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.RubyEncoding; + +public class TBytesKey { + + private final byte[] bytes; + private RubyEncoding encoding; + private final int bytesHashCode; + + public TBytesKey(byte[] bytes, RubyEncoding encoding) { + this.bytes = bytes; + this.encoding = encoding; + this.bytesHashCode = Arrays.hashCode(bytes); + } + + @Override + public int hashCode() { + return bytesHashCode; + } + + @Override + public boolean equals(Object o) { + if (o instanceof TBytesKey) { + final TBytesKey other = (TBytesKey) o; + if (encoding == null) { + if (Arrays.equals(bytes, other.bytes)) { + // For getMatchedEncoding() + this.encoding = Objects.requireNonNull(other.encoding); + return true; + } else { + return false; + } + } else { + return encoding == other.encoding && Arrays.equals(bytes, other.bytes); + } + } + + return false; + } + + public RubyEncoding getMatchedEncoding() { + return encoding; + } + + @Override + public String toString() { + var 
encoding = this.encoding != null ? this.encoding.tencoding : TruffleString.Encoding.BYTES; + return TruffleString.fromByteArrayUncached(bytes, encoding, false).toString(); + } + +} diff --git a/src/main/java/org/truffleruby/core/string/TStringBuilder.java b/src/main/java/org/truffleruby/core/string/TStringBuilder.java new file mode 100644 index 000000000000..abbb5e369373 --- /dev/null +++ b/src/main/java/org/truffleruby/core/string/TStringBuilder.java @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This + * code is released under a tri EPL/GPL/LGPL license. You can use it, + * redistribute it and/or modify it under the terms of the: + * + * Eclipse Public License version 2.0, or + * GNU General Public License version 2, or + * GNU Lesser General Public License version 2.1. + */ +package org.truffleruby.core.string; + +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; +import org.jcodings.Encoding; +import org.truffleruby.collections.ByteArrayBuilder; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; + +public class TStringBuilder extends ByteArrayBuilder { + + private RubyEncoding encoding = Encodings.BINARY; + + public TStringBuilder() { + super(); + } + + public TStringBuilder(int size) { + super(size); + } + + public static TStringBuilder create(int size) { + return new TStringBuilder(size); + } + + public static TStringBuilder create(byte[] bytes, RubyEncoding encoding) { + final TStringBuilder builder = new TStringBuilder(bytes.length); + builder.append(bytes); + builder.setEncoding(encoding); + return builder; + } + + public static TStringBuilder create(RubyString rubyString) { + final RubyEncoding enc = rubyString.getEncodingUncached(); + return create(rubyString.tstring.getInternalByteArrayUncached(enc.tencoding), enc); + } + + public static 
TStringBuilder create(byte[] bytes) { + final TStringBuilder builder = new TStringBuilder(bytes.length); + builder.append(bytes); + return builder; + } + + public static TStringBuilder create(byte[] bytes, int index, int len) { + final TStringBuilder builder = new TStringBuilder(len); + builder.append(bytes, index, len); + return builder; + } + + public static TStringBuilder create(InternalByteArray bytes) { + return create(bytes.getArray(), bytes.getOffset(), bytes.getLength()); + } + + public static TStringBuilder create(InternalByteArray bytes, RubyEncoding encoding) { + var builder = create(bytes); + builder.setEncoding(encoding); + return builder; + } + + public RubyEncoding getRubyEncoding() { + return encoding; + } + + public Encoding getEncoding() { + return encoding.jcoding; + } + + public void setEncoding(RubyEncoding encoding) { + this.encoding = encoding; + } + + public TruffleString toTString() { + return TStringUtils.fromByteArray(getBytes(), encoding); + } + + /** All callers of this method must guarantee that the builder's byte array cannot change after this call, otherwise + * the TruffleString built from the builder will end up in an inconsistent state. */ + public TruffleString toTStringUnsafe(TruffleString.FromByteArrayNode fromByteArrayNode) { + return fromByteArrayNode.execute(getUnsafeBytes(), 0, getLength(), encoding.tencoding, false); + } + + public TStringWithEncoding toTStringWithEnc() { + return new TStringWithEncoding(TStringUtils.fromByteArray(getBytes(), encoding), encoding); + } + +} diff --git a/src/main/java/org/truffleruby/core/string/TStringCache.java b/src/main/java/org/truffleruby/core/string/TStringCache.java new file mode 100644 index 000000000000..5f97e46ee593 --- /dev/null +++ b/src/main/java/org/truffleruby/core/string/TStringCache.java @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2013, 2020 Oracle and/or its affiliates. All rights reserved. This + * code is released under a tri EPL/GPL/LGPL license. 
You can use it,
+ * redistribute it and/or modify it under the terms of the:
+ *
+ * Eclipse Public License version 2.0, or
+ * GNU General Public License version 2, or
+ * GNU Lesser General Public License version 2.1.
+ */
+package org.truffleruby.core.string;
+
+import com.oracle.truffle.api.strings.TruffleString;
+import org.truffleruby.collections.WeakValueCache;
+import org.truffleruby.core.encoding.Encodings;
+import org.truffleruby.core.encoding.RubyEncoding;
+import org.truffleruby.core.encoding.TStringUtils;
+import org.truffleruby.core.symbol.CoreSymbols;
+import org.truffleruby.core.symbol.RubySymbol;
+
+import com.oracle.truffle.api.CompilerDirectives;
+import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
+
+public class TStringCache {
+
+    private final WeakValueCache<TBytesKey, TruffleString> bytesToTString = new WeakValueCache<>();
+
+    private int byteArrayReusedCount; // lookups that reused the bytes of an entry stored under another encoding
+    private int tstringsReusedCount; // lookups that hit an exact (bytes, encoding) entry
+    private int tstringBytesSaved; // sum of the byte lengths over both kinds of reuse above
+
+    public TStringCache(CoreSymbols coreSymbols) {
+        addTStringConstants();
+        addCoreSymbolTStrings(coreSymbols);
+        addFrozenStrings();
+    }
+
+    private void addFrozenStrings() {
+        for (var tstring : FrozenStrings.TSTRINGS) {
+            register(tstring, Encodings.BINARY);
+        }
+    }
+
+    private void addTStringConstants() {
+        for (var tstring : TStringConstants.UTF8_SINGLE_BYTE) {
+            register(tstring, Encodings.UTF_8);
+        }
+        for (var tstring : TStringConstants.US_ASCII_SINGLE_BYTE) {
+            register(tstring, Encodings.US_ASCII);
+        }
+        for (var tstring : TStringConstants.BINARY_SINGLE_BYTE) {
+            register(tstring, Encodings.BINARY);
+        }
+        for (var tstring : TStringConstants.TSTRING_CONSTANTS.values()) {
+            register(tstring, Encodings.US_ASCII);
+        }
+    }
+
+    private void addCoreSymbolTStrings(CoreSymbols coreSymbols) {
+        for (RubySymbol symbol : coreSymbols.CORE_SYMBOLS) {
+            register(symbol.tstring, symbol.encoding);
+        }
+    }
+
+    private void register(TruffleString tstring, RubyEncoding encoding) {
+        final TBytesKey key = new TBytesKey(TStringUtils.getBytesOrFail(tstring, encoding), encoding);
+        final TruffleString existing = bytesToTString.put(key, tstring);
+        if (existing != null && existing != tstring) { // a different instance was already stored under this key
+            throw CompilerDirectives.shouldNotReachHere("Duplicate TruffleString in TStringCache: " + existing);
+        }
+    }
+
+    public TruffleString getTString(TruffleString string, RubyEncoding encoding) {
+        return getTString(TStringUtils.getBytesOrCopy(string, encoding), encoding);
+    }
+
+    @TruffleBoundary
+    public TruffleString getTString(byte[] bytes, RubyEncoding rubyEncoding) {
+        assert rubyEncoding != null;
+
+        final TBytesKey key = new TBytesKey(bytes, rubyEncoding);
+
+        final TruffleString tstring = bytesToTString.get(key);
+        if (tstring != null) {
+            ++tstringsReusedCount;
+            tstringBytesSaved += tstring.byteLength(rubyEncoding.tencoding);
+
+            return tstring;
+        }
+
+        // At this point, we were unable to find a TruffleString with the same bytes and encoding (i.e., a direct match).
+        // However, there may still be a TruffleString with the same byte[] and sharing a direct byte[] can still allow some
+        // reference equality optimizations. So, do another search but with a marker encoding. The only guarantee
+        // we can make about the resulting TruffleString is that it would have the same logical byte[], but that's good enough
+        // for our purposes.
+        TBytesKey keyNoEncoding = new TBytesKey(bytes, null);
+        final TruffleString tstringWithSameBytesButDifferentEncoding = bytesToTString.get(keyNoEncoding);
+
+        final TruffleString newTString;
+        if (tstringWithSameBytesButDifferentEncoding != null) {
+            var prevEncoding = keyNoEncoding.getMatchedEncoding().tencoding;
+            newTString = tstringWithSameBytesButDifferentEncoding.forceEncodingUncached(prevEncoding,
+                    rubyEncoding.tencoding);
+
+            ++byteArrayReusedCount;
+            tstringBytesSaved += newTString.byteLength(rubyEncoding.tencoding);
+        } else {
+            newTString = TStringUtils.fromByteArray(bytes, rubyEncoding);
+        }
+
+        // Use the new TruffleString bytes in the cache, so we do not keep bytes alive unnecessarily.
+        final TBytesKey newKey = new TBytesKey(TStringUtils.getBytesOrCopy(newTString, rubyEncoding), rubyEncoding);
+        return bytesToTString.addInCacheIfAbsent(newKey, newTString);
+    }
+
+    public boolean contains(TruffleString string, RubyEncoding encoding) {
+        final TBytesKey key = new TBytesKey(TStringUtils.getBytesOrCopy(string, encoding), encoding);
+
+        return bytesToTString.get(key) != null;
+    }
+
+    public int getByteArrayReusedCount() {
+        return byteArrayReusedCount;
+    }
+
+    public int getTStringsReusedCount() {
+        return tstringsReusedCount;
+    }
+
+    public int getTStringBytesSaved() {
+        return tstringBytesSaved;
+    }
+
+    public int totalTStrings() {
+        return bytesToTString.size();
+    }
+
+}
diff --git a/src/main/java/org/truffleruby/core/string/TStringConstants.java b/src/main/java/org/truffleruby/core/string/TStringConstants.java
new file mode 100644
index 000000000000..8fc92f62b54f
--- /dev/null
+++ b/src/main/java/org/truffleruby/core/string/TStringConstants.java
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2016, 2022 Oracle and/or its affiliates. All rights reserved. This
+ * code is released under a tri EPL/GPL/LGPL license.
You can use it,
+ * redistribute it and/or modify it under the terms of the:
+ *
+ * Eclipse Public License version 2.0, or
+ * GNU General Public License version 2, or
+ * GNU Lesser General Public License version 2.1.
+ */
+package org.truffleruby.core.string;
+
+import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
+import com.oracle.truffle.api.strings.TruffleString;
+import org.truffleruby.core.encoding.TStringUtils;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+// Must use TruffleString.Encoding and not RubyEncoding to avoid initialization cycle
+public class TStringConstants {
+    public static final Map<String, TruffleString> TSTRING_CONSTANTS = new HashMap<>();
+
+    public static final TruffleString EMPTY_BINARY = withHashCode(TruffleString.Encoding.BYTES.getEmpty());
+    public static final TruffleString EMPTY_US_ASCII = withHashCode(TruffleString.Encoding.US_ASCII.getEmpty());
+    public static final TruffleString EMPTY_UTF8 = withHashCode(TruffleString.Encoding.UTF_8.getEmpty());
+
+    @CompilationFinal(dimensions = 1) public static final byte[] EMPTY_BYTES = new byte[0];
+    @CompilationFinal(dimensions = 1) public static final byte[] NEWLINE_BYTE_ARRAY = new byte[]{ '\n' };
+
+    @CompilationFinal(dimensions = 1) public static final TruffleString[] UTF8_SINGLE_BYTE = new TruffleString[256];
+    @CompilationFinal(dimensions = 1) public static final TruffleString[] US_ASCII_SINGLE_BYTE = new TruffleString[256];
+    @CompilationFinal(dimensions = 1) public static final TruffleString[] BINARY_SINGLE_BYTE = new TruffleString[256];
+    @CompilationFinal(dimensions = 1) private static final TruffleString[] PADDED_NUMBERS = createPaddedNumbersTable();
+    @CompilationFinal(dimensions = 1) private static final TruffleString[] PADDING_ZEROS = createPaddingZeroTable();
+
+    static { // must stay above the ascii(...) constants: one-character constants are read from US_ASCII_SINGLE_BYTE
+        for (int i = 0; i < 256; i++) {
+            final byte[] bytes = new byte[]{ (byte) i };
+            UTF8_SINGLE_BYTE[i] = withHashCode(TStringUtils.fromByteArray(bytes, TruffleString.Encoding.UTF_8));
+            US_ASCII_SINGLE_BYTE[i] = withHashCode(TStringUtils.fromByteArray(bytes, TruffleString.Encoding.US_ASCII));
+            BINARY_SINGLE_BYTE[i] = withHashCode(TStringUtils.fromByteArray(bytes, TruffleString.Encoding.BYTES));
+        }
+    }
+
+    public static final TruffleString AMPERSAND = ascii("&");
+    public static final TruffleString AMPERSAND_AMPERSAND = ascii("&&");
+    public static final TruffleString AMPERSAND_DOT = ascii("&.");
+    public static final TruffleString BACKTICK = ascii("`");
+    public static final TruffleString BACKSLASH = ascii("\\");
+    public static final TruffleString BANG = ascii("!");
+    public static final TruffleString BANG_EQ = ascii("!=");
+    public static final TruffleString BANG_TILDE = ascii("!~");
+    public static final TruffleString CALL = ascii("call");
+    public static final TruffleString CARET = ascii("^");
+    public static final TruffleString COLON = ascii(":");
+    public static final TruffleString COLON_COLON = ascii("::");
+    public static final TruffleString COMMA = ascii(",");
+    public static final TruffleString DOT = ascii(".");
+    public static final TruffleString DOT_DOT = ascii("..");
+    public static final TruffleString DOT_DOT_DOT = ascii("...");
+    public static final TruffleString DOLLAR_BANG = ascii("$!");
+    public static final TruffleString DOLLAR_ZERO = ascii("$0");
+    public static final TruffleString EQ = ascii("=");
+    public static final TruffleString EQ_EQ = ascii("==");
+    public static final TruffleString EQ_EQ_EQ = ascii("===");
+    public static final TruffleString EQ_GT = ascii("=>");
+    public static final TruffleString EQ_TILDE = ascii("=~");
+    public static final TruffleString FALSE = ascii("false");
+    public static final TruffleString GT = ascii(">");
+    public static final TruffleString GT_EQ = ascii(">=");
+    public static final TruffleString GT_GT = ascii(">>");
+    public static final TruffleString LBRACKET = ascii("[");
+    public static final TruffleString LBRACKET_RBRACKET = ascii("[]");
+    public static final TruffleString LBRACKET_RBRACKET_EQ = ascii("[]=");
+    public static final TruffleString LCURLY = ascii("{");
+    public static final TruffleString LT = ascii("<");
+    public static final TruffleString LT_EQ = ascii("<=");
+    public static final TruffleString LT_EQ_GT = ascii("<=>");
+    public static final TruffleString LT_LT = ascii("<<");
+    public static final TruffleString MINUS = ascii("-");
+    public static final TruffleString MINUS_AT = ascii("-@");
+    public static final TruffleString MINUS_GT = ascii("->");
+    public static final TruffleString NIL = ascii("nil");
+    public static final TruffleString OR = ascii("|");
+    public static final TruffleString OR_OR = ascii("||");
+    public static final TruffleString PERCENT = ascii("%");
+    public static final TruffleString PLUS = ascii("+");
+    public static final TruffleString PLUS_AT = ascii("+@");
+    public static final TruffleString Q = ascii("'");
+    public static final TruffleString QQ = ascii("\"");
+    public static final TruffleString QUESTION = ascii("?");
+    public static final TruffleString RBRACKET = ascii("]");
+    public static final TruffleString RCURLY = ascii("}");
+    public static final TruffleString RPAREN = ascii(")");
+    public static final TruffleString SEMICOLON = ascii(";");
+    public static final TruffleString SLASH = ascii("/");
+    public static final TruffleString STAR = ascii("*");
+    public static final TruffleString STAR_STAR = ascii("**");
+    public static final TruffleString TILDE = ascii("~");
+    public static final TruffleString TRUE = ascii("true");
+    // Encoding names, generated by:
+    // names = Encoding.list.map { |e| e.name }
+    // names.each { |n| puts "public static final TruffleString #{n.upcase.gsub('-','_')} = ascii(\"#{n}\");" }
+    public static final TruffleString ASCII_8BIT = ascii("ASCII-8BIT");
+    public static final TruffleString US_ASCII = ascii("US-ASCII");
+    public static final TruffleString UTF_8 = ascii("UTF-8");
+    public static final TruffleString BIG5 = ascii("Big5");
+    public static final TruffleString BIG5_HKSCS = ascii("Big5-HKSCS");
+    public static final TruffleString BIG5_UAO = ascii("Big5-UAO");
+    public static final TruffleString CP949 = ascii("CP949");
+    public static final TruffleString EMACS_MULE = ascii("Emacs-Mule");
+    public static final TruffleString EUC_JP = ascii("EUC-JP");
+    public static final TruffleString EUC_KR = ascii("EUC-KR");
+    public static final TruffleString EUC_TW = ascii("EUC-TW");
+    public static final TruffleString GB18030 = ascii("GB18030");
+    public static final TruffleString GBK = ascii("GBK");
+    public static final TruffleString ISO_8859_1 = ascii("ISO-8859-1");
+    public static final TruffleString ISO_8859_2 = ascii("ISO-8859-2");
+    public static final TruffleString ISO_8859_3 = ascii("ISO-8859-3");
+    public static final TruffleString ISO_8859_4 = ascii("ISO-8859-4");
+    public static final TruffleString ISO_8859_5 = ascii("ISO-8859-5");
+    public static final TruffleString ISO_8859_6 = ascii("ISO-8859-6");
+    public static final TruffleString ISO_8859_7 = ascii("ISO-8859-7");
+    public static final TruffleString ISO_8859_8 = ascii("ISO-8859-8");
+    public static final TruffleString ISO_8859_9 = ascii("ISO-8859-9");
+    public static final TruffleString ISO_8859_10 = ascii("ISO-8859-10");
+    public static final TruffleString ISO_8859_11 = ascii("ISO-8859-11");
+    public static final TruffleString ISO_8859_13 = ascii("ISO-8859-13");
+    public static final TruffleString ISO_8859_14 = ascii("ISO-8859-14");
+    public static final TruffleString ISO_8859_15 = ascii("ISO-8859-15");
+    public static final TruffleString ISO_8859_16 = ascii("ISO-8859-16");
+    public static final TruffleString KOI8_R = ascii("KOI8-R");
+    public static final TruffleString KOI8_U = ascii("KOI8-U");
+    public static final TruffleString SHIFT_JIS = ascii("Shift_JIS");
+    public static final TruffleString UTF_16BE = ascii("UTF-16BE");
+    public static final TruffleString UTF_16LE = ascii("UTF-16LE");
+    public static final TruffleString UTF_32BE = ascii("UTF-32BE");
+    public static final TruffleString UTF_32LE = ascii("UTF-32LE");
+    public static final TruffleString WINDOWS_31J = ascii("Windows-31J");
+    public static final TruffleString WINDOWS_1250 = ascii("Windows-1250");
+    public static final TruffleString WINDOWS_1251 = ascii("Windows-1251");
+    public static final TruffleString WINDOWS_1252 = ascii("Windows-1252");
+    public static final TruffleString WINDOWS_1253 = ascii("Windows-1253");
+    public static final TruffleString WINDOWS_1254 = ascii("Windows-1254");
+    public static final TruffleString WINDOWS_1257 = ascii("Windows-1257");
+    public static final TruffleString IBM437 = ascii("IBM437");
+    public static final TruffleString IBM737 = ascii("IBM737");
+    public static final TruffleString IBM775 = ascii("IBM775");
+    public static final TruffleString CP850 = ascii("CP850");
+    public static final TruffleString IBM852 = ascii("IBM852");
+    public static final TruffleString CP852 = ascii("CP852");
+    public static final TruffleString IBM855 = ascii("IBM855");
+    public static final TruffleString CP855 = ascii("CP855");
+    public static final TruffleString IBM857 = ascii("IBM857");
+    public static final TruffleString IBM860 = ascii("IBM860");
+    public static final TruffleString IBM861 = ascii("IBM861");
+    public static final TruffleString IBM862 = ascii("IBM862");
+    public static final TruffleString IBM863 = ascii("IBM863");
+    public static final TruffleString IBM864 = ascii("IBM864");
+    public static final TruffleString IBM865 = ascii("IBM865");
+    public static final TruffleString IBM866 = ascii("IBM866");
+    public static final TruffleString IBM869 = ascii("IBM869");
+    public static final TruffleString WINDOWS_1258 = ascii("Windows-1258");
+    public static final TruffleString GB1988 = ascii("GB1988");
+    public static final TruffleString MACCENTEURO = ascii("macCentEuro");
+    public static final TruffleString MACCROATIAN = ascii("macCroatian");
+    public static final TruffleString MACCYRILLIC = ascii("macCyrillic");
+    public static final TruffleString MACGREEK = ascii("macGreek");
+    public static final TruffleString MACICELAND = ascii("macIceland");
+    public static final TruffleString MACROMAN = ascii("macRoman");
+    public static final TruffleString MACROMANIA = ascii("macRomania");
+    public static final TruffleString MACTHAI = ascii("macThai");
+    public static final TruffleString MACTURKISH = ascii("macTurkish");
+    public static final TruffleString MACUKRAINE = ascii("macUkraine");
+    public static final TruffleString CP950 = ascii("CP950");
+    public static final TruffleString CP951 = ascii("CP951");
+    public static final TruffleString IBM037 = ascii("IBM037");
+    public static final TruffleString STATELESS_ISO_2022_JP = ascii("stateless-ISO-2022-JP");
+    public static final TruffleString EUCJP_MS = ascii("eucJP-ms");
+    public static final TruffleString CP51932 = ascii("CP51932");
+    public static final TruffleString EUC_JIS_2004 = ascii("EUC-JIS-2004");
+    public static final TruffleString GB2312 = ascii("GB2312");
+    public static final TruffleString GB12345 = ascii("GB12345");
+    public static final TruffleString ISO_2022_JP = ascii("ISO-2022-JP");
+    public static final TruffleString ISO_2022_JP_2 = ascii("ISO-2022-JP-2");
+    public static final TruffleString CP50220 = ascii("CP50220");
+    public static final TruffleString CP50221 = ascii("CP50221");
+    public static final TruffleString WINDOWS_1256 = ascii("Windows-1256");
+    public static final TruffleString WINDOWS_1255 = ascii("Windows-1255");
+    public static final TruffleString TIS_620 = ascii("TIS-620");
+    public static final TruffleString WINDOWS_874 = ascii("Windows-874");
+    public static final TruffleString MACJAPANESE = ascii("MacJapanese");
+    public static final TruffleString UTF_7 = ascii("UTF-7");
+    public static final TruffleString UTF8_MAC = ascii("UTF8-MAC");
+    public static final TruffleString UTF_16 = ascii("UTF-16");
+    public static final TruffleString UTF_32 = ascii("UTF-32");
+    public static final TruffleString UTF8_DOCOMO = ascii("UTF8-DoCoMo");
+    public static final TruffleString SJIS_DOCOMO = ascii("SJIS-DoCoMo");
+    public static final TruffleString UTF8_KDDI = ascii("UTF8-KDDI");
+    public static final TruffleString SJIS_KDDI = ascii("SJIS-KDDI");
+    public static final TruffleString ISO_2022_JP_KDDI = ascii("ISO-2022-JP-KDDI");
+    public static final TruffleString STATELESS_ISO_2022_JP_KDDI = ascii("stateless-ISO-2022-JP-KDDI");
+    public static final TruffleString UTF8_SOFTBANK = ascii("UTF8-SoftBank");
+    public static final TruffleString SJIS_SOFTBANK = ascii("SJIS-SoftBank");
+
+    private static TruffleString ascii(String string) {
+        if (string.length() == 1) {
+            return US_ASCII_SINGLE_BYTE[string.charAt(0)]; // reuse the shared single-byte instance, no map entry
+        } else {
+            final TruffleString tstring = TStringUtils.fromJavaString(string, TruffleString.Encoding.US_ASCII);
+            var before = TSTRING_CONSTANTS.putIfAbsent(string, tstring);
+
+            if (before != null) {
+                throw new AssertionError("Duplicate TruffleString in TStringConstants: " + before);
+            }
+
+            return tstring;
+        }
+    }
+
+    public static TruffleString lookupUSASCIITString(String string) {
+        if (string.length() == 1) {
+            return US_ASCII_SINGLE_BYTE[string.charAt(0)];
+        } else {
+            return TSTRING_CONSTANTS.get(string); // null when no constant was declared for this string
+        }
+    }
+
+    private static TruffleString[] createPaddedNumbersTable() {
+        final TruffleString[] table = new TruffleString[100];
+
+        for (int n = 0; n < table.length; n++) {
+            table[n] = TruffleString.fromByteArrayUncached(
+                    new byte[]{ (byte) ('0' + n / 10), (byte) ('0' + n % 10) },
+                    TruffleString.Encoding.UTF_8,
+                    false);
+        }
+
+        return table;
+    }
+
+    /** Zero-padded numbers in the format %02d, between 00 and 99; {@code n} indexes a 100-entry table. */
+    public static TruffleString paddedNumber(int n) {
+        return PADDED_NUMBERS[n];
+    }
+
+    private static TruffleString[] createPaddingZeroTable() {
+        final TruffleString[] table = new TruffleString[6];
+
+        for (int n = 0; n < table.length; n++) {
+            final byte[] bytes = new byte[n];
+
+            Arrays.fill(bytes, (byte) '0');
+
+            table[n] = TruffleString.fromByteArrayUncached(bytes, TruffleString.Encoding.UTF_8, false);
+        }
+
+        return table;
+    }
+
+    public static TruffleString paddingZeros(int n) { // a string of n '0' characters; the table covers n = 0..5
+        return PADDING_ZEROS[n];
+    }
+
+    private static <T> T withHashCode(T object) {
+        object.hashCode(); // result discarded; presumably forces the hash to be computed up front - confirm
+        return object;
+    }
+}
diff --git a/src/main/java/org/truffleruby/core/string/TStringWithEncoding.java b/src/main/java/org/truffleruby/core/string/TStringWithEncoding.java
new file mode 100644
index 000000000000..b3361b063ece
--- /dev/null
+++ b/src/main/java/org/truffleruby/core/string/TStringWithEncoding.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 Oracle and/or its affiliates. All rights reserved. This
+ * code is released under a tri EPL/GPL/LGPL license. You can use it,
+ * redistribute it and/or modify it under the terms of the:
+ *
+ * Eclipse Public License version 2.0, or
+ * GNU General Public License version 2, or
+ * GNU Lesser General Public License version 2.1.
+ */
+package org.truffleruby.core.string;
+
+import com.oracle.truffle.api.CompilerAsserts;
+import com.oracle.truffle.api.strings.TruffleString.AsTruffleStringNode;
+import org.truffleruby.core.encoding.RubyEncoding;
+
+import com.oracle.truffle.api.strings.AbstractTruffleString;
+import com.oracle.truffle.api.strings.TruffleString;
+
+/** TruffleString with RubyEncoding; redeclares {@code tstring} with the narrower {@link TruffleString} type. */
+public final class TStringWithEncoding extends TStringWithEncodingBase {
+
+    public final TruffleString tstring; // hides the AbstractTruffleString-typed field of the base class
+
+    public TStringWithEncoding(TruffleString tstring, RubyEncoding encoding) {
+        super(tstring, encoding);
+        this.tstring = tstring;
+    }
+
+    public TStringWithEncoding(
+            AsTruffleStringNode asTruffleStringNode,
+            AbstractTruffleString tstring,
+            RubyEncoding encoding) {
+        this(asTruffleStringNode.execute(tstring, encoding.tencoding), encoding); // convert, then delegate
+    }
+
+    @Override
+    public TStringWithEncoding asImmutable() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return this; // already holds a TruffleString, so there is nothing to convert
+    }
+
+    @Override
+    public TStringWithEncoding forceEncoding(RubyEncoding newEncoding) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        if (encoding == newEncoding) {
+            return this; // same encoding: skip creating a new wrapper
+        } else {
+            return super.forceEncoding(newEncoding);
+        }
+    }
+
+}
diff --git a/src/main/java/org/truffleruby/core/string/TStringWithEncodingBase.java b/src/main/java/org/truffleruby/core/string/TStringWithEncodingBase.java
new file mode 100644
index 000000000000..a63f463a0528
--- /dev/null
+++ b/src/main/java/org/truffleruby/core/string/TStringWithEncodingBase.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2022 Oracle and/or its affiliates. All rights reserved. This
+ * code is released under a tri EPL/GPL/LGPL license. You can use it,
+ * redistribute it and/or modify it under the terms of the:
+ *
+ * Eclipse Public License version 2.0, or
+ * GNU General Public License version 2, or
+ * GNU Lesser General Public License version 2.1.
+ */
+package org.truffleruby.core.string;
+
+import java.util.Objects;
+
+import com.oracle.truffle.api.strings.TruffleString;
+import com.oracle.truffle.api.strings.TruffleString.CreateCodePointIteratorNode;
+import com.oracle.truffle.api.strings.TruffleString.ErrorHandling;
+import org.truffleruby.core.encoding.RubyEncoding;
+import org.truffleruby.core.encoding.TStringUtils;
+
+import com.oracle.truffle.api.CompilerAsserts;
+import com.oracle.truffle.api.strings.AbstractTruffleString;
+import com.oracle.truffle.api.strings.InternalByteArray;
+import com.oracle.truffle.api.strings.TruffleStringIterator;
+
+abstract class TStringWithEncodingBase {
+
+    public final AbstractTruffleString tstring;
+    public final RubyEncoding encoding;
+
+    protected TStringWithEncodingBase(AbstractTruffleString tstring, RubyEncoding encoding) {
+        assert tstring.isCompatibleTo(encoding.tencoding);
+        this.tstring = tstring;
+        this.encoding = encoding;
+    }
+
+    public abstract TStringWithEncoding asImmutable();
+
+    public final RubyEncoding getEncoding() {
+        return encoding;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || this.getClass() != o.getClass()) {
+            return false; // also covers null: equals(null) must return false rather than throw
+        }
+        TStringWithEncodingBase that = (TStringWithEncodingBase) o;
+        return encoding == that.encoding && tstring.equals(that.tstring);
+    }
+
+    @Override
+    public int hashCode() {
+        return Objects.hash(tstring, encoding);
+    }
+
+    @Override
+    public String toString() {
+        return getClass().getSimpleName() + ":" + tstring.toStringDebug();
+    }
+
+    public int byteLength() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.byteLength(encoding.tencoding);
+    }
+
+    public int characterLength() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.codePointLengthUncached(encoding.tencoding);
+    }
+
+    public InternalByteArray getInternalByteArray() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.getInternalByteArrayUncached(encoding.tencoding);
+    }
+
+    public TruffleString.CodeRange getCodeRange() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.getByteCodeRangeUncached(encoding.tencoding);
+    }
+
+    public TStringWithEncoding forceEncoding(RubyEncoding newEncoding) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return new TStringWithEncoding(tstring.forceEncodingUncached(encoding.tencoding, newEncoding.tencoding),
+                newEncoding);
+    }
+
+    public boolean isAsciiOnly() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return StringGuards.is7BitUncached(tstring, encoding);
+    }
+
+    public int get(int index) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.readByteUncached(index, encoding.tencoding);
+    }
+
+    public byte getByte(int index) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return (byte) tstring.readByteUncached(index, encoding.tencoding); // same read as get(int), narrowed to byte
+    }
+
+    public TStringWithEncoding substring(int byteOffset, int length) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return new TStringWithEncoding(tstring.substringByteIndexUncached(byteOffset, length, encoding.tencoding, true),
+                encoding);
+    }
+
+    public TruffleString substringAsTString(int byteOffset, int length) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.substringByteIndexUncached(byteOffset, length, encoding.tencoding, true);
+    }
+
+    public String toJavaString() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.toJavaStringUncached();
+    }
+
+    public String toJavaStringOrThrow() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return TStringUtils.toJavaStringOrThrow(tstring, encoding);
+    }
+
+    public TruffleStringIterator createCodePointIterator() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return CreateCodePointIteratorNode.getUncached().execute(tstring, encoding.tencoding,
+                ErrorHandling.RETURN_NEGATIVE);
+    }
+
+    public boolean isSingleByteOptimizable() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return isAsciiOnly() || encoding.isSingleByte;
+    }
+
+    public byte[] getBytesCopy() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.copyToByteArrayUncached(encoding.tencoding);
+    }
+
+    public byte[] getBytesOrCopy() {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return TStringUtils.getBytesOrCopy(tstring, encoding);
+    }
+
+    /** byteOffset is logical, recoverIfBroken=false */
+    public int characterLength(int byteOffset) {
+        CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary");
+        return tstring.byteLengthOfCodePointUncached(byteOffset, encoding.tencoding, ErrorHandling.RETURN_NEGATIVE);
+    }
+
+}
diff --git a/src/main/java/org/truffleruby/core/string/TruffleStringNodes.java b/src/main/java/org/truffleruby/core/string/TruffleStringNodes.java
deleted file mode 100644
index 0d6254884565..000000000000
--- a/src/main/java/org/truffleruby/core/string/TruffleStringNodes.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2016, 2021 Oracle and/or its affiliates. All rights reserved. This
- * code is released under a tri EPL/GPL/LGPL license. You can use it,
- * redistribute it and/or modify it under the terms of the:
- *
- * Eclipse Public License version 2.0, or
- * GNU General Public License version 2, or
- * GNU Lesser General Public License version 2.1.
- */ -package org.truffleruby.core.string; - -import org.truffleruby.builtins.CoreMethod; -import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; -import org.truffleruby.builtins.CoreModule; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.language.control.RaiseException; - -import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; -import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.dsl.Specialization; - -@CoreModule("Truffle::StringOperations") -public class TruffleStringNodes { - - @CoreMethod(names = "truncate", onSingleton = true, required = 2, lowerFixnum = 2) - public abstract static class TruncateNode extends CoreMethodArrayArgumentsNode { - - @Specialization(guards = { "newByteLength < 0" }) - @TruffleBoundary - protected RubyString truncateLengthNegative(RubyString string, int newByteLength) { - throw new RaiseException( - getContext(), - getContext().getCoreExceptions().argumentError(formatNegativeError(newByteLength), this)); - } - - @Specialization( - guards = { "newByteLength >= 0", "isNewLengthTooLarge(string, newByteLength)" }) - @TruffleBoundary - protected RubyString truncateLengthTooLong(RubyString string, int newByteLength) { - throw new RaiseException( - getContext(), - coreExceptions().argumentError(formatTooLongError(newByteLength, string.rope), this)); - } - - @Specialization( - guards = { - "newByteLength >= 0", - "!isNewLengthTooLarge(string, newByteLength)" }) - protected RubyString stealStorage(RubyString string, int newByteLength, - @Cached RopeNodes.SubstringNode substringNode) { - string.setRope(substringNode.executeSubstring(string.rope, 0, newByteLength)); - return string; - } - - protected static boolean isNewLengthTooLarge(RubyString string, int newByteLength) { - return newByteLength > string.rope.byteLength(); - } - - @TruffleBoundary - private String formatNegativeError(int count) { - return StringUtils.format("Invalid byte count: %d 
is negative", count); - } - - @TruffleBoundary - private String formatTooLongError(int count, final Rope rope) { - return StringUtils - .format("Invalid byte count: %d exceeds string size of %d bytes", count, rope.byteLength()); - } - - } -} diff --git a/src/main/java/org/truffleruby/core/string/UTF8Operations.java b/src/main/java/org/truffleruby/core/string/UTF8Operations.java deleted file mode 100644 index cc6e56033781..000000000000 --- a/src/main/java/org/truffleruby/core/string/UTF8Operations.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2016, 2019 Oracle and/or its affiliates. All rights reserved. This - * code is released under a tri EPL/GPL/LGPL license. You can use it, - * redistribute it and/or modify it under the terms of the: - * - * Eclipse Public License version 2.0, or - * GNU General Public License version 2, or - * GNU Lesser General Public License version 2.1. - */ -package org.truffleruby.core.string; - -public abstract class UTF8Operations { - - public static boolean isUTF8ValidOneByte(byte b) { - return b >= 0; - } - - public static boolean isUTF8ValidTwoBytes(byte... bytes) { - assert bytes.length == 2; - - if ((bytes[0] & 0xff) >= 0xc2 && (bytes[0] & 0xff) <= 0xdf) { - return (bytes[1] & 0xff) >= 0x80 && (bytes[1] & 0xff) <= 0xbf; - } - - return false; - } - - public static boolean isUTF8ValidThreeBytes(byte... bytes) { - assert bytes.length == 3; - - if ((bytes[0] & 0xff) < 0xe0 || (bytes[0] & 0xff) > 0xef) { - return false; - } - - if ((bytes[2] & 0xff) < 0x80 || (bytes[2] & 0xff) > 0xbf) { - return false; - } - - if ((bytes[1] & 0xff) >= 0x80 || (bytes[2] & 0xff) <= 0xbf) { - if ((bytes[0] & 0xff) == 0xe0) { - return (bytes[1] & 0xff) >= 0xa0; - } - - if ((bytes[0] & 0xff) == 0xed) { - return (bytes[1] & 0xff) <= 0x9f; - } - - return true; - } - - return false; - } - - public static boolean isUTF8ValidFourBytes(byte... 
bytes) { - assert bytes.length == 4; - - if ((bytes[3] & 0xff) < 0x80 || (bytes[3] & 0xff) > 0xbf) { - return false; - } - - if ((bytes[2] & 0xff) < 0x80 || (bytes[2] & 0xff) > 0xbf) { - return false; - } - - if ((bytes[0] & 0xff) < 0xf0 || (bytes[0] & 0xff) > 0xf4) { - return false; - } - - if ((bytes[1] & 0xff) >= 0x80 || (bytes[2] & 0xff) <= 0xbf) { - if ((bytes[0] & 0xff) == 0xf0) { - return (bytes[1] & 0xff) >= 0x90; - } - - if ((bytes[0] & 0xff) == 0xf4) { - return (bytes[1] & 0xff) <= 0x8f; - } - - return true; - } - - return false; - } - - public static boolean isUTF8ValidFiveBytes(byte... bytes) { - assert bytes.length == 5; - - // There are currently no valid five byte UTF-8 codepoints. - return false; - } - - public static boolean isUTF8ValidSixBytes(byte... bytes) { - assert bytes.length == 6; - - // There are currently no valid six byte UTF-8 codepoints. - return false; - } - - public static int charWidth(byte b) { - if (b >= 0) { - return 1; - } else { - switch (b & 0xf0) { - case 0xe0: - return 3; - case 0xf0: - return 4; - default: - return 2; - } - } - } -} diff --git a/src/main/java/org/truffleruby/core/support/ByteArrayNodes.java b/src/main/java/org/truffleruby/core/support/ByteArrayNodes.java index 26fa29d22263..f27cb7661624 100644 --- a/src/main/java/org/truffleruby/core/support/ByteArrayNodes.java +++ b/src/main/java/org/truffleruby/core/support/ByteArrayNodes.java @@ -9,17 +9,16 @@ */ package org.truffleruby.core.support; -import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.dsl.Bind; import com.oracle.truffle.api.object.Shape; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; import org.truffleruby.builtins.UnaryCoreMethodNode; +import org.truffleruby.core.encoding.TStringUtils; import 
org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeGuards; -import org.truffleruby.core.rope.RopeNodes; import org.truffleruby.core.string.RubyString; import org.truffleruby.extra.ffi.Pointer; import org.truffleruby.extra.ffi.PointerNodes; @@ -44,7 +43,8 @@ public abstract static class AllocateNode extends UnaryCoreMethodNode { @Specialization protected RubyByteArray allocate(RubyClass rubyClass) { final Shape shape = getLanguage().byteArrayShape; - final RubyByteArray instance = new RubyByteArray(rubyClass, shape, RopeConstants.EMPTY_BYTES); + final RubyByteArray instance = new RubyByteArray(rubyClass, shape, + org.truffleruby.core.array.ArrayUtils.EMPTY_BYTES); AllocationTracing.trace(instance, this); return instance; } @@ -75,23 +75,28 @@ protected int getByte(RubyByteArray byteArray, int index) { @CoreMethod(names = "prepend", required = 1) public abstract static class PrependNode extends CoreMethodArrayArgumentsNode { - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected RubyByteArray prepend(RubyByteArray byteArray, Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached RopeNodes.BytesNode bytesNode) { + @Cached RubyStringLibrary strings, + @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode) { final byte[] bytes = byteArray.bytes; - final Rope rope = strings.getRope(string); - final int prependLength = rope.byteLength(); + var tstring = strings.getTString(string); + var encoding = strings.getTEncoding(string); + + final int prependLength = tstring.byteLength(encoding); final int originalLength = bytes.length; final int newLength = prependLength + originalLength; final byte[] prependedBytes = new byte[newLength]; - System.arraycopy(bytesNode.execute(rope), 0, prependedBytes, 0, prependLength); + + 
copyToByteArrayNode.execute(tstring, 0, prependedBytes, 0, prependLength, encoding); System.arraycopy(bytes, 0, prependedBytes, prependLength, originalLength); + final RubyByteArray instance = new RubyByteArray( coreLibrary().byteArrayClass, getLanguage().byteArrayShape, prependedBytes); + AllocationTracing.trace(instance, this); return instance; } @@ -121,12 +126,12 @@ public abstract static class FillNode extends CoreMethodArrayArgumentsNode { @Specialization protected Object fillFromString( - RubyByteArray byteArray, int dstStart, RubyString source, int srcStart, int length, - @Cached RopeNodes.BytesNode bytesNode) { - final Rope rope = source.rope; - final byte[] bytes = byteArray.bytes; - - System.arraycopy(bytesNode.execute(rope), srcStart, bytes, dstStart, length); + RubyByteArray destByteArray, int dstStart, RubyString source, int srcStart, int length, + @Cached RubyStringLibrary libString, + @Cached TruffleString.CopyToByteArrayNode copyToByteArrayNode) { + var tstring = source.tstring; + var encoding = libString.getTEncoding(source); + copyToByteArrayNode.execute(tstring, srcStart, destByteArray.bytes, dstStart, length, encoding); return source; } @@ -151,16 +156,17 @@ protected Object fillFromPointer( public abstract static class LocateNode extends CoreMethodArrayArgumentsNode { @Specialization( - guards = { "isSingleBytePattern(libPattern.getRope(pattern))" }) + guards = { "isSingleBytePattern(patternTString, patternEncoding)" }) protected Object getByteSingleByte(RubyByteArray byteArray, Object pattern, int start, int length, - @Cached RopeNodes.BytesNode bytesNode, + @Cached TruffleString.ReadByteNode readByteNode, @Cached BranchProfile tooSmallStartProfile, @Cached BranchProfile tooLargeStartProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern) { + @Cached RubyStringLibrary libPattern, + @Bind("libPattern.getTString(pattern)") AbstractTruffleString patternTString, + @Bind("libPattern.getTEncoding(pattern)") 
TruffleString.Encoding patternEncoding) { - final byte[] bytes = byteArray.bytes; - final Rope rope = libPattern.getRope(pattern); - final byte searchByte = bytesNode.execute(rope)[0]; + byte[] bytes = byteArray.bytes; + int searchByte = readByteNode.execute(patternTString, 0, patternEncoding); if (start >= length) { tooLargeStartProfile.enter(); @@ -172,72 +178,38 @@ protected Object getByteSingleByte(RubyByteArray byteArray, Object pattern, int start = 0; } - final int index = ArrayUtils.indexOf(bytes, start, length, searchByte); + final int index = ArrayUtils.indexOf(bytes, start, length, (byte) searchByte); return index == -1 ? nil : index + 1; } @Specialization( - guards = { "!isSingleBytePattern(libPattern.getRope(pattern))" }) + guards = { "!isSingleBytePattern(patternTString, patternEncoding)" }) protected Object getByte(RubyByteArray byteArray, Object pattern, int start, int length, - @Cached RopeNodes.BytesNode bytesNode, - @Cached RopeNodes.CharacterLengthNode characterLengthNode, + @Cached TruffleString.CodePointLengthNode codePointLengthNode, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode, + @Cached ConditionProfile noCopyProfile, @Cached ConditionProfile notFoundProfile, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libPattern) { - final Rope patternRope = libPattern.getRope(pattern); - final int index = indexOf( - byteArray.bytes, - start, - length, - bytesNode.execute(patternRope)); + @Cached RubyStringLibrary libPattern, + @Bind("libPattern.getTString(pattern)") AbstractTruffleString patternTString, + @Bind("libPattern.getTEncoding(pattern)") TruffleString.Encoding patternEncoding) { + // TODO (nirvdrum 09-June-2022): Copying the byte array here is wasteful, but ArrayUtils.indexOfWithOrMask does not accept an offset or length for the needle. 
+ // Another possibility would be to create a MutableTruffleString for the RubyByteArray and use ByteIndexOfStringNode, but that would force computation of the coderange of the byte[] + final byte[] patternBytes = TStringUtils.getBytesOrCopy(patternTString, patternEncoding, + getInternalByteArrayNode, noCopyProfile); + + final int index = ArrayUtils.indexOfWithOrMask(byteArray.bytes, start, length, patternBytes, null); if (notFoundProfile.profile(index == -1)) { return nil; } else { - return index + characterLengthNode.execute(patternRope); + return index + codePointLengthNode.execute(patternTString, patternEncoding); } } - protected boolean isSingleBytePattern(Rope rope) { - return RopeGuards.isSingleByteString(rope); + protected boolean isSingleBytePattern(AbstractTruffleString string, TruffleString.Encoding encoding) { + return string.byteLength(encoding) == 1; } - - public int indexOf(byte[] in, int start, int length, byte[] target) { - int targetCount = target.length; - int fromIndex = start; - if (fromIndex >= length) { - return (targetCount == 0 ? 
length : -1); - } - if (fromIndex < 0) { - fromIndex = 0; - } - if (targetCount == 0) { - return fromIndex; - } - - byte first = target[0]; - int max = length - targetCount; - - for (int i = fromIndex; i <= max; i++) { - if (in[i] != first) { - while (++i <= max && in[i] != first) { - } - } - - if (i <= max) { - int j = i + 1; - int end = j + targetCount - 1; - for (int k = 1; j < end && in[j] == target[k]; j++, k++) { - } - - if (j == end) { - return i; - } - } - } - return -1; - } - } } diff --git a/src/main/java/org/truffleruby/core/support/IONodes.java b/src/main/java/org/truffleruby/core/support/IONodes.java index 163e2ad2be7e..d35b41f52a9c 100644 --- a/src/main/java/org/truffleruby/core/support/IONodes.java +++ b/src/main/java/org/truffleruby/core/support/IONodes.java @@ -66,7 +66,7 @@ import java.io.OutputStream; import java.util.Arrays; -import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -76,9 +76,6 @@ import org.truffleruby.builtins.UnaryCoreMethodNode; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.core.thread.RubyThread; import org.truffleruby.core.thread.ThreadManager.BlockingAction; import org.truffleruby.extra.ffi.Pointer; @@ -133,21 +130,25 @@ protected RubyIO fd(RubyIO io, int fd) { @Primitive(name = "file_fnmatch", lowerFixnum = 2) public abstract static class FileFNMatchPrimitiveNode extends PrimitiveArrayArgumentsNode { - @TruffleBoundary - @Specialization(guards = { "stringsPattern.isRubyString(pattern)", "stringsPath.isRubyString(path)" }) + @Specialization(guards = { "stringsPattern.isRubyString(pattern)", 
"stringsPath.isRubyString(path)" }, + limit = "1") protected boolean fnmatch(Object pattern, Object path, int flags, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsPattern, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsPath) { - final Rope patternRope = stringsPattern.getRope(pattern); - final Rope pathRope = stringsPath.getRope(path); + @Cached RubyStringLibrary stringsPattern, + @Cached RubyStringLibrary stringsPath, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayPatternNode, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayPathNode) { + var patternByteArray = getInternalByteArrayPatternNode.execute(stringsPattern.getTString(pattern), + stringsPattern.getTEncoding(pattern)); + var pathByteArray = getInternalByteArrayPathNode.execute(stringsPath.getTString(path), + stringsPath.getTEncoding(path)); return fnmatch( - patternRope.getBytes(), - 0, - patternRope.byteLength(), - pathRope.getBytes(), - 0, - pathRope.byteLength(), + patternByteArray.getArray(), + patternByteArray.getOffset(), + patternByteArray.getEnd(), + pathByteArray.getArray(), + pathByteArray.getOffset(), + pathByteArray.getEnd(), flags) != FNM_NOMATCH; } @@ -192,7 +193,8 @@ private static int fnmatch_helper(byte[] bytes, int pstart, int pend, byte[] str switch (c) { case '?': if (s >= send || (pathname && isdirsep(string[s])) || - (period && string[s] == '.' && (s == 0 || (pathname && isdirsep(string[s - 1]))))) { + (period && string[s] == '.' && + (s == sstart || (pathname && isdirsep(string[s - 1]))))) { return FNM_NOMATCH; } s++; @@ -201,7 +203,8 @@ private static int fnmatch_helper(byte[] bytes, int pstart, int pend, byte[] str while (pat < pend && (c = (char) (bytes[pat++] & 0xFF)) == '*') { } if (s < send && - (period && string[s] == '.' && (s == 0 || (pathname && isdirsep(string[s - 1]))))) { + (period && string[s] == '.' 
&& + (s == sstart || (pathname && isdirsep(string[s - 1]))))) { return FNM_NOMATCH; } if (pat > pend || (pat == pend && c == '*')) { @@ -233,7 +236,8 @@ private static int fnmatch_helper(byte[] bytes, int pstart, int pend, byte[] str return FNM_NOMATCH; case '[': if (s >= send || (pathname && isdirsep(string[s]) || - (period && string[s] == '.' && (s == 0 || (pathname && isdirsep(string[s - 1])))))) { + (period && string[s] == '.' && + (s == sstart || (pathname && isdirsep(string[s - 1])))))) { return FNM_NOMATCH; } pat = range(bytes, pat, pend, (char) (string[s] & 0xFF), flags); @@ -275,6 +279,7 @@ private static int fnmatch_helper(byte[] bytes, int pstart, int pend, byte[] str return s >= send ? 0 : FNM_NOMATCH; } + @TruffleBoundary public static int fnmatch( byte[] bytes, int pstart, int pend, byte[] string, int sstart, int send, int flags) { @@ -436,7 +441,7 @@ public abstract static class IOReadPolyglotNode extends PrimitiveArrayArgumentsN @TruffleBoundary @Specialization protected Object read(int length, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final InputStream stream = getContext().getEnv().in(); final byte[] buffer = new byte[length]; final int bytesRead = getContext().getThreadManager().runUntilResult(this, () -> { @@ -458,7 +463,7 @@ protected Object read(int length, bytes = Arrays.copyOf(buffer, bytesRead); } - return makeStringNode.executeMake(bytes, Encodings.BINARY, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } } @@ -467,9 +472,9 @@ protected Object read(int length, public abstract static class IOWritePolyglotNode extends PrimitiveArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected int write(int fd, Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary 
strings) { final OutputStream stream; switch (fd) { @@ -484,19 +489,18 @@ protected int write(int fd, Object string, throw CompilerDirectives.shouldNotReachHere(); } - final Rope rope = strings.getRope(string); - final byte[] bytes = rope.getBytes(); + var byteArray = strings.getTString(string).getInternalByteArrayUncached(strings.getTEncoding(string)); getContext().getThreadManager().runUntilResult(this, () -> { try { - stream.write(bytes); + stream.write(byteArray.getArray(), byteArray.getOffset(), byteArray.getLength()); } catch (IOException e) { throw new RaiseException(getContext(), coreExceptions().ioError(e, this)); } return BlockingAction.SUCCESS; }); - return rope.byteLength(); + return byteArray.getLength(); } } diff --git a/src/main/java/org/truffleruby/core/support/PRNGRandomizerNodes.java b/src/main/java/org/truffleruby/core/support/PRNGRandomizerNodes.java index b4a3e8685098..8e96f875d9e5 100644 --- a/src/main/java/org/truffleruby/core/support/PRNGRandomizerNodes.java +++ b/src/main/java/org/truffleruby/core/support/PRNGRandomizerNodes.java @@ -35,6 +35,7 @@ import java.math.BigInteger; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.algorithms.Randomizer; @@ -48,9 +49,7 @@ import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.numeric.BignumOperations; import org.truffleruby.core.numeric.RubyBignum; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.language.Visibility; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -207,7 +206,7 @@ public abstract static class RandomizerBytesPrimitiveNode extends PrimitiveArray @TruffleBoundary @Specialization protected RubyString genRandBytes(RubyPRNGRandomizer randomizer, int length, - @Cached MakeStringNode makeStringNode) { + @Cached 
TruffleString.FromByteArrayNode fromByteArrayNode) { final byte[] bytes = new byte[length]; int idx = 0; for (; length >= 4; length -= 4) { @@ -225,7 +224,7 @@ protected RubyString genRandBytes(RubyPRNGRandomizer randomizer, int length, } } - return makeStringNode.executeMake(bytes, Encodings.BINARY, CodeRange.CR_UNKNOWN); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } } diff --git a/src/main/java/org/truffleruby/core/support/TypeNodes.java b/src/main/java/org/truffleruby/core/support/TypeNodes.java index df13e09ec317..8d35f54e1444 100644 --- a/src/main/java/org/truffleruby/core/support/TypeNodes.java +++ b/src/main/java/org/truffleruby/core/support/TypeNodes.java @@ -15,6 +15,7 @@ import com.oracle.truffle.api.dsl.GenerateNodeFactory; import com.oracle.truffle.api.dsl.GenerateUncached; import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreModule; import org.truffleruby.builtins.Primitive; import org.truffleruby.builtins.PrimitiveArrayArgumentsNode; @@ -32,10 +33,8 @@ import org.truffleruby.core.kernel.KernelNodes.ToSNode; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.module.RubyModule; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.Nil; import org.truffleruby.language.NotProvided; @@ -338,12 +337,12 @@ protected RubyString toS(Object obj, @Primitive(name = "module_name") public abstract static class ModuleNameNode extends PrimitiveArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString 
moduleName(RubyModule module) { final String name = module.fields.getName(); - return makeStringNode.executeMake(name, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, name, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/core/symbol/CoreSymbols.java b/src/main/java/org/truffleruby/core/symbol/CoreSymbols.java index e1b8fbc69e39..bb9838841e10 100644 --- a/src/main/java/org/truffleruby/core/symbol/CoreSymbols.java +++ b/src/main/java/org/truffleruby/core/symbol/CoreSymbols.java @@ -12,11 +12,11 @@ import java.util.ArrayList; import java.util.List; -import org.jcodings.specific.USASCIIEncoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.TStringConstants; // GENERATED BY tool/generate-core-symbols.rb // This file is automatically generated from tool/id.def with 'jt build core-symbols' @@ -198,12 +198,13 @@ public class CoreSymbols { public final RubySymbol ERROR_INFO = createRubySymbol("$!", toGlobal(236)); public RubySymbol createRubySymbol(String string, long id) { - LeafRope rope = RopeConstants.lookupUSASCII(string); - if (rope == null) { - rope = RopeOperations.encodeAscii(string, USASCIIEncoding.INSTANCE); + TruffleString tstring = TStringConstants.lookupUSASCIITString(string); + if (tstring == null) { + byte[] bytes = StringOperations.encodeAsciiBytes(string); + tstring = TStringUtils.fromByteArray(bytes, TruffleString.Encoding.US_ASCII); } - final RubySymbol symbol = new RubySymbol(string, rope, Encodings.US_ASCII, id); + final RubySymbol symbol = new RubySymbol(string, tstring, Encodings.US_ASCII, id); CORE_SYMBOLS.add(symbol); if (id != RubySymbol.UNASSIGNED_ID) { diff 
--git a/src/main/java/org/truffleruby/core/symbol/RubySymbol.java b/src/main/java/org/truffleruby/core/symbol/RubySymbol.java index 4c8482e38d7b..c2b3b3e97046 100644 --- a/src/main/java/org/truffleruby/core/symbol/RubySymbol.java +++ b/src/main/java/org/truffleruby/core/symbol/RubySymbol.java @@ -12,12 +12,12 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.Hashing; import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.klass.RubyClass; -import org.truffleruby.core.rope.LeafRope; import org.truffleruby.core.string.ImmutableRubyString; import com.oracle.truffle.api.dsl.Cached; @@ -40,7 +40,7 @@ public final class RubySymbol extends ImmutableRubyObjectNotCopyable implements public final RubyEncoding encoding; private final String string; - private final LeafRope rope; + public final TruffleString tstring; private final int javaStringHashCode; private final long id; private ImmutableRubyString name; @@ -48,18 +48,19 @@ public final class RubySymbol extends ImmutableRubyObjectNotCopyable implements private volatile RootCallTarget callTargetNoRefinements = null; - RubySymbol(String string, LeafRope rope, RubyEncoding encoding, long id) { - assert rope.encoding == encoding.jcoding; + RubySymbol(String string, TruffleString tstring, RubyEncoding encoding, long id) { + assert tstring.isManaged(); + assert tstring.isCompatibleTo(encoding.tencoding); this.encoding = encoding; this.string = string; - this.rope = rope; + this.tstring = tstring; this.javaStringHashCode = string.hashCode(); this.id = id; this.type = Identifiers.stringToType(string); } - RubySymbol(String string, LeafRope rope, RubyEncoding encoding) { - this(string, rope, encoding, UNASSIGNED_ID); + RubySymbol(String 
string, TruffleString tstring, RubyEncoding encoding) { + this(string, tstring, encoding, UNASSIGNED_ID); } public long getId() { @@ -70,10 +71,6 @@ public String getString() { return string; } - public LeafRope getRope() { - return rope; - } - public IdentifierType getType() { return this.type; } @@ -104,7 +101,7 @@ public String toString() { public ImmutableRubyString getName(RubyLanguage language) { if (name == null) { - name = language.getFrozenStringLiteral(this.getRope()); + name = language.getFrozenStringLiteral(tstring, encoding); } return name; } diff --git a/src/main/java/org/truffleruby/core/symbol/SymbolNodes.java b/src/main/java/org/truffleruby/core/symbol/SymbolNodes.java index e768aff3cba6..bfb4ef65d3a2 100644 --- a/src/main/java/org/truffleruby/core/symbol/SymbolNodes.java +++ b/src/main/java/org/truffleruby/core/symbol/SymbolNodes.java @@ -29,7 +29,6 @@ import org.truffleruby.core.proc.RubyProc; import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.language.LexicalScope; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyLambdaRootNode; @@ -75,12 +74,10 @@ protected RubyArray allSymbols() { @CoreMethod(names = { "==", "eql?" 
}, required = 1) public abstract static class EqualNode extends CoreMethodArrayArgumentsNode { - @Specialization protected boolean equal(RubySymbol a, Object b) { return a == b; } - } @GenerateUncached @@ -269,33 +266,26 @@ protected int getCacheLimit() { @CoreMethod(names = "to_s") public abstract static class ToSNode extends CoreMethodArrayArgumentsNode { - @Specialization - protected RubyString toS(RubySymbol symbol, - @Cached StringNodes.MakeStringNode makeStringNode) { - return makeStringNode.fromRope(symbol.getRope(), symbol.encoding); + protected RubyString toS(RubySymbol symbol) { + return createString(symbol.tstring, symbol.encoding); } - } @CoreMethod(names = "name") public abstract static class NameNode extends CoreMethodArrayArgumentsNode { - @Specialization protected ImmutableRubyString toS(RubySymbol symbol) { return symbol.getName(getLanguage()); } - } @CoreMethod(names = { "__allocate__", "__layout_allocate__" }, constructor = true, visibility = Visibility.PRIVATE) public abstract static class AllocateNode extends UnaryCoreMethodNode { - @Specialization protected Object allocate(RubyClass rubyClass) { throw new RaiseException(getContext(), coreExceptions().typeErrorAllocatorUndefinedFor(rubyClass, this)); } - } } diff --git a/src/main/java/org/truffleruby/core/symbol/SymbolTable.java b/src/main/java/org/truffleruby/core/symbol/SymbolTable.java index 4c0c4535932d..e303e55c0ca6 100644 --- a/src/main/java/org/truffleruby/core/symbol/SymbolTable.java +++ b/src/main/java/org/truffleruby/core/symbol/SymbolTable.java @@ -11,46 +11,43 @@ import java.util.Collection; -import org.jcodings.specific.USASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.collections.WeakValueCache; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; -import 
org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.NativeRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeCache; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringCache; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.StringOperations; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; public class SymbolTable { - private final RopeCache ropeCache; + private final TStringCache tstringCache; // A cache for j.l.String to Symbols. Entries are kept as long as the Symbol is alive. // However, this doesn't matter as the cache entries will be re-created when used. private final WeakValueCache stringToSymbolCache = new WeakValueCache<>(); - // Weak map of RopeWithEncoding to Symbol to keep Symbols unique. + // Weak map of TStringWithEncoding to Symbol to keep Symbols unique. // As long as the Symbol is referenced, the entry will stay in the symbolMap. 
- private final WeakValueCache symbolMap = new WeakValueCache<>(); + private final WeakValueCache symbolMap = new WeakValueCache<>(); - public SymbolTable(RopeCache ropeCache, CoreSymbols coreSymbols) { - this.ropeCache = ropeCache; + public SymbolTable(TStringCache tstringCache, CoreSymbols coreSymbols) { + this.tstringCache = tstringCache; addCoreSymbols(coreSymbols); } private void addCoreSymbols(CoreSymbols coreSymbols) { for (RubySymbol symbol : coreSymbols.CORE_SYMBOLS) { - final Rope rope = symbol.getRope(); - final RopeWithEncoding ropeWithEncoding = normalizeRopeForLookup(rope, symbol.encoding); - assert rope == ropeWithEncoding.getRope(); - assert rope == ropeCache.getRope(rope); + var rope = symbol.tstring; + var lookup = normalizeForLookup(rope, symbol.encoding); + assert rope == lookup.tstring; + assert rope == tstringCache.getTString(symbol.tstring, symbol.encoding); - final RubySymbol existing = symbolMap.put(ropeWithEncoding, symbol); + final RubySymbol existing = symbolMap.put(lookup, symbol); if (existing != null) { throw new AssertionError("Duplicate Symbol in SymbolTable: " + existing); } @@ -69,16 +66,16 @@ public RubySymbol getSymbol(String string) { return symbol; } - final LeafRope rope; + final TruffleString str; final RubyEncoding encoding; if (StringOperations.isAsciiOnly(string)) { - rope = RopeOperations.encodeAscii(string, USASCIIEncoding.INSTANCE); + str = TStringUtils.usAsciiString(string); encoding = Encodings.US_ASCII; } else { - rope = StringOperations.encodeRope(string, UTF8Encoding.INSTANCE); + str = TStringUtils.utf8TString(string); encoding = Encodings.UTF_8; } - symbol = getSymbol(rope, encoding); + symbol = getSymbol(str, encoding); // Add it to the direct java.lang.String to Symbol cache stringToSymbolCache.addInCacheIfAbsent(string, symbol); @@ -87,43 +84,40 @@ public RubySymbol getSymbol(String string) { } @TruffleBoundary - public RubySymbol getSymbol(Rope rope, RubyEncoding encoding) { - final RopeWithEncoding 
ropeEncodingForLookup = normalizeRopeForLookup(rope, encoding); - final RubySymbol symbol = symbolMap.get(ropeEncodingForLookup); + public RubySymbol getSymbol(AbstractTruffleString tstring, RubyEncoding originalEncoding) { + var key = normalizeForLookup(tstring, originalEncoding); + final RubySymbol symbol = symbolMap.get(key); if (symbol != null) { return symbol; } - final LeafRope cachedRope = ropeCache.getRope(ropeEncodingForLookup.getRope()); - final RubyEncoding symbolEncoding = ropeEncodingForLookup.getEncoding(); - final RubySymbol newSymbol = createSymbol(cachedRope, symbolEncoding); - // Use a RopeWithEncoding with the cached Rope in symbolMap, since the Symbol refers to it and so we - // do not keep rope alive unnecessarily. - return symbolMap.addInCacheIfAbsent(new RopeWithEncoding(cachedRope, symbolEncoding), newSymbol); + final RubyEncoding symbolEncoding = key.encoding; + var cachedTString = tstringCache.getTString(key.tstring, symbolEncoding); + final RubySymbol newSymbol = createSymbol(cachedTString, symbolEncoding); + // Use a TStringWithEncoding with the cached TString in symbolMap, since the Symbol refers to it and so we + // do not keep the other TString alive unnecessarily. 
+ return symbolMap.addInCacheIfAbsent(new TStringWithEncoding(cachedTString, symbolEncoding), newSymbol); } @TruffleBoundary - public RubySymbol getSymbolIfExists(Rope rope, RubyEncoding encoding) { - final RopeWithEncoding ropeKey = normalizeRopeForLookup(rope, encoding); - return symbolMap.get(ropeKey); + public RubySymbol getSymbolIfExists(AbstractTruffleString tstring, RubyEncoding encoding) { + var key = normalizeForLookup(tstring, encoding); + return symbolMap.get(key); } - private RopeWithEncoding normalizeRopeForLookup(Rope rope, RubyEncoding encoding) { - if (rope instanceof NativeRope) { - rope = ((NativeRope) rope).toLeafRope(); - } + private TStringWithEncoding normalizeForLookup(AbstractTruffleString rope, RubyEncoding encoding) { + TruffleString string = rope.asManagedTruffleStringUncached(encoding.tencoding); + var strEnc = new TStringWithEncoding(string, encoding); - if (rope.isAsciiOnly() && rope.getEncoding() != USASCIIEncoding.INSTANCE) { - rope = RopeOperations.withEncoding(rope, USASCIIEncoding.INSTANCE); - encoding = Encodings.US_ASCII; + if (strEnc.isAsciiOnly() && encoding != Encodings.US_ASCII) { + strEnc = strEnc.forceEncoding(Encodings.US_ASCII); } - return new RopeWithEncoding(rope, encoding); + return strEnc; } - private RubySymbol createSymbol(LeafRope cachedRope, RubyEncoding encoding) { - final String string = RopeOperations.decodeOrEscapeBinaryRope(cachedRope); - return new RubySymbol(string, cachedRope, encoding); + private RubySymbol createSymbol(TruffleString truffleString, RubyEncoding encoding) { + return new RubySymbol(truffleString.toString(), truffleString, encoding); } @TruffleBoundary diff --git a/src/main/java/org/truffleruby/core/thread/ThreadBacktraceLocationNodes.java b/src/main/java/org/truffleruby/core/thread/ThreadBacktraceLocationNodes.java index 01b2b6e54073..484da8cd7f27 100644 --- a/src/main/java/org/truffleruby/core/thread/ThreadBacktraceLocationNodes.java +++ 
b/src/main/java/org/truffleruby/core/thread/ThreadBacktraceLocationNodes.java @@ -9,17 +9,14 @@ */ package org.truffleruby.core.thread; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreModule; import org.truffleruby.builtins.UnaryCoreMethodNode; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.backtrace.Backtrace; @@ -50,16 +47,14 @@ public abstract static class AbsolutePathNode extends UnaryCoreMethodNode { @TruffleBoundary @Specialization - protected Object absolutePath(RubyBacktraceLocation threadBacktraceLocation, - @Cached MakeStringNode makeStringNode) { + protected Object absolutePath(RubyBacktraceLocation threadBacktraceLocation) { final SourceSection sourceSection = getAvailableSourceSection(getContext(), threadBacktraceLocation); - return getAbsolutePath(sourceSection, makeStringNode, this); + return getAbsolutePath(sourceSection, this); } @TruffleBoundary - public static Object getAbsolutePath(SourceSection sourceSection, MakeStringNode makeStringNode, - RubyBaseNode node) { + public static Object getAbsolutePath(SourceSection sourceSection, RubyBaseNode node) { var context = node.getContext(); var language = node.getLanguage(); @@ -72,12 +67,12 @@ public static Object getAbsolutePath(SourceSection sourceSection, MakeStringNode } else if (source.getPath() != null) { // A normal file final String path = language.getSourcePath(source); final String canonicalPath = context.getFeatureLoader().canonicalize(path); - final Rope 
cachedRope = language.ropeCache - .getRope(StringOperations.encodeRope(canonicalPath, UTF8Encoding.INSTANCE)); - return makeStringNode.fromRope(cachedRope, Encodings.UTF_8); + var cachedRope = language.tstringCache.getTString(TStringUtils.utf8TString(canonicalPath), + Encodings.UTF_8); + return node.createString(cachedRope, Encodings.UTF_8); } else { // eval() - final Rope cachedPath = language.getPathToRopeCache().getCachedPath(source); - return makeStringNode.fromRope(cachedPath, Encodings.UTF_8); + var cachedPath = language.getPathToTStringCache().getCachedPath(source); + return node.createString(cachedPath, Encodings.UTF_8); } } } @@ -89,15 +84,14 @@ public abstract static class PathNode extends UnaryCoreMethodNode { @TruffleBoundary @Specialization - protected RubyString path(RubyBacktraceLocation threadBacktraceLocation, - @Cached MakeStringNode makeStringNode) { + protected RubyString path(RubyBacktraceLocation threadBacktraceLocation) { final SourceSection sourceSection = getAvailableSourceSection(getContext(), threadBacktraceLocation); if (sourceSection == null) { return coreStrings().UNKNOWN.createInstance(getContext()); } else { - final Rope path = getLanguage().getPathToRopeCache().getCachedPath(sourceSection.getSource()); - return makeStringNode.fromRope(path, Encodings.UTF_8); + var path = getLanguage().getPathToTStringCache().getCachedPath(sourceSection.getSource()); + return createString(path, Encodings.UTF_8); } } @@ -108,13 +102,13 @@ public abstract static class LabelNode extends UnaryCoreMethodNode { @Specialization protected RubyString label(RubyBacktraceLocation threadBacktraceLocation, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final Backtrace backtrace = threadBacktraceLocation.backtrace; final int index = threadBacktraceLocation.activationIndex; final TruffleStackTraceElement element = backtrace.getStackTrace()[index]; final String label = Backtrace.labelFor(element); - return 
makeStringNode.executeMake(label, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, label, Encodings.UTF_8); } } @@ -122,13 +116,13 @@ protected RubyString label(RubyBacktraceLocation threadBacktraceLocation, public abstract static class BaseLabelNode extends UnaryCoreMethodNode { @Specialization protected RubyString label(RubyBacktraceLocation threadBacktraceLocation, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final Backtrace backtrace = threadBacktraceLocation.backtrace; final int index = threadBacktraceLocation.activationIndex; final TruffleStackTraceElement element = backtrace.getStackTrace()[index]; final String baseLabel = Backtrace.baseLabelFor(element); - return makeStringNode.executeMake(baseLabel, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, baseLabel, Encodings.UTF_8); } } @@ -148,7 +142,7 @@ protected int lineno(RubyBacktraceLocation threadBacktraceLocation) { @CoreMethod(names = "to_s") public abstract static class ToSNode extends UnaryCoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString toS(RubyBacktraceLocation threadBacktraceLocation) { @@ -158,7 +152,7 @@ protected RubyString toS(RubyBacktraceLocation threadBacktraceLocation) { final String description = getContext() .getUserBacktraceFormatter() .formatLine(backtrace.getStackTrace(), index, null); - return makeStringNode.executeMake(description, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, description, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/core/thread/ThreadNodes.java b/src/main/java/org/truffleruby/core/thread/ThreadNodes.java index 75b1c4bc58ed..d14964d1a549 100644 --- 
a/src/main/java/org/truffleruby/core/thread/ThreadNodes.java +++ b/src/main/java/org/truffleruby/core/thread/ThreadNodes.java @@ -49,6 +49,7 @@ import com.oracle.truffle.api.TruffleSafepoint.Interrupter; import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.collections.Pair; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; @@ -78,9 +79,7 @@ import org.truffleruby.core.numeric.RubyBignum; import org.truffleruby.core.proc.ProcOperations; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.support.RubyPRNGRandomizer; import org.truffleruby.core.symbol.RubySymbol; @@ -564,7 +563,7 @@ protected Object pass() { @CoreMethod(names = "status") public abstract static class StatusNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected Object status(RubyThread self) { @@ -577,8 +576,7 @@ protected Object status(RubyThread self) { return false; } } - return makeStringNode - .executeMake(StringUtils.toLowerCase(status.name()), Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromJavaStringNode, StringUtils.toLowerCase(status.name()), Encodings.US_ASCII); // CR_7BIT } } @@ -875,12 +873,11 @@ public void run(RubyThread rubyThread, Node currentNode) { @Primitive(name = "thread_source_location") public abstract static class ThreadSourceLocationNode extends PrimitiveArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child 
private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString sourceLocation(RubyThread thread) { - return makeStringNode - .executeMake(thread.sourceLocation, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, thread.sourceLocation, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/core/time/GetTimeZoneNode.java b/src/main/java/org/truffleruby/core/time/GetTimeZoneNode.java index fbdd5f976702..6bafd302bb2c 100644 --- a/src/main/java/org/truffleruby/core/time/GetTimeZoneNode.java +++ b/src/main/java/org/truffleruby/core/time/GetTimeZoneNode.java @@ -36,6 +36,7 @@ import com.oracle.truffle.api.CompilerDirectives; import org.truffleruby.RubyLanguage; import org.truffleruby.language.RubyBaseNode; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.control.RaiseException; import org.truffleruby.language.dispatch.DispatchNode; import org.truffleruby.language.library.RubyStringLibrary; @@ -76,7 +77,7 @@ protected TimeZoneAndName getTimeZone(Object tz) { String tzString = ""; final RubyStringLibrary libString = RubyStringLibrary.getUncached(); if (libString.isRubyString(tz)) { - tzString = libString.getJavaString(tz); + tzString = RubyGuards.getJavaString(tz); } if (tz == nil) { diff --git a/src/main/java/org/truffleruby/core/time/RubyDateFormatter.java b/src/main/java/org/truffleruby/core/time/RubyDateFormatter.java index f5747ef318f1..7a8807870ab1 100644 --- a/src/main/java/org/truffleruby/core/time/RubyDateFormatter.java +++ b/src/main/java/org/truffleruby/core/time/RubyDateFormatter.java @@ -58,22 +58,19 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.nodes.ExplodeLoop; -import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import 
com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.exception.ErrnoErrorNode; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.LazyIntRope; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.TStringBuilder; import org.truffleruby.core.string.RubyString; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.core.string.StringOperations; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.backtrace.Backtrace; import org.truffleruby.language.control.RaiseException; @@ -187,7 +184,7 @@ private static Token findToken(char c) { public static class Token { private final Format format; private final Object data; - private final LeafRope rope; + private final TruffleString tstring; protected Token(Format format) { this(format, null); @@ -197,17 +194,14 @@ protected Token(Format formatString, Object data) { this(formatString, data, null); } - protected Token(Format formatString, Object data, LeafRope rope) { + protected Token(Format formatString, Object data, TruffleString tstring) { this.format = formatString; this.data = data; - this.rope = rope; + this.tstring = tstring; } public static Token str(String str) { - return new Token( - Format.FORMAT_STRING, - str, - StringOperations.encodeRope(str, UTF8Encoding.INSTANCE, CodeRange.CR_UNKNOWN)); + return new Token(Format.FORMAT_STRING, str, TStringUtils.utf8TString(str)); } public static Token format(char c) { @@ -233,8 +227,8 @@ public Object 
getData() { return data; } - public LeafRope getRope() { - return rope; + public TruffleString getTString() { + return tstring; } /** Gets the format. @@ -262,22 +256,22 @@ private static void addToPattern(List compiledPattern, String str) { } @TruffleBoundary - public static Token[] compilePattern(Rope pattern, boolean dateLibrary, RubyContext context, Node currentNode) { + public static Token[] compilePattern(AbstractTruffleString pattern, RubyEncoding encoding, boolean dateLibrary, + RubyContext context, Node currentNode) { List compiledPattern = new LinkedList<>(); - Encoding enc = pattern.getEncoding(); - if (!enc.isAsciiCompatible()) { + if (!encoding.isAsciiCompatible) { throw new RaiseException( context, context.getCoreExceptions().argumentError( "format should have ASCII compatible encoding", currentNode)); } - if (enc != ASCIIEncoding.INSTANCE) { // default for ByteList - compiledPattern.add(new Token(Format.FORMAT_ENCODING, enc)); + if (encoding != Encodings.BINARY) { // default for ByteList + compiledPattern.add(new Token(Format.FORMAT_ENCODING, encoding)); } - StrftimeLexer lexer = new StrftimeLexer(RopeOperations.decodeRope(pattern)); + StrftimeLexer lexer = new StrftimeLexer(TStringUtils.toJavaStringOrThrow(pattern, encoding)); Token token; while ((token = lexer.yylex()) != null) { @@ -372,10 +366,10 @@ enum FieldType { } @TruffleBoundary - public static RopeBuilder formatToRopeBuilder(Token[] compiledPattern, ZonedDateTime dt, Object zone, + public static TStringBuilder formatToRopeBuilder(Token[] compiledPattern, ZonedDateTime dt, Object zone, RubyContext context, RubyLanguage language, Node currentNode, ErrnoErrorNode errnoErrorNode) { RubyTimeOutputFormatter formatter = RubyTimeOutputFormatter.DEFAULT_FORMATTER; - RopeBuilder toAppendTo = new RopeBuilder(); + TStringBuilder toAppendTo = new TStringBuilder(); for (Token token : compiledPattern) { String output = null; @@ -385,7 +379,7 @@ public static RopeBuilder formatToRopeBuilder(Token[] 
compiledPattern, ZonedDate switch (format) { case FORMAT_ENCODING: - toAppendTo.setEncoding((Encoding) token.getData()); + toAppendTo.setEncoding((RubyEncoding) token.getData()); continue; // go to next token case FORMAT_OUTPUT: formatter = (RubyTimeOutputFormatter) token.getData(); @@ -550,8 +544,8 @@ public static RopeBuilder formatToRopeBuilder(Token[] compiledPattern, ZonedDate output = formatter.format(output, value, type); } catch (IndexOutOfBoundsException ioobe) { final Backtrace backtrace = context.getCallStack().getBacktrace(currentNode); - final Rope messageRope = StringOperations.encodeRope("strftime", UTF8Encoding.INSTANCE); - final RubyString message = StringOperations.createUTF8String(context, language, messageRope); + final RubyString message = StringOperations.createUTF8String(context, language, "strftime"); + throw new RaiseException( context, errnoErrorNode.execute(null, context.getCoreLibrary().getErrnoValue("ERANGE"), message, @@ -576,7 +570,7 @@ public static boolean formatCanBeFast(Token[] compiledPattern) { switch (format) { case FORMAT_ENCODING: // Only handle UTF-8 for fast formats - if (token.getData() != UTF8Encoding.INSTANCE) { + if (token.getData() != Encodings.UTF_8) { return false; } break; @@ -619,12 +613,15 @@ public static boolean formatCanBeFast(Token[] compiledPattern) { } @ExplodeLoop - public static Rope formatToRopeFast(Token[] compiledPattern, ZonedDateTime dt, - RopeNodes.ConcatNode concatNode) { - Rope rope = null; + public static TruffleString formatToRopeFast(Token[] compiledPattern, ZonedDateTime dt, + TruffleString.ConcatNode concatNode, + TruffleString.FromLongNode fromLongNode, + TruffleString.CodePointLengthNode codePointLengthNode) { + final var utf8 = Encodings.UTF_8.tencoding; + TruffleString tstring = TStringConstants.EMPTY_UTF8; for (Token token : compiledPattern) { - final Rope appendRope; + final TruffleString appendTString; switch (token.getFormat()) { case FORMAT_ENCODING: @@ -632,22 +629,22 @@ public 
static Rope formatToRopeFast(Token[] compiledPattern, ZonedDateTime dt, continue; case FORMAT_STRING: - appendRope = token.getRope(); + appendTString = token.getTString(); break; case FORMAT_DAY: - appendRope = RopeConstants.paddedNumber(dt.getDayOfMonth()); + appendTString = TStringConstants.paddedNumber(dt.getDayOfMonth()); break; case FORMAT_HOUR: - appendRope = RopeConstants.paddedNumber(dt.getHour()); + appendTString = TStringConstants.paddedNumber(dt.getHour()); break; case FORMAT_MINUTES: - appendRope = RopeConstants.paddedNumber(dt.getMinute()); + appendTString = TStringConstants.paddedNumber(dt.getMinute()); break; case FORMAT_MONTH: - appendRope = RopeConstants.paddedNumber(dt.getMonthValue()); + appendTString = TStringConstants.paddedNumber(dt.getMonthValue()); break; case FORMAT_SECONDS: - appendRope = RopeConstants.paddedNumber(dt.getSecond()); + appendTString = TStringConstants.paddedNumber(dt.getSecond()); break; case FORMAT_YEAR_LONG: { @@ -655,29 +652,27 @@ public static Rope formatToRopeFast(Token[] compiledPattern, ZonedDateTime dt, assert value >= 1000; assert value <= 9999; - appendRope = new LazyIntRope(value, UTF8Encoding.INSTANCE, 4); + appendTString = fromLongNode.execute(value, utf8, true); } break; case FORMAT_NANOSEC: { // always %6N, checked by formatCanBeFast() final int nano = dt.getNano(); - final LazyIntRope microSecondRope = new LazyIntRope(nano / 1000, UTF8Encoding.INSTANCE); + + var microSecondTString = fromLongNode.execute(nano / 1000, utf8, true); // This fast-path only handles the '%6N' format, so output will always be 6 characters long. final int length = 6; - final int padding = length - microSecondRope.characterLength(); + final int padding = length - codePointLengthNode.execute(microSecondTString, utf8); // `padding` is guaranteed to be >= 0 because `nano` can be at most 9 digits long before the // conversion to microseconds. The division further constrains the rope to be at most 6 digits long. 
- assert padding >= 0 : microSecondRope; + assert padding >= 0 : microSecondTString; if (padding == 0) { - appendRope = microSecondRope; + appendTString = microSecondTString; } else { - appendRope = concatNode - .executeConcat( - RopeConstants.paddingZeros(padding), - microSecondRope, - UTF8Encoding.INSTANCE); + appendTString = concatNode.execute(TStringConstants.paddingZeros(padding), microSecondTString, + utf8, true); } } break; @@ -686,18 +681,10 @@ public static Rope formatToRopeFast(Token[] compiledPattern, ZonedDateTime dt, throw CompilerDirectives.shouldNotReachHere(); } - if (rope == null) { - rope = appendRope; - } else { - rope = concatNode.executeConcat(rope, appendRope, UTF8Encoding.INSTANCE); - } - } - - if (rope == null) { - rope = RopeConstants.EMPTY_UTF8_ROPE; + tstring = concatNode.execute(tstring, appendTString, utf8, true); } - return rope; + return tstring; } private static int formatWeekOfYear(ZonedDateTime dt, int firstDayOfWeek) { @@ -778,7 +765,7 @@ private static String formatZone(int colons, int value, RubyTimeOutputFormatter private static String getRubyTimeZoneName(ZonedDateTime dt, Object zone) { RubyStringLibrary strings = RubyStringLibrary.getUncached(); if (strings.isRubyString(zone)) { - return strings.getJavaString(zone); + return RubyGuards.getJavaString(zone); } else { return ""; } diff --git a/src/main/java/org/truffleruby/core/time/TimeNodes.java b/src/main/java/org/truffleruby/core/time/TimeNodes.java index c1286f93d2f9..d461bb5165f7 100644 --- a/src/main/java/org/truffleruby/core/time/TimeNodes.java +++ b/src/main/java/org/truffleruby/core/time/TimeNodes.java @@ -12,11 +12,11 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.library.CachedLibrary; import 
com.oracle.truffle.api.object.Shape; import com.oracle.truffle.api.profiles.ConditionProfile; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyLanguage; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -28,14 +28,9 @@ import org.truffleruby.core.exception.ErrnoErrorNode; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.numeric.RubyBignum; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeNodes; +import org.truffleruby.core.string.TStringBuilder; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringCachingGuards; -import org.truffleruby.core.string.StringNodes; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.time.RubyDateFormatter.Token; import org.truffleruby.language.Nil; @@ -53,12 +48,6 @@ @CoreModule(value = "Time", isClass = true) public abstract class TimeNodes { - public static RubyString getShortZoneName(StringNodes.MakeStringNode makeStringNode, ZonedDateTime dt, - TimeZoneAndName zoneAndName) { - final String shortZoneName = zoneAndName.getName(dt); - return makeStringNode.executeMake(shortZoneName, Encodings.UTF_8, CodeRange.CR_UNKNOWN); - } - @CoreMethod(names = { "__allocate__", "__layout_allocate__" }, constructor = true, visibility = Visibility.PRIVATE) public abstract static class AllocateNode extends CoreMethodArrayArgumentsNode { @@ -93,10 +82,11 @@ public abstract static class LocalTimeNode extends PrimitiveArrayArgumentsNode { @Specialization protected RubyTime localtime(RubyTime time, Nil offset, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached 
TruffleString.FromJavaStringNode fromJavaStringNode) { final TimeZoneAndName timeZoneAndName = getTimeZoneNode.executeGetTimeZone(); final ZonedDateTime newDateTime = withZone(time.dateTime, timeZoneAndName.getZone()); - final RubyString zone = getShortZoneName(makeStringNode, newDateTime, timeZoneAndName); + final String shortZoneName = timeZoneAndName.getName(newDateTime); + final RubyString zone = createString(fromJavaStringNode, shortZoneName, Encodings.UTF_8); time.isUtc = false; time.relativeOffset = false; @@ -174,13 +164,14 @@ private ZonedDateTime inUTC(ZonedDateTime dateTime) { public abstract static class TimeNowNode extends CoreMethodArrayArgumentsNode { @Child private GetTimeZoneNode getTimeZoneNode = GetTimeZoneNodeGen.create(); - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyTime timeNow(RubyClass timeClass) { final TimeZoneAndName zoneAndName = getTimeZoneNode.executeGetTimeZone(); final ZonedDateTime dt = now(zoneAndName.getZone()); - final RubyString zone = getShortZoneName(makeStringNode, dt, zoneAndName); + final String shortZoneName = zoneAndName.getName(dt); + final RubyString zone = createString(fromJavaStringNode, shortZoneName, Encodings.UTF_8); final RubyTime instance = new RubyTime(timeClass, getLanguage().timeShape, dt, zone, nil, false, false); AllocationTracing.trace(instance, this); return instance; @@ -198,13 +189,14 @@ private ZonedDateTime now(ZoneId timeZone) { public abstract static class TimeAtPrimitiveNode extends PrimitiveArrayArgumentsNode { @Child private GetTimeZoneNode getTimeZoneNode = GetTimeZoneNodeGen.create(); - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); 
@Specialization protected RubyTime timeAt(RubyClass timeClass, long seconds, int nanoseconds) { final TimeZoneAndName zoneAndName = getTimeZoneNode.executeGetTimeZone(); final ZonedDateTime dateTime = getDateTime(seconds, nanoseconds, zoneAndName.getZone()); - final RubyString zone = getShortZoneName(makeStringNode, dateTime, zoneAndName); + final String shortZoneName = zoneAndName.getName(dateTime); + final RubyString zone = createString(fromJavaStringNode, shortZoneName, Encodings.UTF_8); final Shape shape = getLanguage().timeShape; final RubyTime instance = new RubyTime(timeClass, shape, dateTime, zone, nil, false, false); @@ -375,46 +367,52 @@ protected Object timeZone(RubyTime time) { } @Primitive(name = "time_strftime") - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) public abstract static class TimeStrftimePrimitiveNode extends PrimitiveArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); @Child private ErrnoErrorNode errnoErrorNode = ErrnoErrorNode.create(); @Specialization( - guards = { "equalNode.execute(libFormat.getRope(format), cachedFormat)" }, + guards = "equalNode.execute(libFormat, format, cachedFormat, cachedEncoding)", limit = "getLanguage().options.TIME_FORMAT_CACHE") - protected RubyString timeStrftime(RubyTime time, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, - @Cached("libFormat.getRope(format)") Rope cachedFormat, - @Cached(value = "compilePattern(cachedFormat)", dimensions = 1) Token[] pattern, - @Cached RopeNodes.EqualNode equalNode, + protected RubyString timeStrftimeCached(RubyTime time, Object format, + @Cached RubyStringLibrary libFormat, + @Cached("asTruffleStringUncached(format)") TruffleString cachedFormat, + @Cached("libFormat.getEncoding(format)") RubyEncoding cachedEncoding, + @Cached(value = "compilePattern(cachedFormat, cachedEncoding)", dimensions = 1) Token[] pattern, + @Cached 
StringHelperNodes.EqualSameEncodingNode equalNode, @Cached("formatCanBeFast(pattern)") boolean canUseFast, @Cached ConditionProfile yearIsFastProfile, - @Cached RopeNodes.ConcatNode concatNode) { + @Cached TruffleString.ConcatNode concatNode, + @Cached TruffleString.FromLongNode fromLongNode, + @Cached TruffleString.CodePointLengthNode codePointLengthNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { if (canUseFast && yearIsFastProfile.profile(yearIsFast(time))) { - final Rope rope = RubyDateFormatter.formatToRopeFast(pattern, time.dateTime, concatNode); - return makeStringNode.fromRope(rope, Encodings.UTF_8); + var tstring = RubyDateFormatter.formatToRopeFast(pattern, time.dateTime, concatNode, fromLongNode, + codePointLengthNode); + return createString(tstring, Encodings.UTF_8); } else { - final RubyEncoding rubyEncoding = libFormat.getEncoding(format); - final RopeBuilder ropeBuilder = formatTime(time, pattern); - return makeStringNode.fromBuilderUnsafe(ropeBuilder, rubyEncoding, CodeRange.CR_UNKNOWN); + final TStringBuilder tstringBuilder = formatTime(time, pattern); + return createString(tstringBuilder.toTStringUnsafe(fromByteArrayNode), cachedEncoding); } } @TruffleBoundary - @Specialization(guards = "libFormat.isRubyString(format)") + @Specialization(guards = "libFormat.isRubyString(format)", replaces = "timeStrftimeCached", limit = "1") protected RubyString timeStrftime(RubyTime time, Object format, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libFormat, - @Cached RopeNodes.ConcatNode concatNode) { - final Token[] pattern = compilePattern(libFormat.getRope(format)); + @Cached RubyStringLibrary libFormat, + @Cached TruffleString.ConcatNode concatNode, + @Cached TruffleString.FromLongNode fromLongNode, + @Cached TruffleString.CodePointLengthNode codePointLengthNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { + final RubyEncoding rubyEncoding = libFormat.getEncoding(format); + final Token[] pattern = 
compilePattern(libFormat.getTString(format), rubyEncoding); if (formatCanBeFast(pattern) && yearIsFast(time)) { - final Rope rope = RubyDateFormatter.formatToRopeFast(pattern, time.dateTime, concatNode); - return makeStringNode.fromRope(rope, Encodings.UTF_8); + var tstring = RubyDateFormatter.formatToRopeFast(pattern, time.dateTime, concatNode, fromLongNode, + codePointLengthNode); + return createString(tstring, Encodings.UTF_8); } else { - final RubyEncoding rubyEncoding = libFormat.getEncoding(format); - final RopeBuilder ropeBuilder = formatTime(time, pattern); - return makeStringNode.fromBuilderUnsafe(ropeBuilder, rubyEncoding, CodeRange.CR_UNKNOWN); + final TStringBuilder tstringBuilder = formatTime(time, pattern); + return createString(tstringBuilder.toTStringUnsafe(fromByteArrayNode), rubyEncoding); } } @@ -428,13 +426,13 @@ protected boolean yearIsFast(RubyTime time) { return year >= 1000 && year <= 9999; } - protected Token[] compilePattern(Rope format) { - return RubyDateFormatter.compilePattern(format, false, getContext(), this); + protected Token[] compilePattern(AbstractTruffleString format, RubyEncoding encoding) { + return RubyDateFormatter.compilePattern(format, encoding, false, getContext(), this); } // Optimised for the default Logger::Formatter time format: "%Y-%m-%dT%H:%M:%S.%6N " - private RopeBuilder formatTime(RubyTime time, Token[] pattern) { + private TStringBuilder formatTime(RubyTime time, Token[] pattern) { return RubyDateFormatter.formatToRopeBuilder( pattern, time.dateTime, @@ -461,7 +459,7 @@ private RopeBuilder formatTime(RubyTime time, Token[] pattern) { public abstract static class TimeSFromArrayPrimitiveNode extends PrimitiveArrayArgumentsNode { @Child private GetTimeZoneNode getTimeZoneNode = GetTimeZoneNodeGen.create(); - @Child private StringNodes.MakeStringNode makeStringNode; + @Child private TruffleString.FromJavaStringNode fromJavaStringNode; @Specialization(guards = "(isutc || !isRubyDynamicObject(utcoffset)) || 
isNil(utcoffset)") protected RubyTime timeSFromArray( @@ -518,9 +516,9 @@ private RubyTime buildTime(RubyLanguage language, RubyClass timeClass, int sec, relativeOffset = false; zoneToStore = language.coreStrings.UTC.createInstance(getContext()); } else if (utcoffset == nil) { - if (makeStringNode == null) { + if (fromJavaStringNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(StringNodes.MakeStringNode.create()); + fromJavaStringNode = insert(TruffleString.FromJavaStringNode.create()); } envZone = getTimeZoneNode.executeGetTimeZone(); @@ -571,7 +569,8 @@ private RubyTime buildTime(RubyLanguage language, RubyClass timeClass, int sec, } if (envZone != null) { - zoneToStore = getShortZoneName(makeStringNode, dt, envZone); + final String shortZoneName = envZone.getName(dt); + zoneToStore = createString(fromJavaStringNode, shortZoneName, Encodings.UTF_8); } final Shape shape = getLanguage().timeShape; diff --git a/src/main/java/org/truffleruby/core/tracepoint/TraceBaseEventNode.java b/src/main/java/org/truffleruby/core/tracepoint/TraceBaseEventNode.java index 58dc789c2e15..7ce78e226517 100644 --- a/src/main/java/org/truffleruby/core/tracepoint/TraceBaseEventNode.java +++ b/src/main/java/org/truffleruby/core/tracepoint/TraceBaseEventNode.java @@ -11,9 +11,9 @@ import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.proc.RubyProc; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.yield.CallBlockNode; import com.oracle.truffle.api.CompilerDirectives; @@ -43,8 +43,8 @@ protected RubyString getFile() { if (file == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); final Source source = eventContext.getInstrumentedSourceSection().getSource(); - file = StringOperations - .createUTF8String(context, language, 
language.getPathToRopeCache().getCachedPath(source)); + file = new RubyString(context.getCoreLibrary().stringClass, language.stringShape, false, + language.getPathToTStringCache().getCachedPath(source), Encodings.UTF_8); } return file; } diff --git a/src/main/java/org/truffleruby/debug/ChaosNode.java b/src/main/java/org/truffleruby/debug/ChaosNode.java index f059bdcb1ad0..a7045d127c24 100644 --- a/src/main/java/org/truffleruby/debug/ChaosNode.java +++ b/src/main/java/org/truffleruby/debug/ChaosNode.java @@ -13,7 +13,6 @@ import com.oracle.truffle.api.dsl.Fallback; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; -import org.truffleruby.core.CoreLibrary; import org.truffleruby.language.RubyBaseNodeWithExecute; import org.truffleruby.language.RubyContextSourceNode; @@ -39,7 +38,7 @@ protected Object chaos(int value) { } } - @Specialization(guards = "fitsIntoInteger(value)") + @Specialization(guards = "fitsInInteger(value)") protected Object chaos(long value) { if (randomBoolean()) { return value; @@ -48,15 +47,11 @@ protected Object chaos(long value) { } } - @Specialization(guards = "!fitsIntoInteger(value)") + @Specialization(guards = "!fitsInInteger(value)") protected long passThrough(long value) { return value; } - protected static boolean fitsIntoInteger(long value) { - return CoreLibrary.fitsIntoInteger(value); - } - @Fallback protected Object chaos(Object value) { return value; diff --git a/src/main/java/org/truffleruby/debug/GlobalVariablesObject.java b/src/main/java/org/truffleruby/debug/GlobalVariablesObject.java index aa7a585eea78..59753f138303 100644 --- a/src/main/java/org/truffleruby/debug/GlobalVariablesObject.java +++ b/src/main/java/org/truffleruby/debug/GlobalVariablesObject.java @@ -18,7 +18,6 @@ import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.library.ExportLibrary; import com.oracle.truffle.api.library.ExportMessage; -import org.jcodings.specific.UTF8Encoding; import 
org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.string.RubyString; @@ -60,8 +59,8 @@ protected Object readMember(String member, } else { final RubyLanguage language = RubyLanguage.get(node); final RubyContext context = RubyContext.get(node); - final RubyString string = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(member, UTF8Encoding.INSTANCE)); + final RubyString string = StringOperations.createUTF8String(context, language, member); + return evalNode.call(context.getCoreLibrary().topLevelBinding, "eval", string); } } @@ -78,9 +77,9 @@ protected void writeMember(String member, Object value, final RubyLanguage language = RubyLanguage.get(node); final RubyContext context = RubyContext.get(node); final String code = "-> value { " + member + " = value }"; - final RubyString string = StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(code, UTF8Encoding.INSTANCE)); + final RubyString string = StringOperations.createUTF8String(context, language, code); final Object lambda = evalNode.call(context.getCoreLibrary().topLevelBinding, "eval", string); + callNode.call(lambda, "call", value); } } diff --git a/src/main/java/org/truffleruby/debug/TruffleDebugNodes.java b/src/main/java/org/truffleruby/debug/TruffleDebugNodes.java index a464d6dc53df..4741c8c8c282 100644 --- a/src/main/java/org/truffleruby/debug/TruffleDebugNodes.java +++ b/src/main/java/org/truffleruby/debug/TruffleDebugNodes.java @@ -32,6 +32,7 @@ import com.oracle.truffle.api.object.DynamicObjectLibrary; import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.source.SourceSection; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.collections.Pair; import org.truffleruby.Layouts; import org.truffleruby.RubyLanguage; @@ -51,13 +52,13 @@ import org.truffleruby.core.binding.RubyBinding; import org.truffleruby.core.cast.ToCallTargetNode; import 
org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.hash.RubyHash; import org.truffleruby.core.method.RubyMethod; import org.truffleruby.core.method.RubyUnboundMethod; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.core.thread.ThreadManager; import org.truffleruby.extra.ffi.Pointer; @@ -67,6 +68,7 @@ import org.truffleruby.language.ImmutableRubyObject; import org.truffleruby.core.string.ImmutableRubyString; import org.truffleruby.language.RubyDynamicObject; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.RubyRootNode; import org.truffleruby.language.arguments.RubyArguments; import org.truffleruby.language.backtrace.BacktraceFormatter; @@ -115,10 +117,10 @@ public abstract static class DebugPrintNode extends CoreMethodArrayArgumentsNode @TruffleBoundary @Specialization protected Object debugPrint(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { final String javaString; if (strings.isRubyString(string)) { - javaString = strings.getJavaString(string); + javaString = RubyGuards.getJavaString(string); } else { javaString = string.toString(); } @@ -129,14 +131,42 @@ protected Object debugPrint(Object string, } + @CoreMethod(names = "tstring_to_debug_string", onSingleton = true, required = 1) + public abstract static class TStringToDebugPrintNode extends CoreMethodArrayArgumentsNode { + @TruffleBoundary + @Specialization(guards = "strings.isRubyString(string)", limit = "1") + protected RubyString toStringDebug(Object string, + @Cached RubyStringLibrary strings, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + 
return createString(fromJavaStringNode, strings.getTString(string).toStringDebug(), Encodings.US_ASCII); + } + } + + @CoreMethod(names = "flatten_string", onSingleton = true, required = 1) + public abstract static class FlattenStringNode extends CoreMethodArrayArgumentsNode { + // Also flattens the original String, but that one might still have an offset + @TruffleBoundary + @Specialization(guards = "libString.isRubyString(string)", limit = "1") + protected RubyString flattenString(Object string, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, + @Cached RubyStringLibrary libString) { + final RubyEncoding rubyEncoding = libString.getEncoding(string); + var tstring = libString.getTString(string); + // Use GetInternalByteArrayNode as a way to flatten the TruffleString. + // Ensure the result has offset = 0 and length = byte[].length for image build time checks + byte[] byteArray = TStringUtils.getBytesOrCopy(tstring, rubyEncoding); + return createString(fromByteArrayNode, byteArray, rubyEncoding); + } + } + @CoreMethod(names = "break_handle", onSingleton = true, required = 2, needsBlock = true, lowerFixnum = 2) public abstract static class BreakNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(file)") + @Specialization(guards = "strings.isRubyString(file)", limit = "1") protected RubyHandle setBreak(Object file, int line, RubyProc block, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final String fileString = strings.getJavaString(file); + @Cached RubyStringLibrary strings) { + final String fileString = RubyGuards.getJavaString(file); final SourceSectionFilter filter = SourceSectionFilter .newBuilder() @@ -190,13 +220,12 @@ protected Object remove(RubyHandle handle) { @CoreMethod(names = "java_class_of", onSingleton = true, required = 1) public abstract static class JavaClassOfNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = 
MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected RubyString javaClassOf(Object value) { - return makeStringNode - .executeMake(value.getClass().getSimpleName(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, value.getClass().getSimpleName(), Encodings.UTF_8); } } @@ -216,11 +245,11 @@ protected Object printBacktrace() { @CoreMethod(names = "parse_ast", onSingleton = true, required = 1) public abstract static class ParseASTNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(code)") + @Specialization(guards = "strings.isRubyString(code)", limit = "1") protected Object ast(Object code, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings, - @Cached MakeStringNode makeStringNode) { - String codeString = strings.getJavaString(code); + @Cached RubyStringLibrary strings, + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + String codeString = RubyGuards.getJavaString(code); String name = ""; var source = Source.newBuilder("ruby", codeString, name).build(); var rubySource = new RubySource(source, name); @@ -231,7 +260,7 @@ protected Object ast(Object code, var rootParseNode = TranslatorDriver .parseToJRubyAST(getContext(), rubySource, staticScope, parserConfiguration, rubyWarnings); - return makeStringNode.executeMake(rootParseNode.toString(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, rootParseNode.toString(), Encodings.UTF_8); } } @@ -289,13 +318,12 @@ protected int astSize(Object executable, @CoreMethod(names = "shape", onSingleton = true, required = 1) public abstract static class ShapeNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = 
TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected RubyString shape(RubyDynamicObject object) { - return makeStringNode - .executeMake(object.getShape().toString(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, object.getShape().toString(), Encodings.UTF_8); } } @@ -303,13 +331,13 @@ protected RubyString shape(RubyDynamicObject object) { @CoreMethod(names = "array_storage", onSingleton = true, required = 1) public abstract static class ArrayStorageNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected RubyString arrayStorage(RubyArray array) { String storage = ArrayStoreLibrary.getFactory().getUncached().toString(array.getStore()); - return makeStringNode.executeMake(storage, Encodings.US_ASCII, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, storage, Encodings.US_ASCII); } } @@ -330,14 +358,14 @@ protected long arrayStorage(RubyArray array, @CoreMethod(names = "hash_storage", onSingleton = true, required = 1) public abstract static class HashStorageNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected RubyString hashStorage(RubyHash hash) { Object store = hash.store; String storage = store == null ? 
"null" : store.getClass().toString(); - return makeStringNode.executeMake(storage, Encodings.US_ASCII, CodeRange.CR_7BIT); + return createString(fromJavaStringNode, storage, Encodings.US_ASCII); } } @@ -422,10 +450,10 @@ static void config(String message) { public abstract static class ThrowJavaExceptionNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = "strings.isRubyString(message)", limit = "1") protected Object throwJavaException(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - callingMethod(strings.getJavaString(message)); + @Cached RubyStringLibrary strings) { + callingMethod(RubyGuards.getJavaString(message)); return nil; } @@ -445,11 +473,11 @@ private static void throwingMethod(String message) { public abstract static class ThrowJavaExceptionWithCauseNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = "strings.isRubyString(message)", limit = "1") protected Object throwJavaExceptionWithCause(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { throw new RuntimeException( - strings.getJavaString(message), + RubyGuards.getJavaString(message), new RuntimeException("cause 1", new RuntimeException("cause 2"))); } @@ -459,10 +487,10 @@ protected Object throwJavaExceptionWithCause(Object message, public abstract static class ThrowAssertionErrorNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = "strings.isRubyString(message)", limit = "1") protected Object throwAssertionError(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - throw new AssertionError(strings.getJavaString(message)); + @Cached RubyStringLibrary strings) { + 
throw new AssertionError(RubyGuards.getJavaString(message)); } } @@ -1051,10 +1079,10 @@ protected String toDisplayString(boolean allowSideEffects) { } @TruffleBoundary - @Specialization(guards = "strings.isRubyString(string)") + @Specialization(guards = "strings.isRubyString(string)", limit = "1") protected Object foreignString(Object string, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return new ForeignString(strings.getJavaString(string)); + @Cached RubyStringLibrary strings) { + return new ForeignString(RubyGuards.getJavaString(string)); } } @@ -1085,10 +1113,10 @@ protected String toDisplayString(boolean allowSideEffects) { } @TruffleBoundary - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = "strings.isRubyString(message)", limit = "1") protected Object foreignException(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return new ForeignException(strings.getJavaString(message)); + @Cached RubyStringLibrary strings) { + return new ForeignException(RubyGuards.getJavaString(message)); } } @@ -1161,13 +1189,12 @@ protected Object drainFinalizationQueue() { @Primitive(name = "frame_declaration_context_to_string") public abstract static class FrameDeclarationContextToStringNode extends PrimitiveArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString getDeclarationContextToString(VirtualFrame frame) { final DeclarationContext declarationContext = RubyArguments.getDeclarationContext(frame); - return makeStringNode - .executeMake(declarationContext.toString(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, declarationContext.toString(), Encodings.UTF_8); } } @@ -1233,7 +1260,7 @@ protected RubyArray getFrameBindings() { @CoreMethod(names = 
"parse_name_of_method", onSingleton = true, required = 1) public abstract static class ParseNameOfMethodNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyString parseName(RubyMethod method) { @@ -1247,7 +1274,7 @@ protected RubyString parseName(RubyUnboundMethod method) { protected RubyString parseName(InternalMethod method) { String parseName = method.getSharedMethodInfo().getParseName(); - return makeStringNode.executeMake(parseName, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, parseName, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/extra/TruffleGraalNodes.java b/src/main/java/org/truffleruby/extra/TruffleGraalNodes.java index 959f0413b26b..427804b37e65 100644 --- a/src/main/java/org/truffleruby/extra/TruffleGraalNodes.java +++ b/src/main/java/org/truffleruby/extra/TruffleGraalNodes.java @@ -10,7 +10,6 @@ package org.truffleruby.extra; import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; @@ -20,6 +19,7 @@ import org.truffleruby.core.proc.ProcCallTargets; import org.truffleruby.core.proc.ProcType; import org.truffleruby.core.proc.RubyProc; +import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.language.RubyLambdaRootNode; import org.truffleruby.language.RubyNode; import org.truffleruby.language.RubyRootNode; @@ -181,10 +181,11 @@ private void compiledBoundary() { @NodeChild(value = "value", type = RubyNode.class) public abstract static class BailoutNode extends PrimitiveNode { - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = 
"strings.isRubyString(message)", limit = "1") protected Object bailout(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - CompilerDirectives.bailout(strings.getJavaString(message)); + @Cached RubyStringLibrary strings, + @Cached ToJavaStringNode toJavaStringNode) { + CompilerDirectives.bailout(toJavaStringNode.executeToJavaString(message)); return nil; } } diff --git a/src/main/java/org/truffleruby/extra/TrufflePosixNodes.java b/src/main/java/org/truffleruby/extra/TrufflePosixNodes.java index c349be81e267..5b1092cfb98c 100644 --- a/src/main/java/org/truffleruby/extra/TrufflePosixNodes.java +++ b/src/main/java/org/truffleruby/extra/TrufflePosixNodes.java @@ -9,7 +9,7 @@ */ package org.truffleruby.extra; -import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.dsl.Cached; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; import org.truffleruby.builtins.Primitive; @@ -17,6 +17,7 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Specialization; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.library.RubyStringLibrary; @CoreModule(value = "Truffle::POSIX", isClass = true) @@ -33,10 +34,10 @@ private static void invalidateENV(String name) { public abstract static class InvalidateEnvNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "libEnvVar.isRubyString(envVar)") + @Specialization(guards = "libEnvVar.isRubyString(envVar)", limit = "1") protected Object invalidate(Object envVar, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libEnvVar) { - invalidateENV(libEnvVar.getJavaString(envVar)); + @Cached RubyStringLibrary libEnvVar) { + invalidateENV(RubyGuards.getJavaString(envVar)); return envVar; } diff --git a/src/main/java/org/truffleruby/extra/TruffleRubyNodes.java 
b/src/main/java/org/truffleruby/extra/TruffleRubyNodes.java index 44b256065fb1..5d81faf55150 100644 --- a/src/main/java/org/truffleruby/extra/TruffleRubyNodes.java +++ b/src/main/java/org/truffleruby/extra/TruffleRubyNodes.java @@ -13,6 +13,7 @@ import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.object.DynamicObjectLibrary; import com.oracle.truffle.api.profiles.BranchProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.Layouts; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; @@ -24,8 +25,6 @@ import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.mutex.MutexOperations; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.Truffle; @@ -44,12 +43,12 @@ public abstract static class GraalvmHomeNode extends CoreMethodArrayArgumentsNod @Specialization protected Object graalvmHome( - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { String value = getProperty("org.graalvm.home"); if (value == null) { return nil; } else { - return makeStringNode.executeMake(value, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, value, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/extra/ffi/Pointer.java b/src/main/java/org/truffleruby/extra/ffi/Pointer.java index e0f63b7db956..4f0c082cb4a6 100644 --- a/src/main/java/org/truffleruby/extra/ffi/Pointer.java +++ b/src/main/java/org/truffleruby/extra/ffi/Pointer.java @@ -14,17 +14,19 @@ import com.oracle.truffle.api.interop.InteropException; import com.oracle.truffle.api.interop.InteropLibrary; +import com.oracle.truffle.api.interop.TruffleObject; +import com.oracle.truffle.api.library.ExportLibrary; +import 
com.oracle.truffle.api.library.ExportMessage; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; -import org.truffleruby.SuppressFBWarnings; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import sun.misc.Unsafe; -@SuppressFBWarnings("Nm") -public final class Pointer implements AutoCloseable { +@ExportLibrary(InteropLibrary.class) +public final class Pointer implements AutoCloseable, TruffleObject { public static final Pointer NULL = new Pointer(0); public static final long SIZE = Long.BYTES; @@ -102,6 +104,7 @@ private Pointer(long address, long size, RubyLanguage language) { enableAutoreleaseUnsynchronized(language); } + @ExportMessage.Ignore public boolean isNull() { return address == 0; } @@ -123,6 +126,16 @@ public boolean isBounded() { return size != UNBOUNDED; } + @ExportMessage + protected boolean isPointer() { + return true; + } + + @ExportMessage + protected long asPointer() { + return address; + } + public void writeByte(long offset, byte b) { assert address + offset != 0; UNSAFE.putByte(address + offset, b); @@ -163,29 +176,30 @@ public void writeZeroTerminatedBytes(long offset, byte[] bytes, int start, int l } @TruffleBoundary - public void writeBytes(long offset, long size, byte value) { - assert address + offset != 0 || size == 0; - UNSAFE.setMemory(address + offset, size, value); + public void writeBytes(long destByteOffset, long size, byte value) { + assert address + destByteOffset != 0 || size == 0; + UNSAFE.setMemory(address + destByteOffset, size, value); } @TruffleBoundary - public void writeBytes(long offset, Pointer buffer, int bufferPos, long length) { - assert address + offset != 0 || length == 0; - assert buffer != null; - assert bufferPos >= 0; - assert length >= 0; + public void writeBytes(long destByteOffset, Pointer source, int sourceByteOffset, long bytesToCopy) { + assert address + destByteOffset != 0 || bytesToCopy == 0; + assert source != 
null; + assert sourceByteOffset >= 0; + assert bytesToCopy >= 0; - UNSAFE.copyMemory(buffer.getAddress() + bufferPos, address + offset, length); + UNSAFE.copyMemory(source.getAddress() + sourceByteOffset, address + destByteOffset, bytesToCopy); } @TruffleBoundary - public void writeBytes(long offset, byte[] buffer, int bufferPos, int length) { - assert address + offset != 0 || length == 0; - assert buffer != null; - assert bufferPos >= 0; - assert length >= 0; + public void writeBytes(long destByteOffset, byte[] source, int sourceByteOffset, int bytesToCopy) { + assert address + destByteOffset != 0 || bytesToCopy == 0; + assert source != null; + assert sourceByteOffset >= 0; + assert bytesToCopy >= 0; - UNSAFE.copyMemory(buffer, Unsafe.ARRAY_BYTE_BASE_OFFSET + bufferPos, null, address + offset, length); + UNSAFE.copyMemory(source, Unsafe.ARRAY_BYTE_BASE_OFFSET + sourceByteOffset, null, address + destByteOffset, + bytesToCopy); } public byte readByte(long offset) { @@ -209,24 +223,6 @@ public void readBytes(long offset, byte[] buffer, int bufferPos, int length) { UNSAFE.copyMemory(null, address + offset, buffer, Unsafe.ARRAY_BYTE_BASE_OFFSET + bufferPos, length); } - @TruffleBoundary - public boolean readBytesCheck8Bit(byte[] buffer, int length) { - assert address != 0 || length == 0; - assert buffer != null; - assert length >= 0; - - long base = address; - boolean highBitUsed = false; - for (int i = 0; i < length; i++) { - byte aByte = UNSAFE.getByte(null, base + i); - if (aByte < 0) { - highBitUsed = true; - } - buffer[i] = aByte; - } - return highBitUsed; - } - public short readShort(long offset) { assert address + offset != 0; return UNSAFE.getShort(address + offset); diff --git a/src/main/java/org/truffleruby/extra/ffi/PointerNodes.java b/src/main/java/org/truffleruby/extra/ffi/PointerNodes.java index 7cd04635a2c9..233ec191d1f6 100644 --- a/src/main/java/org/truffleruby/extra/ffi/PointerNodes.java +++ b/src/main/java/org/truffleruby/extra/ffi/PointerNodes.java 
@@ -15,7 +15,7 @@ import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.object.Shape; -import org.jcodings.specific.ASCIIEncoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -26,16 +26,13 @@ import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.numeric.BigIntegerOps; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeNodes; import org.truffleruby.core.string.RubyString; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.core.support.RubyByteArray; import org.truffleruby.core.numeric.RubyBignum; import org.truffleruby.core.symbol.RubySymbol; import org.truffleruby.language.Nil; -import org.truffleruby.language.NotProvided; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.Visibility; import org.truffleruby.language.control.RaiseException; import org.truffleruby.language.library.RubyStringLibrary; @@ -94,8 +91,7 @@ public abstract static class PointerFindTypeSizePrimitiveNode extends PrimitiveA @TruffleBoundary @Specialization - protected int findTypeSize(RubySymbol type, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringLibrary) { + protected int findTypeSize(RubySymbol type) { final String typeString = type.getString(); final int size = typeSize(typeString); if (size > 0) { @@ -104,7 +100,7 @@ protected int findTypeSize(RubySymbol type, final Object typedef = getContext() .getTruffleNFI() .resolveTypeRaw(getContext().getNativeConfiguration(), typeString); - final int typedefSize = typeSize(stringLibrary.getJavaString(typedef)); + final int typedefSize = 
typeSize(RubyGuards.getJavaString(typedef)); assert typedefSize > 0 : typedef; return typedefSize; } @@ -263,56 +259,27 @@ public abstract static class PointerReadStringToNullNode extends PointerPrimitiv @Specialization(guards = "limit == 0") protected RubyString readNullPointer(long address, long limit) { - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - RopeConstants.EMPTY_ASCII_8BIT_ROPE, - Encodings.BINARY); - AllocationTracing.trace(instance, this); - return instance; + return createString(TStringConstants.EMPTY_BINARY, Encodings.BINARY); } @Specialization(guards = "limit != 0") protected RubyString readStringToNull(long address, long limit, - @Cached RopeNodes.MakeLeafRopeNode makeLeafRopeNode, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode, @CachedLibrary(limit = "1") InteropLibrary interop) { final Pointer ptr = new Pointer(address); checkNull(ptr); - final byte[] bytes = ptr - .readZeroTerminatedByteArray(getContext(), interop, 0, limit); - final Rope rope = makeLeafRopeNode - .executeMake(bytes, ASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN, NotProvided.INSTANCE); - - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - Encodings.BINARY); - AllocationTracing.trace(instance, this); - return instance; + final byte[] bytes = ptr.readZeroTerminatedByteArray(getContext(), interop, 0, limit); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } @Specialization protected RubyString readStringToNull(long address, Nil limit, - @Cached RopeNodes.MakeLeafRopeNode makeLeafRopeNode, - @CachedLibrary(limit = "1") InteropLibrary interop) { + @CachedLibrary(limit = "1") InteropLibrary interop, + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final Pointer ptr = new Pointer(address); checkNull(ptr); - final byte[] bytes = ptr - .readZeroTerminatedByteArray(getContext(), interop, 0); - final Rope rope 
= makeLeafRopeNode - .executeMake(bytes, ASCIIEncoding.INSTANCE, CodeRange.CR_UNKNOWN, NotProvided.INSTANCE); - - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - Encodings.BINARY); - AllocationTracing.trace(instance, this); - return instance; + final byte[] bytes = ptr.readZeroTerminatedByteArray(getContext(), interop, 0); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } } @@ -322,8 +289,7 @@ public abstract static class PointerReadBytesToArrayNode extends PointerPrimitiv @Specialization protected Object readBytes(RubyByteArray array, int arrayOffset, long address, int length, - @Cached ConditionProfile zeroProfile, - @Cached RopeNodes.MakeLeafRopeNode makeLeafRopeNode) { + @Cached ConditionProfile zeroProfile) { final Pointer ptr = new Pointer(address); if (zeroProfile.profile(length == 0)) { // No need to check the pointer address if we read nothing @@ -344,33 +310,16 @@ public abstract static class PointerReadBytesNode extends PointerPrimitiveArrayA @Specialization protected RubyString readBytes(long address, int length, @Cached ConditionProfile zeroProfile, - @Cached RopeNodes.MakeLeafRopeNode makeLeafRopeNode) { + @Cached TruffleString.FromByteArrayNode fromByteArrayNode) { final Pointer ptr = new Pointer(address); if (zeroProfile.profile(length == 0)) { // No need to check the pointer address if we read nothing - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - RopeConstants.EMPTY_ASCII_8BIT_ROPE, - Encodings.BINARY); - AllocationTracing.trace(instance, this); - return instance; + return createString(TStringConstants.EMPTY_BINARY, Encodings.BINARY); } else { checkNull(ptr); final byte[] bytes = new byte[length]; - final boolean is8Bit = ptr.readBytesCheck8Bit(bytes, length); - final Rope rope = makeLeafRopeNode - .executeMake(bytes, ASCIIEncoding.INSTANCE, is8Bit ? 
CodeRange.CR_VALID : CodeRange.CR_7BIT, - length); - final RubyString instance = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - Encodings.BINARY); - AllocationTracing.trace(instance, this); - return instance; + ptr.readBytes(0, bytes, 0, length); + return createString(fromByteArrayNode, bytes, Encodings.BINARY); } } @@ -379,19 +328,24 @@ protected RubyString readBytes(long address, int length, @Primitive(name = "pointer_write_bytes", lowerFixnum = { 2, 3 }) public abstract static class PointerWriteBytesNode extends PointerPrimitiveArrayArgumentsNode { - @Specialization(guards = "libString.isRubyString(string)") + @Specialization(guards = "libString.isRubyString(string)", limit = "1") protected Object writeBytes(long address, Object string, int index, int length, - @Cached RopeNodes.BytesNode bytesNode, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libString) { - final Pointer ptr = new Pointer(address); - final Rope rope = libString.getRope(string); - assert index + length <= rope.byteLength(); - if (length != 0) { + @Cached ConditionProfile nonZeroProfile, + @Cached TruffleString.CopyToNativeMemoryNode copyToNativeMemoryNode, + @Cached RubyStringLibrary libString) { + Pointer ptr = new Pointer(address); + var tstring = libString.getTString(string); + var encoding = libString.getTEncoding(string); + + assert index + length <= tstring.byteLength(encoding); + + if (nonZeroProfile.profile(length != 0)) { // No need to check the pointer address if we write nothing checkNull(ptr); + + copyToNativeMemoryNode.execute(tstring, index, ptr, 0, length, encoding); } - ptr.writeBytes(0, bytesNode.execute(rope), index, length); return string; } diff --git a/src/main/java/org/truffleruby/interop/FromJavaStringNode.java b/src/main/java/org/truffleruby/interop/FromJavaStringNode.java index 6c9178a9e758..64590c85d03c 100644 --- a/src/main/java/org/truffleruby/interop/FromJavaStringNode.java +++ 
b/src/main/java/org/truffleruby/interop/FromJavaStringNode.java @@ -9,13 +9,9 @@ */ package org.truffleruby.interop; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.RubyBaseNode; import com.oracle.truffle.api.dsl.Cached; @@ -34,17 +30,17 @@ public static FromJavaStringNode create() { @Specialization(guards = "stringsEquals(cachedValue, value)", limit = "getLimit()") protected RubyString doCached(String value, @Cached("value") String cachedValue, - @Cached("getRope(value)") Rope cachedRope, - @Cached StringNodes.MakeStringNode makeStringNode) { - var rubyString = makeStringNode.fromRope(cachedRope, Encodings.UTF_8); + @Cached TruffleString.FromJavaStringNode tstringFromJavaStringNode, + @Cached("getTString(cachedValue, tstringFromJavaStringNode)") TruffleString cachedRope) { + var rubyString = createString(cachedRope, Encodings.UTF_8); rubyString.freeze(); return rubyString; } @Specialization(replaces = "doCached") protected RubyString doGeneric(String value, - @Cached StringNodes.MakeStringNode makeStringNode) { - var rubyString = makeStringNode.executeMake(value, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + var rubyString = createString(fromJavaStringNode, value, Encodings.UTF_8); rubyString.freeze(); return rubyString; } @@ -53,8 +49,8 @@ protected boolean stringsEquals(String a, String b) { return a.equals(b); } - protected Rope getRope(String value) { - return StringOperations.encodeRope(value, UTF8Encoding.INSTANCE); + protected TruffleString getTString(String value, TruffleString.FromJavaStringNode tstringFromJavaStringNode) { + return 
tstringFromJavaStringNode.execute(value, TruffleString.Encoding.UTF_8); } protected int getLimit() { diff --git a/src/main/java/org/truffleruby/interop/InteropNodes.java b/src/main/java/org/truffleruby/interop/InteropNodes.java index bdef49df3d6b..a46109960332 100644 --- a/src/main/java/org/truffleruby/interop/InteropNodes.java +++ b/src/main/java/org/truffleruby/interop/InteropNodes.java @@ -20,10 +20,8 @@ import com.oracle.truffle.api.frame.VirtualFrame; import com.oracle.truffle.api.interop.ExceptionType; import com.oracle.truffle.api.interop.NodeLibrary; -import com.oracle.truffle.api.profiles.ConditionProfile; import com.oracle.truffle.api.source.SourceSection; import com.oracle.truffle.api.strings.TruffleString; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreMethodNode; @@ -36,13 +34,9 @@ import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.array.library.ArrayStoreLibrary; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringCachingGuards; -import org.truffleruby.core.string.StringNodes; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.symbol.RubySymbol; @@ -119,14 +113,11 @@ public abstract static class AllMethodsOfInteropLibrary extends PrimitiveArrayAr @TruffleBoundary @Specialization - protected RubyArray allMethodsOfInteropLibrary() { + protected RubyArray allMethodsOfInteropLibrary( + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { Object[] 
store = new Object[METHODS.length]; for (int i = 0; i < METHODS.length; i++) { - store[i] = StringOperations - .createString( - this, - StringOperations.encodeRope(METHODS[i], UTF8Encoding.INSTANCE), - Encodings.UTF_8); + store[i] = createString(fromJavaStringNode, METHODS[i], Encodings.UTF_8); } return createArray(store); } @@ -196,10 +187,10 @@ protected Object proxyForeignObject(Object delegate, Object logger) { public abstract static class MimeTypeSupportedNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(mimeType)") + @Specialization(guards = "strings.isRubyString(mimeType)", limit = "1") protected boolean isMimeTypeSupported(RubyString mimeType, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return getContext().getEnv().isMimeTypeSupported(strings.getJavaString(mimeType)); + @Cached RubyStringLibrary strings) { + return getContext().getEnv().isMimeTypeSupported(RubyGuards.getJavaString(mimeType)); } } @@ -208,14 +199,14 @@ protected boolean isMimeTypeSupported(RubyString mimeType, public abstract static class ImportFileNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(fileName)") + @Specialization(guards = "strings.isRubyString(fileName)", limit = "1") protected Object importFile(Object fileName, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { try { //intern() to improve footprint final TruffleFile file = getContext() .getEnv() - .getPublicTruffleFile(strings.getJavaString(fileName).intern()); + .getPublicTruffleFile(RubyGuards.getJavaString(fileName).intern()); final Source source = Source.newBuilder(TruffleRuby.LANGUAGE_ID, file).build(); getContext().getEnv().parsePublic(source).call(); } catch (IOException e) { @@ -228,7 +219,6 @@ protected Object importFile(Object fileName, } @CoreMethod(names = "eval", onSingleton = true, required = 2) - 
@ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism public abstract static class EvalNode extends CoreMethodArrayArgumentsNode { @@ -236,35 +226,37 @@ public abstract static class EvalNode extends CoreMethodArrayArgumentsNode { guards = { "stringsMimeType.isRubyString(mimeType)", "stringsSource.isRubyString(source)", - "mimeTypeEqualNode.execute(stringsMimeType.getRope(mimeType), cachedMimeType)", - "sourceEqualNode.execute(stringsSource.getRope(source), cachedSource)" }, + "mimeTypeEqualNode.execute(stringsMimeType, mimeType, cachedMimeType, cachedMimeTypeEnc)", + "sourceEqualNode.execute(stringsSource, source, cachedSource, cachedSourceEnc)" }, limit = "getEvalCacheLimit()") protected Object evalCached(Object mimeType, Object source, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsMimeType, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsSource, - @Cached("stringsMimeType.getRope(mimeType)") Rope cachedMimeType, - @Cached("stringsSource.getRope(source)") Rope cachedSource, - @Cached("create(parse(stringsMimeType.getRope(mimeType), stringsSource.getRope(source)))") DirectCallNode callNode, - @Cached RopeNodes.EqualNode mimeTypeEqualNode, - @Cached RopeNodes.EqualNode sourceEqualNode) { + @Cached RubyStringLibrary stringsMimeType, + @Cached RubyStringLibrary stringsSource, + @Cached("asTruffleStringUncached(mimeType)") TruffleString cachedMimeType, + @Cached("stringsMimeType.getEncoding(mimeType)") RubyEncoding cachedMimeTypeEnc, + @Cached("asTruffleStringUncached(source)") TruffleString cachedSource, + @Cached("stringsSource.getEncoding(source)") RubyEncoding cachedSourceEnc, + @Cached("create(parse(getJavaString(mimeType), getJavaString(source)))") DirectCallNode callNode, + @Cached StringHelperNodes.EqualNode mimeTypeEqualNode, + @Cached StringHelperNodes.EqualNode sourceEqualNode) { return callNode.call(EMPTY_ARGUMENTS); } @Specialization( guards = { 
"stringsMimeType.isRubyString(mimeType)", "stringsSource.isRubyString(source)" }, - replaces = "evalCached") + replaces = "evalCached", limit = "1") protected Object evalUncached(Object mimeType, RubyString source, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsMimeType, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsSource, + @Cached RubyStringLibrary stringsMimeType, + @Cached RubyStringLibrary stringsSource, + @Cached ToJavaStringNode toJavaStringMimeNode, + @Cached ToJavaStringNode toJavaStringSourceNode, @Cached IndirectCallNode callNode) { - return callNode - .call(parse(stringsMimeType.getRope(mimeType), stringsSource.getRope(source)), EMPTY_ARGUMENTS); + return callNode.call(parse(toJavaStringMimeNode.executeToJavaString(mimeType), + toJavaStringSourceNode.executeToJavaString(source)), EMPTY_ARGUMENTS); } @TruffleBoundary - protected CallTarget parse(Rope ropeMimeType, Rope ropeCode) { - final String mimeTypeString = RopeOperations.decodeRope(ropeMimeType); - final String codeString = RopeOperations.decodeRope(ropeCode); + protected CallTarget parse(String mimeTypeString, String codeString) { String language = Source.findLanguage(mimeTypeString); if (language == null) { // Give the original string to get the nice exception from Truffle @@ -287,17 +279,16 @@ protected int getEvalCacheLimit() { @Primitive(name = "interop_eval_nfi") public abstract static class InteropEvalNFINode extends PrimitiveArrayArgumentsNode { - @Specialization(guards = "library.isRubyString(code)") + @Specialization(guards = "library.isRubyString(code)", limit = "1") protected Object evalNFI(Object code, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary library, + @Cached RubyStringLibrary library, @Cached IndirectCallNode callNode) { - return callNode.call(parse(library.getRope(code)), EMPTY_ARGUMENTS); + return callNode.call(parse(code), EMPTY_ARGUMENTS); } @TruffleBoundary - protected CallTarget parse(Rope code) { - final 
String codeString = RopeOperations.decodeRope(code); - final Source source = Source.newBuilder("nfi", codeString, "(eval)").build(); + protected CallTarget parse(Object code) { + final Source source = Source.newBuilder("nfi", RubyGuards.getJavaString(code), "(eval)").build(); try { return getContext().getEnv().parseInternal(source); @@ -808,10 +799,7 @@ public abstract static class ForeignStringToRubyStringNode extends PrimitiveArra protected RubyString foreignStringToRubyString(Object receiver, @CachedLibrary("receiver") InteropLibrary receivers, @Cached TranslateInteropExceptionNode translateInteropException, - @Cached TruffleString.SwitchEncodingNode switchEncodingNode, - @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode, - @Cached ConditionProfile offsetZeroProfile, - @Cached StringNodes.MakeStringNode makeStringNode) { + @Cached TruffleString.SwitchEncodingNode switchEncodingNode) { final TruffleString truffleString; try { truffleString = receivers.asTruffleString(receiver); @@ -820,15 +808,7 @@ protected RubyString foreignStringToRubyString(Object receiver, } var asUTF8 = switchEncodingNode.execute(truffleString, TruffleString.Encoding.UTF_8); - var bytes = getInternalByteArrayNode.execute(asUTF8, TruffleString.Encoding.UTF_8); - final byte[] utf8Bytes; - if (offsetZeroProfile.profile(bytes.getOffset() == 0 && bytes.getLength() == bytes.getArray().length)) { - utf8Bytes = bytes.getArray(); - } else { - utf8Bytes = ArrayUtils.extractRange(bytes.getArray(), bytes.getOffset(), bytes.getEnd()); - } - - var rubyString = makeStringNode.executeMake(utf8Bytes, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + var rubyString = createString(asUTF8, Encodings.UTF_8); rubyString.freeze(); return rubyString; } @@ -846,12 +826,12 @@ protected Object toDisplayString(Object receiver, @CoreMethod(names = "to_string", onSingleton = true, required = 1) public abstract static class ToStringNode extends CoreMethodArrayArgumentsNode { - @Child private 
StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected RubyString toString(Object value) { - return makeStringNode.executeMake(String.valueOf(value), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, String.valueOf(value), Encodings.UTF_8); } } @@ -1594,10 +1574,7 @@ protected RubyArray languages() { final String[] languagesArray = languages.keySet().toArray(StringUtils.EMPTY_STRING_ARRAY); final Object[] rubyStringArray = new Object[languagesArray.length]; for (int i = 0; i < languagesArray.length; i++) { - rubyStringArray[i] = StringOperations.createUTF8String( - getContext(), - getLanguage(), - StringOperations.encodeRope(languagesArray[i], UTF8Encoding.INSTANCE)); + rubyStringArray[i] = StringOperations.createUTF8String(getContext(), getLanguage(), languagesArray[i]); } return createArray(rubyStringArray); } @@ -1745,18 +1722,13 @@ public abstract static class JavaTypeNode extends CoreMethodArrayArgumentsNode { // TODO CS 17-Mar-18 we should cache this in the future @Specialization - protected Object javaTypeSymbol(RubySymbol name) { - return javaType(name.getString()); - } - - @Specialization(guards = "strings.isRubyString(name)") - protected Object javaTypeString(Object name, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - return javaType(strings.getJavaString(name)); + protected Object javaType(Object name, + @Cached ToJavaStringNode toJavaStringNode) { + return lookupJavaType(toJavaStringNode.executeToJavaString(name)); } @TruffleBoundary - private Object javaType(String name) { + private Object lookupJavaType(String name) { final TruffleLanguage.Env env = getContext().getEnv(); if (!env.isHostLookupAllowed()) { @@ -1774,13 +1746,13 @@ private Object javaType(String name) { public abstract static class 
JavaAddToClasspathNode extends PrimitiveArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(path)") + @Specialization(guards = "strings.isRubyString(path)", limit = "1") protected boolean javaAddToClasspath(Object path, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { TruffleLanguage.Env env = getContext().getEnv(); try { TruffleFile file = FileLoader.getSafeTruffleFile(getLanguage(), getContext(), - strings.getJavaString(path)); + RubyGuards.getJavaString(path)); env.addToHostClassPath(file); return true; } catch (SecurityException e) { diff --git a/src/main/java/org/truffleruby/interop/PolyglotNodes.java b/src/main/java/org/truffleruby/interop/PolyglotNodes.java index db2fcf5fb198..0322abe00d83 100644 --- a/src/main/java/org/truffleruby/interop/PolyglotNodes.java +++ b/src/main/java/org/truffleruby/interop/PolyglotNodes.java @@ -12,29 +12,25 @@ import java.io.IOException; import com.oracle.truffle.api.TruffleContext; -import com.oracle.truffle.api.dsl.Bind; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.profiles.BranchProfile; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; import org.truffleruby.builtins.Primitive; import org.truffleruby.builtins.PrimitiveArrayArgumentsNode; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.klass.RubyClass; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodes; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.string.StringCachingGuards; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.StringHelperNodes; import org.truffleruby.language.NotProvided; +import 
org.truffleruby.language.RubyGuards; import org.truffleruby.language.control.RaiseException; import com.oracle.truffle.api.CallTarget; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.TruffleFile; import com.oracle.truffle.api.dsl.Cached; -import com.oracle.truffle.api.dsl.ImportStatic; import com.oracle.truffle.api.dsl.ReportPolymorphism; import com.oracle.truffle.api.dsl.Specialization; import com.oracle.truffle.api.nodes.DirectCallNode; @@ -48,7 +44,6 @@ public abstract class PolyglotNodes { @CoreMethod(names = "eval", onSingleton = true, required = 2) - @ImportStatic({ StringCachingGuards.class, StringOperations.class }) @ReportPolymorphism public abstract static class EvalNode extends CoreMethodArrayArgumentsNode { @@ -56,35 +51,38 @@ public abstract static class EvalNode extends CoreMethodArrayArgumentsNode { guards = { "idLib.isRubyString(langId)", "codeLib.isRubyString(code)", - "idEqualNode.execute(idLib.getRope(langId), cachedLangId)", - "codeEqualNode.execute(codeLib.getRope(code), cachedCode)" }, + "idEqualNode.execute(idLib, langId, cachedLangId, cachedLangIdEnc)", + "codeEqualNode.execute(codeLib, code, cachedCode, cachedCodeEnc)" }, limit = "getCacheLimit()") protected Object evalCached(Object langId, Object code, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary idLib, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary codeLib, - @Cached("idLib.getRope(langId)") Rope cachedLangId, - @Cached("codeLib.getRope(code)") Rope cachedCode, - @Cached("create(parse(idLib.getRope(langId), codeLib.getRope(code)))") DirectCallNode callNode, - @Cached RopeNodes.EqualNode idEqualNode, - @Cached RopeNodes.EqualNode codeEqualNode) { + @Cached RubyStringLibrary idLib, + @Cached RubyStringLibrary codeLib, + @Cached("asTruffleStringUncached(langId)") TruffleString cachedLangId, + @Cached("idLib.getEncoding(langId)") RubyEncoding cachedLangIdEnc, + @Cached("asTruffleStringUncached(code)") 
TruffleString cachedCode, + @Cached("codeLib.getEncoding(code)") RubyEncoding cachedCodeEnc, + @Cached("create(parse(getJavaString(langId), getJavaString(code)))") DirectCallNode callNode, + @Cached StringHelperNodes.EqualNode idEqualNode, + @Cached StringHelperNodes.EqualNode codeEqualNode) { return callNode.call(EMPTY_ARGUMENTS); } @Specialization( guards = { "stringsId.isRubyString(langId)", "stringsSource.isRubyString(code)" }, - replaces = "evalCached") + replaces = "evalCached", limit = "1") protected Object evalUncached(Object langId, Object code, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsId, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsSource, + @Cached RubyStringLibrary stringsId, + @Cached RubyStringLibrary stringsSource, + @Cached ToJavaStringNode toJavaStringLandNode, + @Cached ToJavaStringNode toJavaStringCodeNode, @Cached IndirectCallNode callNode) { - return callNode.call(parse(stringsId.getRope(langId), stringsSource.getRope(code)), EMPTY_ARGUMENTS); + return callNode.call(parse(toJavaStringLandNode.executeToJavaString(langId), + toJavaStringCodeNode.executeToJavaString(code)), EMPTY_ARGUMENTS); } @TruffleBoundary - protected CallTarget parse(Rope id, Rope code) { - final String idString = RopeOperations.decodeRope(id); - final String codeString = RopeOperations.decodeRope(code); - final Source source = Source.newBuilder(idString, codeString, "(eval)").build(); + protected CallTarget parse(String langId, String code) { + final Source source = Source.newBuilder(langId, code, "(eval)").build(); try { return getContext().getEnv().parsePublic(source); } catch (IllegalStateException e) { @@ -109,12 +107,12 @@ protected int getCacheLimit() { public abstract static class EvalFileNode extends CoreMethodArrayArgumentsNode { @TruffleBoundary - @Specialization(guards = "strings.isRubyString(fileName)") + @Specialization(guards = "strings.isRubyString(fileName)", limit = "1") protected Object evalFile(Object 
fileName, NotProvided id, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings) { final Source source; - //intern() to improve footprint - final String path = strings.getJavaString(fileName).intern(); + // intern() to improve footprint + final String path = RubyGuards.getJavaString(fileName).intern(); try { final TruffleFile file = getContext().getEnv().getPublicTruffleFile(path); String language = Source.findLanguage(file); @@ -134,15 +132,13 @@ protected Object evalFile(Object fileName, NotProvided id, } @TruffleBoundary - @Specialization( - guards = { - "stringsId.isRubyString(id)", - "stringsFileName.isRubyString(fileName)" }) + @Specialization(guards = { "stringsId.isRubyString(id)", "stringsFileName.isRubyString(fileName)" }, + limit = "1") protected Object evalFile(Object id, Object fileName, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsId, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary stringsFileName) { - final String idString = stringsId.getJavaString(id); - final Source source = getSource(idString, stringsFileName.getJavaString(fileName)); + @Cached RubyStringLibrary stringsId, + @Cached RubyStringLibrary stringsFileName) { + final String idString = RubyGuards.getJavaString(id); + final Source source = getSource(idString, RubyGuards.getJavaString(fileName)); return eval(source); } @@ -198,23 +194,24 @@ public abstract static class InnerContextEvalNode extends CoreMethodArrayArgumen "idLib.isRubyString(langId)", "codeLib.isRubyString(code)", "filenameLib.isRubyString(filename)", - "idEqualNode.execute(langIdRope, cachedLangId)", - "codeEqualNode.execute(codeRope, cachedCode)", - "filenameEqualNode.execute(filenameRope, cachedFilename)" }, limit = "getCacheLimit()") + "idEqualNode.execute(idLib, langId, cachedLangId, cachedLangIdEnc)", + "codeEqualNode.execute(codeLib, code, cachedCode, cachedCodeEnc)", + "filenameEqualNode.execute(filenameLib, filename, 
cachedFilename, cachedFilenameEnc)" }, + limit = "getCacheLimit()") protected Object evalCached(RubyInnerContext rubyInnerContext, Object langId, Object code, Object filename, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary idLib, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary codeLib, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary filenameLib, - @Bind("idLib.getRope(langId)") Rope langIdRope, - @Bind("codeLib.getRope(code)") Rope codeRope, - @Bind("filenameLib.getRope(filename)") Rope filenameRope, - @Cached("langIdRope") Rope cachedLangId, - @Cached("codeRope") Rope cachedCode, - @Cached("filenameRope") Rope cachedFilename, - @Cached("createSource(idLib.getJavaString(langId), codeLib.getJavaString(code), filenameLib.getJavaString(filename))") Source cachedSource, - @Cached RopeNodes.EqualNode idEqualNode, - @Cached RopeNodes.EqualNode codeEqualNode, - @Cached RopeNodes.EqualNode filenameEqualNode, + @Cached RubyStringLibrary idLib, + @Cached RubyStringLibrary codeLib, + @Cached RubyStringLibrary filenameLib, + @Cached("asTruffleStringUncached(langId)") TruffleString cachedLangId, + @Cached("idLib.getEncoding(langId)") RubyEncoding cachedLangIdEnc, + @Cached("asTruffleStringUncached(code)") TruffleString cachedCode, + @Cached("codeLib.getEncoding(code)") RubyEncoding cachedCodeEnc, + @Cached("asTruffleStringUncached(filename)") TruffleString cachedFilename, + @Cached("filenameLib.getEncoding(filename)") RubyEncoding cachedFilenameEnc, + @Cached("createSource(getJavaString(langId), getJavaString(code), getJavaString(filename))") Source cachedSource, + @Cached StringHelperNodes.EqualNode idEqualNode, + @Cached StringHelperNodes.EqualNode codeEqualNode, + @Cached StringHelperNodes.EqualNode filenameEqualNode, @Cached ForeignToRubyNode foreignToRubyNode, @Cached BranchProfile errorProfile) { return eval(rubyInnerContext, cachedSource, foreignToRubyNode, errorProfile); @@ -222,16 +219,19 @@ protected Object 
evalCached(RubyInnerContext rubyInnerContext, Object langId, Ob @Specialization( guards = { "idLib.isRubyString(langId)", "codeLib.isRubyString(code)" }, - replaces = "evalCached") + replaces = "evalCached", limit = "1") protected Object evalUncached(RubyInnerContext rubyInnerContext, Object langId, Object code, Object filename, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary idLib, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary codeLib, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary filenameLib, + @Cached RubyStringLibrary idLib, + @Cached RubyStringLibrary codeLib, + @Cached RubyStringLibrary filenameLib, + @Cached ToJavaStringNode toJavaStringIDNode, + @Cached ToJavaStringNode toJavaStringCodeNode, + @Cached ToJavaStringNode toJavaStringFileNode, @Cached ForeignToRubyNode foreignToRubyNode, @Cached BranchProfile errorProfile) { - final String idString = idLib.getJavaString(langId); - final String codeString = codeLib.getJavaString(code); - final String filenameString = filenameLib.getJavaString(filename); + final String idString = toJavaStringIDNode.executeToJavaString(langId); + final String codeString = toJavaStringCodeNode.executeToJavaString(code); + final String filenameString = toJavaStringFileNode.executeToJavaString(filename); final Source source = createSource(idString, codeString, filenameString); diff --git a/src/main/java/org/truffleruby/interop/SourceLocationNodes.java b/src/main/java/org/truffleruby/interop/SourceLocationNodes.java index 8be3ed651459..318ccfb3becc 100644 --- a/src/main/java/org/truffleruby/interop/SourceLocationNodes.java +++ b/src/main/java/org/truffleruby/interop/SourceLocationNodes.java @@ -12,14 +12,12 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Specialization; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import 
org.truffleruby.builtins.CoreModule; import org.truffleruby.builtins.UnaryCoreMethodNode; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import com.oracle.truffle.api.source.SourceSection; import org.truffleruby.core.thread.ThreadBacktraceLocationNodes; @@ -31,14 +29,13 @@ public class SourceLocationNodes { public abstract static class AbsolutePathNode extends UnaryCoreMethodNode { @TruffleBoundary @Specialization - protected Object absolutePath(RubySourceLocation location, - @Cached StringNodes.MakeStringNode makeStringNode) { + protected Object absolutePath(RubySourceLocation location) { final SourceSection sourceSection = location.sourceSection; if (!sourceSection.isAvailable()) { return coreStrings().UNKNOWN.createInstance(getContext()); } - return ThreadBacktraceLocationNodes.AbsolutePathNode.getAbsolutePath(sourceSection, makeStringNode, this); + return ThreadBacktraceLocationNodes.AbsolutePathNode.getAbsolutePath(sourceSection, this); } } @@ -46,15 +43,14 @@ protected Object absolutePath(RubySourceLocation location, public abstract static class PathNode extends UnaryCoreMethodNode { @TruffleBoundary @Specialization - protected RubyString path(RubySourceLocation location, - @Cached StringNodes.MakeStringNode makeStringNode) { + protected RubyString path(RubySourceLocation location) { final SourceSection sourceSection = location.sourceSection; if (!sourceSection.isAvailable()) { return coreStrings().UNKNOWN.createInstance(getContext()); } else { - final Rope path = getLanguage().getPathToRopeCache().getCachedPath(sourceSection.getSource()); - return makeStringNode.fromRope(path, Encodings.UTF_8); + var path = getLanguage().getPathToTStringCache().getCachedPath(sourceSection.getSource()); + return createString(path, Encodings.UTF_8); } } } @@ -118,9 +114,9 @@ public abstract static 
class LanguageNode extends UnaryCoreMethodNode { @TruffleBoundary @Specialization protected RubyString language(RubySourceLocation location, - @Cached StringNodes.MakeStringNode makeStringNode) { - return makeStringNode.executeMake(location.sourceSection.getSource().getLanguage(), - Encodings.UTF_8, CodeRange.CR_UNKNOWN); + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { + return createString(fromJavaStringNode, location.sourceSection.getSource().getLanguage(), + Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/language/AutoloadConstant.java b/src/main/java/org/truffleruby/language/AutoloadConstant.java index e8dee0196a92..faa2eefd0ef0 100644 --- a/src/main/java/org/truffleruby/language/AutoloadConstant.java +++ b/src/main/java/org/truffleruby/language/AutoloadConstant.java @@ -26,7 +26,7 @@ public class AutoloadConstant { AutoloadConstant(Object feature) { assert RubyStringLibrary.getUncached().isRubyString(feature); this.feature = feature; - this.autoloadPath = RubyStringLibrary.getUncached().getJavaString(this.feature); + this.autoloadPath = RubyGuards.getJavaString(this.feature); } public String getAutoloadPath() { diff --git a/src/main/java/org/truffleruby/language/DataNode.java b/src/main/java/org/truffleruby/language/DataNode.java index af9742f59ce8..2ceefdf50298 100644 --- a/src/main/java/org/truffleruby/language/DataNode.java +++ b/src/main/java/org/truffleruby/language/DataNode.java @@ -9,10 +9,9 @@ */ package org.truffleruby.language; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.language.dispatch.DispatchNode; import com.oracle.truffle.api.CompilerDirectives; @@ -21,7 +20,7 @@ public class DataNode extends RubyContextSourceNode { - @Child private StringNodes.MakeStringNode makeStringNode; + @Child 
private TruffleString.FromJavaStringNode fromJavaStringNode; @Child private DispatchNode callHelperNode; private final int endPosition; @@ -32,9 +31,9 @@ public DataNode(int endPosition) { @Override public Object execute(VirtualFrame frame) { - if (makeStringNode == null) { + if (fromJavaStringNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(StringNodes.MakeStringNode.create()); + fromJavaStringNode = insert(TruffleString.FromJavaStringNode.create()); } if (callHelperNode == null) { @@ -44,7 +43,7 @@ public Object execute(VirtualFrame frame) { final String path = getPath(); final RubyEncoding rubyLocaleEncoding = getContext().getEncodingManager().getLocaleEncoding(); - final RubyString pathString = makeStringNode.executeMake(path, rubyLocaleEncoding, CodeRange.CR_UNKNOWN); + final RubyString pathString = createString(fromJavaStringNode, path, rubyLocaleEncoding); final Object data = callHelperNode .call(coreLibrary().truffleInternalModule, "get_data", pathString, endPosition); diff --git a/src/main/java/org/truffleruby/language/EmitWarningsNode.java b/src/main/java/org/truffleruby/language/EmitWarningsNode.java index 242b51607a92..7808c873e7cb 100644 --- a/src/main/java/org/truffleruby/language/EmitWarningsNode.java +++ b/src/main/java/org/truffleruby/language/EmitWarningsNode.java @@ -11,9 +11,7 @@ import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.frame.VirtualFrame; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.control.RaiseException; @@ -68,9 +66,9 @@ public static void printWarnings(RubyContext context, RubyDeferredWarnings warni private static void printWarning(RubyContext context, String message) { if (context.getCoreLibrary().isLoaded()) { final Object warning = 
context.getCoreLibrary().warningModule; - final Rope messageRope = StringOperations.encodeRope(message, UTF8Encoding.INSTANCE); - final RubyString messageString = StringOperations - .createUTF8String(context, context.getLanguageSlow(), messageRope); + final RubyString messageString = StringOperations.createUTF8String(context, context.getLanguageSlow(), + message); + DispatchNode.getUncached().call(warning, "warn", messageString); } else { try { diff --git a/src/main/java/org/truffleruby/language/RubyBaseNode.java b/src/main/java/org/truffleruby/language/RubyBaseNode.java index 52d9e69eb0a1..d05aa3bdbbd5 100644 --- a/src/main/java/org/truffleruby/language/RubyBaseNode.java +++ b/src/main/java/org/truffleruby/language/RubyBaseNode.java @@ -16,6 +16,9 @@ import com.oracle.truffle.api.nodes.LoopNode; import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.profiles.LoopConditionProfile; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.MutableTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.CoreLibrary; @@ -26,10 +29,13 @@ import org.truffleruby.core.exception.CoreExceptions; import org.truffleruby.core.numeric.BignumOperations; import org.truffleruby.core.numeric.RubyBignum; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.CoreStrings; +import org.truffleruby.core.string.RubyString; import org.truffleruby.core.symbol.CoreSymbols; import org.truffleruby.core.symbol.RubySymbol; +import org.truffleruby.language.library.RubyStringLibrary; +import org.truffleruby.language.objects.AllocationTracing; import java.math.BigInteger; @@ -44,8 +50,6 @@ public abstract class RubyBaseNode extends Node { public static final int MAX_EXPLODE_SIZE = 16; - public static final int LIBSTRING_CACHE = 3; - public boolean 
isSingleContext() { return getLanguage().singleContext; } @@ -120,7 +124,7 @@ protected final RubySymbol getSymbol(String name) { return getLanguage().getSymbol(name); } - protected final RubySymbol getSymbol(Rope name, RubyEncoding encoding) { + protected final RubySymbol getSymbol(AbstractTruffleString name, RubyEncoding encoding) { return getLanguage().getSymbol(name, encoding); } @@ -152,6 +156,78 @@ protected final RubyArray createEmptyArray() { return ArrayHelpers.createEmptyArray(getContext(), getLanguage()); } + public final RubyString createString(TruffleString tstring, RubyEncoding encoding) { + final RubyString instance = new RubyString( + coreLibrary().stringClass, + getLanguage().stringShape, + false, + tstring, + encoding); + AllocationTracing.trace(instance, this); + return instance; + } + + public final RubyString createStringCopy(TruffleString.AsTruffleStringNode asTruffleStringNode, + AbstractTruffleString tstring, RubyEncoding encoding) { + final TruffleString copy = asTruffleStringNode.execute(tstring, encoding.tencoding); + final RubyString instance = new RubyString( + coreLibrary().stringClass, + getLanguage().stringShape, + false, + copy, + encoding); + AllocationTracing.trace(instance, this); + return instance; + } + + public final RubyString createMutableString(MutableTruffleString tstring, RubyEncoding encoding) { + final RubyString instance = new RubyString( + coreLibrary().stringClass, + getLanguage().stringShape, + false, + tstring, + encoding); + AllocationTracing.trace(instance, this); + return instance; + } + + public final RubyString createString(TStringWithEncoding tStringWithEncoding) { + return createString(tStringWithEncoding.tstring, tStringWithEncoding.encoding); + } + + protected final RubyString createString(TruffleString.FromByteArrayNode fromByteArrayNode, byte[] bytes, + RubyEncoding encoding) { + var tstring = fromByteArrayNode.execute(bytes, encoding.tencoding, false); + return createString(tstring, encoding); + } + + 
protected final RubyString createString(TruffleString.FromJavaStringNode fromJavaStringNode, String javaString, + RubyEncoding encoding) { + var tstring = fromJavaStringNode.execute(javaString, encoding.tencoding); + return createString(tstring, encoding); + } + + protected final RubyString createSubString(TruffleString.SubstringByteIndexNode substringNode, + RubyStringLibrary strings, Object source, int byteOffset, int byteLength) { + return createSubString(substringNode, strings.getTString(source), strings.getEncoding(source), byteOffset, + byteLength); + } + + protected final RubyString createSubString(TruffleString.SubstringByteIndexNode substringNode, + AbstractTruffleString tstring, RubyEncoding encoding, int byteOffset, int byteLength) { + final TruffleString substring = substringNode.execute(tstring, byteOffset, byteLength, encoding.tencoding, + true); + return createString(substring, encoding); + } + + protected final RubyString createSubString(TruffleString.SubstringNode substringNode, + AbstractTruffleString tstring, RubyEncoding encoding, int codePointOffset, int codePointLength) { + final TruffleString substring = substringNode.execute(tstring, codePointOffset, codePointLength, + encoding.tencoding, + true); + return createString(substring, encoding); + } + protected final CoreLibrary coreLibrary() { return getContext().getCoreLibrary(); } diff --git a/src/main/java/org/truffleruby/language/RubyDynamicObject.java b/src/main/java/org/truffleruby/language/RubyDynamicObject.java index ccbbbeb7b761..d1e9ac54fdf3 100644 --- a/src/main/java/org/truffleruby/language/RubyDynamicObject.java +++ b/src/main/java/org/truffleruby/language/RubyDynamicObject.java @@ -139,7 +139,7 @@ public Class getLanguage() { @ExportMessage public Object toDisplayString(boolean allowSideEffects, @Exclusive @Cached DispatchNode dispatchNode, - @CachedLibrary(limit = "getLibStringCacheLimit()") RubyStringLibrary libString, + @Cached RubyStringLibrary libString, @Cached 
KernelNodes.ToSNode kernelToSNode) { if (allowSideEffects) { Object inspect = dispatchNode.call(this, "inspect"); @@ -154,10 +154,6 @@ public Object toDisplayString(boolean allowSideEffects, } - protected static int getLibStringCacheLimit() { - return RubyBaseNode.LIBSTRING_CACHE; - } - // region Identity @ExportMessage public int identityHashCode() { diff --git a/src/main/java/org/truffleruby/language/RubyEvalInteractiveRootNode.java b/src/main/java/org/truffleruby/language/RubyEvalInteractiveRootNode.java index 3fd531058984..1ae8b5045f74 100644 --- a/src/main/java/org/truffleruby/language/RubyEvalInteractiveRootNode.java +++ b/src/main/java/org/truffleruby/language/RubyEvalInteractiveRootNode.java @@ -9,12 +9,11 @@ */ package org.truffleruby.language; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.binding.RubyBinding; -import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.backtrace.InternalRootNode; @@ -24,14 +23,12 @@ public class RubyEvalInteractiveRootNode extends RubyBaseRootNode implements InternalRootNode { - private final Rope sourceRope; - - private final RubyLanguage language; + private final TruffleString sourceString; + @Child DispatchNode callEvalNode = DispatchNode.create(); public RubyEvalInteractiveRootNode(RubyLanguage language, Source source) { super(language, null, null); - this.language = language; - this.sourceRope = StringOperations.encodeRope(source.getCharacters().toString(), UTF8Encoding.INSTANCE); + this.sourceString = TStringUtils.utf8TString(source.getCharacters().toString()); } @Override @@ -41,8 +38,10 @@ public Object execute(VirtualFrame frame) { // Just do Truffle::Boot::INTERACTIVE_BINDING.eval(code) for 
interactive sources. // It's the semantics we want and takes care of caching correctly based on the Binding's FrameDescriptor. final RubyBinding interactiveBinding = context.getCoreLibrary().interactiveBinding; - return DispatchNode.getUncached().call(interactiveBinding, "eval", - StringOperations.createString(this, sourceRope, Encodings.UTF_8)); + return callEvalNode.call( + interactiveBinding, + "eval", + StringOperations.createUTF8String(context, getLanguage(), sourceString)); } @Override diff --git a/src/main/java/org/truffleruby/language/RubyGuards.java b/src/main/java/org/truffleruby/language/RubyGuards.java index f591d7cc8a03..f534388c1f35 100644 --- a/src/main/java/org/truffleruby/language/RubyGuards.java +++ b/src/main/java/org/truffleruby/language/RubyGuards.java @@ -9,6 +9,10 @@ */ package org.truffleruby.language; +import com.oracle.truffle.api.CompilerAsserts; +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.CoreLibrary; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.encoding.RubyEncoding; @@ -230,6 +234,34 @@ public static boolean isMetaClass(RubyModule value) { return isSingletonClass(value) && ((RubyClass) value).attached instanceof RubyModule; } + // String uncached methods + + /** Use to initialize {@link Cached} values */ + public static TruffleString asTruffleStringUncached(Object rubyString) { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + if (rubyString instanceof RubyString) { + return ((RubyString) rubyString).asTruffleStringUncached(); + } else if (rubyString instanceof ImmutableRubyString) { + return ((ImmutableRubyString) rubyString).asTruffleStringUncached(); + } else { + throw CompilerDirectives.shouldNotReachHere(rubyString.getClass().getName()); + } + } + + /** This is an uncached conversion, for optimized cached conversion to java.lang.String use {@link 
ToJavaStringNode} + * instead. Note that {@link Object#toString()} should not be used because that would not check clearly that it is + * used only behind boundaries, and it would not fail if binary and non-ASCII. */ + public static String getJavaString(Object rubyString) { + CompilerAsserts.neverPartOfCompilation("Only behind @TruffleBoundary"); + if (rubyString instanceof RubyString) { + return ((RubyString) rubyString).getJavaString(); + } else if (rubyString instanceof ImmutableRubyString) { + return ((ImmutableRubyString) rubyString).getJavaString(); + } else { + throw CompilerDirectives.shouldNotReachHere(rubyString.getClass().getName()); + } + } + // Arguments public static boolean noArguments(Object[] args) { diff --git a/src/main/java/org/truffleruby/language/TruffleBootNodes.java b/src/main/java/org/truffleruby/language/TruffleBootNodes.java index 232d31b770a6..71b076326d44 100644 --- a/src/main/java/org/truffleruby/language/TruffleBootNodes.java +++ b/src/main/java/org/truffleruby/language/TruffleBootNodes.java @@ -11,12 +11,13 @@ import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.List; import java.util.stream.Collectors; import com.oracle.truffle.api.RootCallTarget; -import com.oracle.truffle.api.library.CachedLibrary; import com.oracle.truffle.api.nodes.NodeUtil; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.collections.Pair; import org.graalvm.options.OptionDescriptor; import org.truffleruby.RubyContext; @@ -27,11 +28,7 @@ import org.truffleruby.builtins.CoreModule; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.core.symbol.RubySymbol; import 
org.truffleruby.language.control.RaiseException; import org.truffleruby.language.dispatch.DispatchNode; @@ -63,7 +60,7 @@ public abstract class TruffleBootNodes { @CoreMethod(names = "ruby_home", onSingleton = true) public abstract static class RubyHomeNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -72,7 +69,7 @@ protected Object rubyHome() { if (home == null) { return nil; } else { - return makeStringNode.executeMake(home, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, home, Encodings.UTF_8); } } @@ -112,7 +109,7 @@ public abstract static class MainNode extends CoreMethodArrayArgumentsNode { @Child DispatchNode checkSyntax = DispatchNode.create(); @Child IndirectCallNode callNode = IndirectCallNode.create(); @Child DispatchNode requireNode = DispatchNode.create(); - @Child MakeStringNode makeStringNode = MakeStringNode.create(); + @Child TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -134,7 +131,8 @@ protected int main(int argc, long argv, String kind, String toExecute) { if (getContext().getOptions().SYNTAX_CHECK) { checkSyntax.call(coreLibrary().truffleBootModule, "check_syntax", source); } else { - final Pair sourceRopePair = Pair.create(source.getSource(), source.getRope()); + var tstringWithEncoding = source.hasTruffleString() ? 
source.getTStringWithEncoding() : null; + var sourceRopePair = Pair.create(source.getSource(), tstringWithEncoding); final RootCallTarget callTarget = getContext() .getCodeLoader() .parseTopLevelWithCache(sourceRopePair, null); @@ -206,7 +204,7 @@ private RubySource loadMainSourceSettingDollarZero(String kind, String toExecute } private RubyString utf8(String string) { - return makeStringNode.executeMake(string, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, string, Encodings.UTF_8); } } @@ -214,7 +212,7 @@ private RubyString utf8(String string) { @CoreMethod(names = "original_argv", onSingleton = true) public abstract static class OriginalArgvNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -223,10 +221,10 @@ protected RubyArray originalArgv() { final Object[] array = new Object[argv.length]; for (int n = 0; n < array.length; n++) { - array[n] = makeStringNode.executeMake( + array[n] = createString( + fromJavaStringNode, argv[n], - getContext().getEncodingManager().getDefaultExternalEncoding(), - CodeRange.CR_UNKNOWN); + getContext().getEncodingManager().getDefaultExternalEncoding()); } return createArray(array); @@ -237,7 +235,7 @@ protected RubyArray originalArgv() { @CoreMethod(names = "extra_load_paths", onSingleton = true) public abstract static class ExtraLoadPathsNode extends CoreMethodNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -246,7 +244,7 @@ protected RubyArray extraLoadPaths() { final Object[] array = new Object[paths.length]; for (int n = 0; n < array.length; n++) { - array[n] = makeStringNode.executeMake(paths[n], Encodings.UTF_8, 
CodeRange.CR_UNKNOWN); + array[n] = createString(fromJavaStringNode, paths[n], Encodings.UTF_8); } return createArray(array); @@ -257,7 +255,7 @@ protected RubyArray extraLoadPaths() { @CoreMethod(names = "source_of_caller", onSingleton = true) public abstract static class SourceOfCallerNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -277,8 +275,7 @@ protected Object sourceOfCaller() { return nil; } - return makeStringNode - .executeMake(getLanguage().getSourcePath(source), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, getLanguage().getSourcePath(source), Encodings.UTF_8); } } @@ -306,13 +303,13 @@ protected Object innerCheckSyntax(RubySource source) { @CoreMethod(names = "get_option", onSingleton = true, required = 1) public abstract static class GetOptionNode extends CoreMethodArrayArgumentsNode { - @Child private MakeStringNode makeStringNode = MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary - @Specialization(guards = "libOptionName.isRubyString(optionName)") + @Specialization(guards = "libOptionName.isRubyString(optionName)", limit = "1") protected Object getOption(Object optionName, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libOptionName) { - final String optionNameString = libOptionName.getJavaString(optionName); + @Cached RubyStringLibrary libOptionName) { + final String optionNameString = RubyGuards.getJavaString(optionName); final OptionDescriptor descriptor = OptionsCatalog.fromName("ruby." 
+ optionNameString); if (descriptor == null) { throw new RaiseException( @@ -340,7 +337,7 @@ protected Object getOption(Object optionName, } else if (value instanceof Enum) { return getSymbol(value.toString()); } else if (value instanceof String) { - return makeStringNode.executeMake(value, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, (String) value, Encodings.UTF_8); } else if (value instanceof String[]) { return toRubyArray((String[]) value); } else { @@ -352,7 +349,7 @@ protected Object getOption(Object optionName, private RubyArray toRubyArray(String[] strings) { final Object[] objects = new Object[strings.length]; for (int n = 0; n < strings.length; n++) { - objects[n] = makeStringNode.executeMake(strings[n], Encodings.UTF_8, CodeRange.CR_UNKNOWN); + objects[n] = createString(fromJavaStringNode, strings[n], Encodings.UTF_8); } return createArray(objects); } @@ -387,12 +384,12 @@ public abstract static class ToolchainExecutableNode extends CoreMethodArrayArgu @TruffleBoundary @Specialization protected Object toolchainExecutable(RubySymbol executable, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final String name = executable.getString(); final Toolchain toolchain = getToolchain(getContext(), this); final TruffleFile path = toolchain.getToolPath(name); if (path != null) { - return makeStringNode.executeMake(path.getPath(), Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, path.getPath(), Encodings.UTF_8); } else { throw new RaiseException( getContext(), @@ -408,7 +405,7 @@ public abstract static class ToolchainPathsNode extends CoreMethodArrayArguments @TruffleBoundary @Specialization protected Object toolchainPaths(RubySymbol pathName, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { final String name = pathName.getString(); final Toolchain toolchain = 
getToolchain(getContext(), this); final List paths = toolchain.getPaths(name); @@ -417,7 +414,7 @@ protected Object toolchainPaths(RubySymbol pathName, .stream() .map(file -> file.getPath()) .collect(Collectors.joining(File.pathSeparator)); - return makeStringNode.executeMake(path, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, path, Encodings.UTF_8); } else { throw new RaiseException( getContext(), @@ -453,7 +450,7 @@ public abstract static class BasicABIVersionNode extends CoreMethodNode { @TruffleBoundary @Specialization protected RubyString basicABIVersion( - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { TruffleFile file = getLanguage().getRubyHomeTruffleFile().resolve(ABI_VERSION_FILE); byte[] bytes; try { @@ -462,8 +459,8 @@ protected RubyString basicABIVersion( throw CompilerDirectives.shouldNotReachHere(e); } - String basicVersion = RopeOperations.decodeAscii(bytes).strip(); - return makeStringNode.executeMake(basicVersion, Encodings.UTF_8, CodeRange.CR_7BIT); + String basicVersion = new String(bytes, StandardCharsets.US_ASCII).strip(); + return createString(fromJavaStringNode, basicVersion, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/language/WarnNode.java b/src/main/java/org/truffleruby/language/WarnNode.java index c1898560471a..15807d872081 100644 --- a/src/main/java/org/truffleruby/language/WarnNode.java +++ b/src/main/java/org/truffleruby/language/WarnNode.java @@ -11,11 +11,10 @@ import com.oracle.truffle.api.nodes.DenyReplace; import com.oracle.truffle.api.nodes.NodeCost; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.language.dispatch.DispatchNode; import 
org.truffleruby.language.globals.ReadSimpleGlobalVariableNode; @@ -31,7 +30,7 @@ public class WarnNode extends RubyBaseNode { @Child protected ReadSimpleGlobalVariableNode readVerboseNode = ReadSimpleGlobalVariableNode.create("$VERBOSE"); @Child private DispatchNode callWarnNode; - @Child private MakeStringNode makeStringNode; + @Child private TruffleString.FromJavaStringNode fromJavaStringNode; public boolean shouldWarn() { final Object verbosity = readVerboseNode.execute(); @@ -47,24 +46,22 @@ public final boolean shouldWarnForDeprecation() { public void warningMessage(SourceSection sourceSection, String message) { assert shouldWarn(); - if (makeStringNode == null) { + if (fromJavaStringNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); - makeStringNode = insert(MakeStringNode.create()); + fromJavaStringNode = insert(TruffleString.FromJavaStringNode.create()); } if (callWarnNode == null) { CompilerDirectives.transferToInterpreterAndInvalidate(); callWarnNode = insert(DispatchNode.create()); } - callWarn(getContext(), sourceSection, message, makeStringNode, callWarnNode); + callWarn(getContext(), sourceSection, message, this, fromJavaStringNode, callWarnNode); } - static void callWarn(RubyContext context, SourceSection sourceSection, String message, - MakeStringNode makeStringNode, DispatchNode callWarnNode) { + static void callWarn(RubyContext context, SourceSection sourceSection, String message, RubyBaseNode node, + TruffleString.FromJavaStringNode fromJavaStringNode, DispatchNode callWarnNode) { final String warningMessage = buildWarningMessage(context, sourceSection, message); - - final RubyString warningString = makeStringNode - .executeMake(warningMessage, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + final RubyString warningString = node.createString(fromJavaStringNode, warningMessage, Encodings.UTF_8); callWarnNode.call(context.getCoreLibrary().kernelModule, "warn", warningString); } @@ -82,10 +79,8 @@ abstract static class 
AbstractUncachedWarnNode extends RubyBaseNode { public void warningMessage(SourceSection sourceSection, String message) { assert shouldWarn(); WarnNode.callWarn( - getContext(), - sourceSection, - message, - MakeStringNode.getUncached(), + getContext(), sourceSection, message, this, + TruffleString.FromJavaStringNode.getUncached(), DispatchNode.getUncached()); } diff --git a/src/main/java/org/truffleruby/language/backtrace/BacktraceFormatter.java b/src/main/java/org/truffleruby/language/backtrace/BacktraceFormatter.java index a3f45202cc64..d002bc51096c 100644 --- a/src/main/java/org/truffleruby/language/backtrace/BacktraceFormatter.java +++ b/src/main/java/org/truffleruby/language/backtrace/BacktraceFormatter.java @@ -16,7 +16,6 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.source.Source; import com.oracle.truffle.api.source.SourceSection; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.SuppressFBWarnings; @@ -27,10 +26,10 @@ import org.truffleruby.core.exception.RubyException; import org.truffleruby.core.string.StringOperations; import org.truffleruby.core.string.StringUtils; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.RubyRootNode; import org.truffleruby.language.control.RaiseException; import org.truffleruby.language.dispatch.DispatchNode; -import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.language.methods.TranslateExceptionNode; import org.truffleruby.parser.RubySource; @@ -138,7 +137,7 @@ public void printRubyExceptionOnEnvStderr(String info, AbstractTruffleException "get_formatted_backtrace", exceptionObject); final String formatted = fullMessage != null - ? RubyStringLibrary.getUncached().getJavaString(fullMessage) + ? 
RubyGuards.getJavaString(fullMessage) : ""; if (formatted.endsWith("\n")) { printer.print(formatted); @@ -200,16 +199,14 @@ public RubyArray formatBacktraceAsRubyStringArray(RubyException exception, Backt return formatBacktraceAsRubyStringArray(exception, backtrace, Integer.MAX_VALUE); } + @TruffleBoundary public RubyArray formatBacktraceAsRubyStringArray(RubyException exception, Backtrace backtrace, int length) { final String[] lines = formatBacktraceAsStringArray(exception, backtrace, length); final Object[] array = new Object[lines.length]; for (int n = 0; n < lines.length; n++) { - array[n] = StringOperations.createUTF8String( - context, - language, - StringOperations.encodeRope(lines[n], UTF8Encoding.INSTANCE)); + array[n] = StringOperations.createUTF8String(context, language, lines[n]); } return ArrayHelpers.createArray(context, language, array); diff --git a/src/main/java/org/truffleruby/language/dispatch/DispatchNode.java b/src/main/java/org/truffleruby/language/dispatch/DispatchNode.java index c2837df9083e..05c3bf356b86 100644 --- a/src/main/java/org/truffleruby/language/dispatch/DispatchNode.java +++ b/src/main/java/org/truffleruby/language/dispatch/DispatchNode.java @@ -9,6 +9,7 @@ */ package org.truffleruby.language.dispatch; +import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; import com.oracle.truffle.api.HostCompilerDirectives.InliningCutoff; @@ -66,6 +67,7 @@ public static DispatchNode create() { } public static DispatchNode getUncached(DispatchConfiguration config) { + CompilerAsserts.neverPartOfCompilation("uncached"); return Uncached.UNCACHED_NODES[config.ordinal()]; } @@ -414,12 +416,16 @@ public final void applySplittingInliningStrategy(RootCallTarget callTarget, Stri private static final class Uncached extends DispatchNode { static final Uncached[] UNCACHED_NODES = new Uncached[DispatchConfiguration.values().length]; + static { for 
(DispatchConfiguration config : DispatchConfiguration.values()) { UNCACHED_NODES[config.ordinal()] = new Uncached(config); } } + public static final DispatchNode UNCACHED_METHOD_MISSING_NODE = DispatchNode + .getUncached(DispatchConfiguration.PRIVATE_RETURN_MISSING_IGNORE_REFINEMENTS); + protected Uncached(DispatchConfiguration config) { super(config, null, null, null, null); } @@ -441,7 +447,7 @@ protected CallForeignMethodNode getCallForeignMethodNode() { @Override protected DispatchNode getMethodMissingNode() { - return DispatchNode.getUncached(DispatchConfiguration.PRIVATE_RETURN_MISSING_IGNORE_REFINEMENTS); + return UNCACHED_METHOD_MISSING_NODE; } @Override diff --git a/src/main/java/org/truffleruby/language/dispatch/InternalRespondToNode.java b/src/main/java/org/truffleruby/language/dispatch/InternalRespondToNode.java index e20db9eab6c8..9e58a6856d22 100644 --- a/src/main/java/org/truffleruby/language/dispatch/InternalRespondToNode.java +++ b/src/main/java/org/truffleruby/language/dispatch/InternalRespondToNode.java @@ -9,6 +9,7 @@ */ package org.truffleruby.language.dispatch; +import com.oracle.truffle.api.CompilerAsserts; import com.oracle.truffle.api.frame.Frame; import com.oracle.truffle.api.nodes.DenyReplace; import com.oracle.truffle.api.nodes.NodeCost; @@ -42,6 +43,7 @@ public static InternalRespondToNode create() { } public static InternalRespondToNode getUncached(DispatchConfiguration config) { + CompilerAsserts.neverPartOfCompilation("uncached"); return Uncached.UNCACHED_NODES[config.ordinal()]; } diff --git a/src/main/java/org/truffleruby/language/library/RubyStringLibrary.java b/src/main/java/org/truffleruby/language/library/RubyStringLibrary.java index c72da836b567..efa4f8410c64 100644 --- a/src/main/java/org/truffleruby/language/library/RubyStringLibrary.java +++ b/src/main/java/org/truffleruby/language/library/RubyStringLibrary.java @@ -9,33 +9,247 @@ */ package org.truffleruby.language.library; -import 
com.oracle.truffle.api.library.GenerateLibrary; -import com.oracle.truffle.api.library.Library; -import com.oracle.truffle.api.library.LibraryFactory; +import com.oracle.truffle.api.CompilerDirectives; +import com.oracle.truffle.api.CompilerDirectives.CompilationFinal; +import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.ImmutableRubyString; +import org.truffleruby.core.string.RubyString; +import org.truffleruby.language.RubyGuards; -@GenerateLibrary -public abstract class RubyStringLibrary extends Library { +/** It is important that all messages of this library can be trivially implemented without needing any @Cached state or + * node. That way, the generated library classes are actually global immutable singletons. + *

+ * Implemented by {@link org.truffleruby.core.string.RubyString} and + * {@link org.truffleruby.core.string.ImmutableRubyString} */ +public abstract class RubyStringLibrary { - private static final LibraryFactory FACTORY = LibraryFactory.resolve(RubyStringLibrary.class); - - public static LibraryFactory getFactory() { - return FACTORY; + public static RubyStringLibrary create() { + return new Cached(); } public static RubyStringLibrary getUncached() { - return FACTORY.getUncached(); + return Uncached.INSTANCE; } - public boolean isRubyString(Object receiver) { - return false; - } + /** Used to create separate specialization instances for RubyString and ImmutableRubyString */ + public abstract boolean seen(Object object); + + public abstract boolean isRubyString(Object object); - public abstract Rope getRope(Object object); + public abstract AbstractTruffleString getTString(Object object); public abstract RubyEncoding getEncoding(Object object); - public abstract String getJavaString(Object receiver); + public final TruffleString.Encoding getTEncoding(Object object) { + return getEncoding(object).tencoding; + } + + public abstract int byteLength(Object object); + + public abstract RubyEncoding profileEncoding(RubyEncoding encoding); + + static final class Cached extends RubyStringLibrary { + + @CompilationFinal private boolean seenMutable, seenImmutable, seenOther; + @CompilationFinal private Object cachedEncoding; + + private static final Object GENERIC = new Object(); + + @Override + public boolean seen(Object object) { + assert object instanceof RubyString || object instanceof ImmutableRubyString; + if (seenMutable) { + return object instanceof RubyString; + } else if (seenImmutable) { + return object instanceof ImmutableRubyString; + } else { + CompilerDirectives.transferToInterpreterAndInvalidate(); + getTString(object); // specialize + return true; + } + } + + @Override + public boolean isRubyString(Object object) { + if (seenMutable && object instanceof 
RubyString) { + return true; + } else if (seenImmutable && object instanceof ImmutableRubyString) { + return true; + } else if (seenOther && RubyGuards.isNotRubyString(object)) { + return false; + } + + CompilerDirectives.transferToInterpreterAndInvalidate(); + return specializeIsRubyString(object); + } + + private boolean specializeIsRubyString(Object object) { + if (object instanceof RubyString) { + seenMutable = true; + return true; + } else if (object instanceof ImmutableRubyString) { + seenImmutable = true; + return true; + } else if (RubyGuards.isNotRubyString(object)) { + seenOther = true; + return false; + } else { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + @Override + public AbstractTruffleString getTString(Object object) { + if (seenMutable && object instanceof RubyString) { + return ((RubyString) object).tstring; + } else if (seenImmutable && object instanceof ImmutableRubyString) { + return ((ImmutableRubyString) object).tstring; + } + + CompilerDirectives.transferToInterpreterAndInvalidate(); + return specializeGetTString(object); + } + + private AbstractTruffleString specializeGetTString(Object object) { + if (object instanceof RubyString) { + seenMutable = true; + return ((RubyString) object).tstring; + } else if (object instanceof ImmutableRubyString) { + seenImmutable = true; + return ((ImmutableRubyString) object).tstring; + } else { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + @Override + public RubyEncoding profileEncoding(RubyEncoding encoding) { + var localCachedEncoding = this.cachedEncoding; + if (encoding == localCachedEncoding) { + return (RubyEncoding) localCachedEncoding; + } else if (localCachedEncoding == GENERIC) { + return encoding; + } else { + CompilerDirectives.transferToInterpreterAndInvalidate(); + return specializeProfileEncoding(encoding); + } + } + + private RubyEncoding specializeProfileEncoding(RubyEncoding encoding) { + var localCachedEncoding = this.cachedEncoding; + if 
(localCachedEncoding == null) { + this.cachedEncoding = encoding; + } else if (encoding != localCachedEncoding) { + this.cachedEncoding = GENERIC; + } + return encoding; + } + + @Override + public RubyEncoding getEncoding(Object object) { + final RubyEncoding encoding; + if (seenMutable && object instanceof RubyString) { + encoding = ((RubyString) object).getEncodingUnprofiled(); + } else if (seenImmutable && object instanceof ImmutableRubyString) { + encoding = ((ImmutableRubyString) object).getEncodingUnprofiled(); + } else { + CompilerDirectives.transferToInterpreterAndInvalidate(); + return specializeGetEncoding(object); + } + + return profileEncoding(encoding); + } + + private RubyEncoding specializeGetEncoding(Object object) { + final RubyEncoding encoding; + if (object instanceof RubyString) { + seenMutable = true; + encoding = ((RubyString) object).getEncodingUnprofiled(); + } else if (object instanceof ImmutableRubyString) { + seenImmutable = true; + encoding = ((ImmutableRubyString) object).getEncodingUnprofiled(); + } else { + throw CompilerDirectives.shouldNotReachHere(); + } + + return specializeProfileEncoding(encoding); + } + + @Override + public int byteLength(Object object) { + if (seenMutable && object instanceof RubyString) { + var mutable = (RubyString) object; + return getTString(mutable).byteLength(getTEncoding(mutable)); + } else if (seenImmutable && object instanceof ImmutableRubyString) { + var immutable = (ImmutableRubyString) object; + return getTString(immutable).byteLength(getTEncoding(immutable)); + } + + CompilerDirectives.transferToInterpreterAndInvalidate(); + return specializeByteLength(object); + } + + private int specializeByteLength(Object object) { + // getTString() and getTEncoding() will specialize as needed + return getTString(object).byteLength(getTEncoding(object)); + } + } + + static final class Uncached extends RubyStringLibrary { + + static final Uncached INSTANCE = new Uncached(); + + @TruffleBoundary + @Override + 
public boolean seen(Object object) { + assert object instanceof RubyString || object instanceof ImmutableRubyString; + return true; + } + + @TruffleBoundary + @Override + public boolean isRubyString(Object object) { + return object instanceof RubyString || object instanceof ImmutableRubyString; + } + + @TruffleBoundary + @Override + public AbstractTruffleString getTString(Object object) { + if (object instanceof RubyString) { + return ((RubyString) object).tstring; + } else if (object instanceof ImmutableRubyString) { + return ((ImmutableRubyString) object).tstring; + } else { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + @TruffleBoundary + @Override + public RubyEncoding profileEncoding(RubyEncoding encoding) { + return encoding; + } + + @TruffleBoundary + @Override + public RubyEncoding getEncoding(Object object) { + if (object instanceof RubyString) { + return ((RubyString) object).getEncodingUncached(); + } else if (object instanceof ImmutableRubyString) { + return ((ImmutableRubyString) object).getEncodingUncached(); + } else { + throw CompilerDirectives.shouldNotReachHere(); + } + } + + @TruffleBoundary + @Override + public int byteLength(Object object) { + return getTString(object).byteLength(getTEncoding(object)); + } + } } diff --git a/src/main/java/org/truffleruby/language/literal/StringLiteralNode.java b/src/main/java/org/truffleruby/language/literal/StringLiteralNode.java index 2bcf13c95e2b..9e3a7f7f67ef 100644 --- a/src/main/java/org/truffleruby/language/literal/StringLiteralNode.java +++ b/src/main/java/org/truffleruby/language/literal/StringLiteralNode.java @@ -9,35 +9,26 @@ */ package org.truffleruby.language.literal; -import org.truffleruby.core.encoding.Encodings; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.RubyEncoding; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; import org.truffleruby.language.RubyContextSourceNode; import 
com.oracle.truffle.api.frame.VirtualFrame; -import org.truffleruby.language.objects.AllocationTracing; public class StringLiteralNode extends RubyContextSourceNode { - private final Rope rope; + private final TruffleString tstring; private final RubyEncoding encoding; - public StringLiteralNode(Rope rope) { - this.rope = rope; - this.encoding = Encodings.getBuiltInEncoding(rope.encoding.getIndex()); + public StringLiteralNode(TruffleString tstring, RubyEncoding encoding) { + this.tstring = tstring; + this.encoding = encoding; } @Override public RubyString execute(VirtualFrame frame) { - final RubyString string = new RubyString( - coreLibrary().stringClass, - getLanguage().stringShape, - false, - rope, - encoding); - AllocationTracing.trace(string, this); - return string; + return createString(tstring, encoding); } } diff --git a/src/main/java/org/truffleruby/language/loader/CodeLoader.java b/src/main/java/org/truffleruby/language/loader/CodeLoader.java index 86498385b710..93188edc5044 100644 --- a/src/main/java/org/truffleruby/language/loader/CodeLoader.java +++ b/src/main/java/org/truffleruby/language/loader/CodeLoader.java @@ -14,7 +14,7 @@ import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.module.RubyModule; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.LexicalScope; import org.truffleruby.language.Nil; import org.truffleruby.language.RubyNode; @@ -52,9 +52,9 @@ public CodeLoader(RubyLanguage language, RubyContext context) { } @TruffleBoundary - public RootCallTarget parseTopLevelWithCache(Pair sourceRopePair, Node currentNode) { + public RootCallTarget parseTopLevelWithCache(Pair sourceRopePair, Node currentNode) { final Source source = sourceRopePair.getLeft(); - final Rope rope = sourceRopePair.getRight(); + final TStringWithEncoding rope = sourceRopePair.getRight(); final String path = RubyLanguage.getPath(source); if 
(language.singleContext && !alreadyLoadedInContext.add(language.getPathRelativeToHome(path))) { diff --git a/src/main/java/org/truffleruby/language/loader/EmbeddedScript.java b/src/main/java/org/truffleruby/language/loader/EmbeddedScript.java index 02a9fde93d44..32898aec06e6 100644 --- a/src/main/java/org/truffleruby/language/loader/EmbeddedScript.java +++ b/src/main/java/org/truffleruby/language/loader/EmbeddedScript.java @@ -12,7 +12,7 @@ import com.oracle.truffle.api.nodes.Node; import org.truffleruby.RubyContext; import org.truffleruby.collections.ByteArrayBuilder; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.control.RaiseException; /* @@ -24,7 +24,7 @@ public class EmbeddedScript { private final RubyContext context; - private static final byte[] PREFIX_COMMENT = RopeOperations.encodeAsciiBytes("# line ignored by Ruby: "); + private static final byte[] PREFIX_COMMENT = StringOperations.encodeAsciiBytes("# line ignored by Ruby: "); public EmbeddedScript(RubyContext context) { this.context = context; diff --git a/src/main/java/org/truffleruby/language/loader/EvalLoader.java b/src/main/java/org/truffleruby/language/loader/EvalLoader.java index 696507678454..e5eb3fa6aa52 100644 --- a/src/main/java/org/truffleruby/language/loader/EvalLoader.java +++ b/src/main/java/org/truffleruby/language/loader/EvalLoader.java @@ -10,12 +10,13 @@ package org.truffleruby.language.loader; import com.oracle.truffle.api.nodes.Node; -import org.jcodings.Encoding; +import com.oracle.truffle.api.strings.AbstractTruffleString; import org.truffleruby.RubyContext; import org.truffleruby.core.encoding.EncodingManager; -import org.truffleruby.core.rope.CannotConvertBinaryRubyStringToJavaString; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import 
org.truffleruby.core.string.CannotConvertBinaryRubyStringToJavaString; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.control.RaiseException; import org.truffleruby.parser.RubySource; import org.truffleruby.parser.lexer.RubyLexer; @@ -27,13 +28,21 @@ public abstract class EvalLoader { @TruffleBoundary - public static RubySource createEvalSource(RubyContext context, Rope code, String method, String file, int line, - Node currentNode) { - final Rope sourceRope = createEvalRope(code); + public static RubySource createEvalSource(RubyContext context, AbstractTruffleString codeTString, + RubyEncoding encoding, String method, String file, int line, Node currentNode) { + var code = new TStringWithEncoding(codeTString.asTruffleStringUncached(encoding.tencoding), encoding); + + var sourceTString = createEvalRope(code); + var sourceEncoding = sourceTString.encoding; + + if (!sourceEncoding.isAsciiCompatible) { + throw new RaiseException(context, context.getCoreExceptions() + .argumentError(sourceEncoding + " is not ASCII compatible", currentNode)); + } final String sourceString; try { - sourceString = RopeOperations.decodeRope(sourceRope); + sourceString = sourceTString.toJavaStringOrThrow(); } catch (CannotConvertBinaryRubyStringToJavaString e) { // In such a case, we have no way to build a Java String for the Truffle Source that // could accurately represent the source Rope, so we throw an error. 
@@ -50,23 +59,23 @@ public static RubySource createEvalSource(RubyContext context, Rope code, String final Source source = Source.newBuilder(TruffleRuby.LANGUAGE_ID, sourceString, file).build(); - final RubySource rubySource = new RubySource(source, file, sourceRope, true, line - 1); + final RubySource rubySource = new RubySource(source, file, sourceTString, true, line - 1); context.getSourceLineOffsets().put(source, line - 1); return rubySource; } - private static Rope createEvalRope(Rope source) { - final Encoding[] encoding = { source.getEncoding() }; + private static TStringWithEncoding createEvalRope(TStringWithEncoding source) { + final RubyEncoding[] encoding = { source.getEncoding() }; RubyLexer.parseMagicComment(source, (name, value) -> { if (RubyLexer.isMagicEncodingComment(name)) { - encoding[0] = EncodingManager.getEncoding(value); + encoding[0] = Encodings.getBuiltInEncoding(EncodingManager.getEncoding(value)); } }); if (source.getEncoding() != encoding[0]) { - source = RopeOperations.withEncoding(source, encoding[0]); + source = source.forceEncoding(encoding[0]); } return source; diff --git a/src/main/java/org/truffleruby/language/loader/FeatureLoader.java b/src/main/java/org/truffleruby/language/loader/FeatureLoader.java index 47055ad15e1d..f77a290ded52 100644 --- a/src/main/java/org/truffleruby/language/loader/FeatureLoader.java +++ b/src/main/java/org/truffleruby/language/loader/FeatureLoader.java @@ -23,7 +23,6 @@ import com.oracle.truffle.api.interop.UnknownIdentifierException; import com.oracle.truffle.api.interop.UnsupportedMessageException; import org.jcodings.Encoding; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.collections.ConcurrentOperations; @@ -42,9 +41,9 @@ import org.truffleruby.interop.TranslateInteropExceptionNode; import org.truffleruby.language.Nil; import org.truffleruby.language.RubyConstant; +import 
org.truffleruby.language.RubyGuards; import org.truffleruby.language.control.RaiseException; import org.truffleruby.language.dispatch.DispatchNode; -import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.platform.NativeConfiguration; import org.truffleruby.platform.Platform; import org.truffleruby.platform.TruffleNFIPlatform; @@ -303,7 +302,7 @@ private String findFeatureImpl(String feature) { context.getCoreLibrary().truffleFeatureLoaderModule, "get_expanded_load_path"); for (Object pathObject : ArrayOperations.toIterable(expandedLoadPath)) { - final String loadPath = RubyStringLibrary.getUncached().getJavaString(pathObject); + final String loadPath = RubyGuards.getJavaString(pathObject); if (context.getOptions().LOG_FEATURE_LOCATION) { RubyLanguage.LOGGER.info(String.format("from load path %s...", loadPath)); @@ -324,7 +323,7 @@ private String findFeatureImpl(String feature) { "get_expanded_load_path"); for (Object pathObject : ArrayOperations.toIterable(expandedLoadPath)) { // $LOAD_PATH entries are canonicalized since Ruby 2.4.4 - final String loadPath = RubyStringLibrary.getUncached().getJavaString(pathObject); + final String loadPath = RubyGuards.getJavaString(pathObject); if (context.getOptions().LOG_FEATURE_LOCATION) { RubyLanguage.LOGGER.info(String.format("from load path %s...", loadPath)); @@ -435,11 +434,7 @@ public void ensureCExtImplementationLoaded(String feature, RequireNode requireNo Metrics.printTime("before-load-cext-support"); try { - final RubyString cextRb = StringOperations - .createUTF8String( - context, - language, - StringOperations.encodeRope("truffle/cext", UTF8Encoding.INSTANCE)); + final RubyString cextRb = StringOperations.createUTF8String(context, language, "truffle/cext"); DispatchNode.getUncached().call(context.getCoreLibrary().mainObject, "gem_original_require", cextRb); final RubyModule truffleModule = context.getCoreLibrary().truffleModule; @@ -526,10 +521,8 @@ private Object 
getEmbeddedABIVersion(String expandedPath, Object library) { ArrayUtils.EMPTY_ARRAY, abiFunctionInteropLibrary, TranslateInteropExceptionNode.getUncached()); - return StringOperations.createUTF8String( - context, - language, - StringOperations.encodeRope(abiVersion, UTF8Encoding.INSTANCE)); + + return StringOperations.createUTF8String(context, language, abiVersion); } Object findFunctionInLibrary(Object library, String functionName, String path) { diff --git a/src/main/java/org/truffleruby/language/loader/FileLoader.java b/src/main/java/org/truffleruby/language/loader/FileLoader.java index 568ab02275fc..d032d216caf7 100644 --- a/src/main/java/org/truffleruby/language/loader/FileLoader.java +++ b/src/main/java/org/truffleruby/language/loader/FileLoader.java @@ -15,12 +15,11 @@ import com.oracle.truffle.api.nodes.Node; import org.graalvm.collections.Pair; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.control.RaiseException; import org.truffleruby.shared.TruffleRuby; @@ -59,7 +58,7 @@ public static void ensureReadable(RubyContext context, TruffleFile file, Node cu } - public Pair loadFile(String path) throws IOException { + public Pair loadFile(String path) throws IOException { if (context.getOptions().LOG_LOAD) { RubyLanguage.LOGGER.info("loading " + path); } @@ -72,9 +71,10 @@ public Pair loadFile(String path) throws IOException { * and pass them down to the lexer and to the Source. 
*/ final byte[] sourceBytes = file.readAllBytes(); - final Rope sourceRope = RopeOperations.create(sourceBytes, UTF8Encoding.INSTANCE, CodeRange.CR_UNKNOWN); - final Source source = buildSource(file, path, sourceRope, isInternal(path), false); - return Pair.create(source, sourceRope); + var tstringWithEnc = new TStringWithEncoding(TStringUtils.fromByteArray(sourceBytes, Encodings.UTF_8), + Encodings.UTF_8); + final Source source = buildSource(file, path, tstringWithEnc, isInternal(path), false); + return Pair.create(source, tstringWithEnc); } public static TruffleFile getSafeTruffleFile(RubyLanguage language, RubyContext context, String path) { @@ -120,7 +120,8 @@ private static boolean isStdLibRubyOrCExtFile(TruffleFile relativePathFromHome) return relativePathFromHome.startsWith("lib"); } - Source buildSource(TruffleFile file, String path, Rope sourceRope, boolean internal, boolean mainSource) { + Source buildSource(TruffleFile file, String path, TStringWithEncoding sourceRope, boolean internal, + boolean mainSource) { /* I'm not sure why we need to explicitly set a MIME type here - we say it's Ruby and this is the only and * default MIME type that Ruby supports. 
* @@ -146,7 +147,7 @@ Source buildSource(TruffleFile file, String path, Rope sourceRope, boolean inter .newBuilder(TruffleRuby.LANGUAGE_ID, file) .canonicalizePath(false) .mimeType(mimeType) - .content(RopeOperations.decodeOrEscapeBinaryRope(sourceRope)) + .content(sourceRope.tstring.toString()) .internal(internal) .cached(!coverageEnabled) .build(); diff --git a/src/main/java/org/truffleruby/language/loader/MainLoader.java b/src/main/java/org/truffleruby/language/loader/MainLoader.java index 98d4eaddee47..49926d4b7cd6 100644 --- a/src/main/java/org/truffleruby/language/loader/MainLoader.java +++ b/src/main/java/org/truffleruby/language/loader/MainLoader.java @@ -12,12 +12,11 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.parser.RubySource; import org.truffleruby.shared.TruffleRuby; @@ -49,23 +48,23 @@ public RubySource loadFromCommandLineArgument(String code) { public RubySource loadFromStandardIn(Node currentNode, String path) throws IOException { byte[] sourceBytes = readAllOfStandardIn(); - final Rope sourceRope = transformScript(currentNode, path, sourceBytes); + var sourceRope = transformScript(currentNode, path, sourceBytes); final Source source = Source - .newBuilder(TruffleRuby.LANGUAGE_ID, RopeOperations.decodeOrEscapeBinaryRope(sourceRope), path) + .newBuilder(TruffleRuby.LANGUAGE_ID, sourceRope.toString(), path) .mimeType(RubyLanguage.MIME_TYPE_MAIN_SCRIPT) .build(); return new RubySource(source, path, sourceRope); } - private Rope transformScript(Node currentNode, String path, byte[] sourceBytes) { + 
private TStringWithEncoding transformScript(Node currentNode, String path, byte[] sourceBytes) { final EmbeddedScript embeddedScript = new EmbeddedScript(context); if (embeddedScript.shouldTransform(sourceBytes)) { sourceBytes = embeddedScript.transformForExecution(currentNode, sourceBytes, path); } - return RopeOperations.create(sourceBytes, UTF8Encoding.INSTANCE, CodeRange.CR_UNKNOWN); + return new TStringWithEncoding(TStringUtils.fromByteArray(sourceBytes, Encodings.UTF_8), Encodings.UTF_8); } private byte[] readAllOfStandardIn() throws IOException { @@ -97,11 +96,11 @@ public RubySource loadFromFile(Env env, Node currentNode, String mainPath) throw * and pass them down to the lexer and to the Source. */ byte[] sourceBytes = file.readAllBytes(); - final Rope sourceRope = transformScript(currentNode, mainPath, sourceBytes); + var sourceTString = transformScript(currentNode, mainPath, sourceBytes); - final Source mainSource = fileLoader.buildSource(file, mainPath, sourceRope, false, true); + final Source mainSource = fileLoader.buildSource(file, mainPath, sourceTString, false, true); - return new RubySource(mainSource, mainPath, sourceRope); + return new RubySource(mainSource, mainPath, sourceTString); } } diff --git a/src/main/java/org/truffleruby/language/loader/RequireNode.java b/src/main/java/org/truffleruby/language/loader/RequireNode.java index 0bb94deecb1f..3d799ed1e8ea 100644 --- a/src/main/java/org/truffleruby/language/loader/RequireNode.java +++ b/src/main/java/org/truffleruby/language/loader/RequireNode.java @@ -19,18 +19,19 @@ import java.util.stream.Collectors; import com.oracle.truffle.api.RootCallTarget; -import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.source.Source; import org.graalvm.collections.Pair; import org.truffleruby.RubyLanguage; import org.truffleruby.cext.ValueWrapperManager; import org.truffleruby.core.array.ArrayUtils; import 
org.truffleruby.core.cast.BooleanCastNode; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.interop.InteropNodes; import org.truffleruby.interop.TranslateInteropExceptionNode; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.RubyConstant; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.WarningNode; import org.truffleruby.language.constants.GetConstantNode; import org.truffleruby.language.control.RaiseException; @@ -61,22 +62,23 @@ public abstract class RequireNode extends RubyBaseNode { public abstract boolean executeRequire(String feature, Object expandedPath); - @Specialization(guards = "libExpandedPathString.isRubyString(expandedPathString)") + @Specialization(guards = "libExpandedPathString.isRubyString(expandedPathString)", limit = "1") protected boolean require(String feature, Object expandedPathString, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary libExpandedPathString) { - final String expandedPath = libExpandedPathString.getJavaString(expandedPathString); - return requireWithMetrics(feature, expandedPath, expandedPathString); + @Cached RubyStringLibrary libExpandedPathString) { + return requireWithMetrics(feature, expandedPathString); } @TruffleBoundary - private boolean requireWithMetrics(String feature, String expandedPathRaw, Object pathString) { + private boolean requireWithMetrics(String feature, Object pathString) { + String internedExpandedPath = RubyGuards.getJavaString(pathString).intern(); + requireMetric("before-require-" + feature); try { //intern() to improve footprint return getContext().getMetricsProfiler().callWithMetrics( "require", feature, - () -> requireConsideringAutoload(feature, expandedPathRaw.intern(), pathString)); + () -> requireConsideringAutoload(feature, internedExpandedPath, pathString)); } finally { requireMetric("after-require-" + feature); } @@ -148,7 +150,7 @@ private boolean 
doRequire(String originalFeature, String expandedPath, Object pa Object relativeFeatureString = relativeFeatureNode .call(coreLibrary().truffleFeatureLoaderModule, "relative_feature", pathString); if (RubyStringLibrary.getUncached().isRubyString(relativeFeatureString)) { - relativeFeature = RubyStringLibrary.getUncached().getJavaString(relativeFeatureString); + relativeFeature = RubyGuards.getJavaString(relativeFeatureString); } } Boolean patchLoaded = patchFiles.get(relativeFeature); @@ -217,7 +219,7 @@ private boolean parseAndCall(String feature, String expandedPath) { requireCExtension(feature, expandedPath, this); } else { // All other files are assumed to be Ruby, the file type detection is not enough - final Pair sourceRopePair; + final Pair sourceRopePair; try { final FileLoader fileLoader = new FileLoader(getContext(), getLanguage()); sourceRopePair = fileLoader.loadFile(expandedPath); diff --git a/src/main/java/org/truffleruby/language/methods/TranslateExceptionNode.java b/src/main/java/org/truffleruby/language/methods/TranslateExceptionNode.java index c9a9e0e9c1ae..d13eac6e7a53 100644 --- a/src/main/java/org/truffleruby/language/methods/TranslateExceptionNode.java +++ b/src/main/java/org/truffleruby/language/methods/TranslateExceptionNode.java @@ -11,6 +11,7 @@ import com.oracle.truffle.api.dsl.Fallback; import com.oracle.truffle.api.exception.AbstractTruffleException; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.core.VMPrimitiveNodes.InitStackOverflowClassesEagerlyNode; import org.truffleruby.core.exception.ExceptionOperations; @@ -89,13 +90,17 @@ protected RuntimeException translate(Throwable e) { } protected boolean needsSpecialTranslation(Throwable e) { - return e instanceof UnsupportedSpecializationException || e instanceof StackOverflowError || + return e instanceof TruffleString.IllegalByteArrayLengthException || + e instanceof UnsupportedSpecializationException || + e instanceof 
StackOverflowError || e instanceof OutOfMemoryError; } @TruffleBoundary private RaiseException doTranslateSpecial(Throwable e) { - if (e instanceof UnsupportedSpecializationException) { + if (e instanceof TruffleString.IllegalByteArrayLengthException) { + return new RaiseException(getContext(), coreExceptions().argumentError(e.getMessage(), this)); + } else if (e instanceof UnsupportedSpecializationException) { return new RaiseException(getContext(), translateUnsupportedSpecialization(getContext(), (UnsupportedSpecializationException) e)); } else if (e instanceof StackOverflowError) { diff --git a/src/main/java/org/truffleruby/language/objects/AllocationTracing.java b/src/main/java/org/truffleruby/language/objects/AllocationTracing.java index e37286e2cce6..9427ffb2eb9c 100644 --- a/src/main/java/org/truffleruby/language/objects/AllocationTracing.java +++ b/src/main/java/org/truffleruby/language/objects/AllocationTracing.java @@ -10,15 +10,12 @@ package org.truffleruby.language.objects; import com.oracle.truffle.api.object.DynamicObjectLibrary; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.Layouts; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.core.inlined.AlwaysInlinedMethodNode; import org.truffleruby.core.module.RubyModule; import org.truffleruby.core.objectspace.ObjectSpaceManager; -import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.LexicalScope; import org.truffleruby.language.RubyDynamicObject; import org.truffleruby.language.arguments.RubyArguments; @@ -160,11 +157,4 @@ private static void storeAllocationTrace(RubyContext context, RubyDynamicObject DynamicObjectLibrary.getUncached().put(object, Layouts.ALLOCATION_TRACE_IDENTIFIER, trace); } - private static RubyString string(RubyContext context, RubyLanguage language, String value) { - // No point to use MakeStringNode (which uses AllocateObjectNode) here, as 
we should not - // trace the allocation of Strings used for tracing allocations. - return StringOperations - .createUTF8String(context, language, StringOperations.encodeRope(value, UTF8Encoding.INSTANCE)); - } - } diff --git a/src/main/java/org/truffleruby/options/Options.java b/src/main/java/org/truffleruby/options/Options.java index a08939ad576e..4b82eceb99a6 100644 --- a/src/main/java/org/truffleruby/options/Options.java +++ b/src/main/java/org/truffleruby/options/Options.java @@ -160,8 +160,8 @@ public class Options { public final boolean BUILDING_CORE_CEXTS; /** --log-pending-interrupts=false */ public final boolean LOG_PENDING_INTERRUPTS; - /** --rope-print-intern-stats=false */ - public final boolean ROPE_PRINT_INTERN_STATS; + /** --print-interned-tstring-stats=false */ + public final boolean PRINT_INTERNED_TSTRING_STATS; /** --cexts-to-native-stats=false */ public final boolean CEXTS_TO_NATIVE_STATS; /** --cexts-to-native-count=CEXTS_TO_NATIVE_STATS */ @@ -278,7 +278,7 @@ public Options(Env env, OptionValues options, LanguageOptions languageOptions) { ARGV_GLOBAL_FLAGS = options.get(OptionsCatalog.ARGV_GLOBAL_FLAGS_KEY); BUILDING_CORE_CEXTS = options.get(OptionsCatalog.BUILDING_CORE_CEXTS_KEY); LOG_PENDING_INTERRUPTS = options.get(OptionsCatalog.LOG_PENDING_INTERRUPTS_KEY); - ROPE_PRINT_INTERN_STATS = options.get(OptionsCatalog.ROPE_PRINT_INTERN_STATS_KEY); + PRINT_INTERNED_TSTRING_STATS = options.get(OptionsCatalog.PRINT_INTERNED_TSTRING_STATS_KEY); CEXTS_TO_NATIVE_STATS = options.get(OptionsCatalog.CEXTS_TO_NATIVE_STATS_KEY); CEXTS_TO_NATIVE_COUNT = options.hasBeenSet(OptionsCatalog.CEXTS_TO_NATIVE_COUNT_KEY) ? 
options.get(OptionsCatalog.CEXTS_TO_NATIVE_COUNT_KEY) : CEXTS_TO_NATIVE_STATS; BASICOPS_LOG_REWRITE = options.get(OptionsCatalog.BASICOPS_LOG_REWRITE_KEY); @@ -442,8 +442,8 @@ public Object fromDescriptor(OptionDescriptor descriptor) { return BUILDING_CORE_CEXTS; case "ruby.log-pending-interrupts": return LOG_PENDING_INTERRUPTS; - case "ruby.rope-print-intern-stats": - return ROPE_PRINT_INTERN_STATS; + case "ruby.print-interned-tstring-stats": + return PRINT_INTERNED_TSTRING_STATS; case "ruby.cexts-to-native-stats": return CEXTS_TO_NATIVE_STATS; case "ruby.cexts-to-native-count": diff --git a/src/main/java/org/truffleruby/parser/BodyTranslator.java b/src/main/java/org/truffleruby/parser/BodyTranslator.java index 16963b22f671..d31e608bcb56 100644 --- a/src/main/java/org/truffleruby/parser/BodyTranslator.java +++ b/src/main/java/org/truffleruby/parser/BodyTranslator.java @@ -19,6 +19,8 @@ import java.util.List; import com.oracle.truffle.api.TruffleSafepoint; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; import org.joni.NameEntry; import org.joni.Regex; @@ -43,7 +45,6 @@ import org.truffleruby.core.cast.ToSNode; import org.truffleruby.core.cast.ToSNodeGen; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.hash.ConcatHashLiteralNode; import org.truffleruby.core.hash.HashLiteralNode; import org.truffleruby.core.kernel.KernelNodesFactory; @@ -58,11 +59,10 @@ import org.truffleruby.core.regexp.RegexWarnDeferredCallback; import org.truffleruby.core.regexp.RegexpOptions; import org.truffleruby.core.regexp.RubyRegexp; -import org.truffleruby.core.rope.LeafRope; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.FrozenStrings; import 
org.truffleruby.core.string.InterpolatedStringNode; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.core.string.StringUtils; import org.truffleruby.core.support.TypeNodes; import org.truffleruby.core.string.ImmutableRubyString; @@ -514,9 +514,8 @@ public RubyNode visitCallNode(CallParseNode node) { if (receiver instanceof StrParseNode && (methodName.equals("freeze") || methodName.equals("-@"))) { final StrParseNode strNode = (StrParseNode) receiver; - final Rope nodeRope = strNode.getValue(); - final ImmutableRubyString frozenString = language - .getFrozenStringLiteral(nodeRope.getBytes(), nodeRope.getEncoding(), strNode.getCodeRange()); + final TruffleString nodeRope = strNode.getValue(); + final ImmutableRubyString frozenString = language.getFrozenStringLiteral(nodeRope, strNode.encoding); return addNewlineIfNeeded(node, withSourceSection( sourceSection, new FrozenStringLiteralNode(frozenString, FrozenStrings.METHOD))); @@ -1545,8 +1544,7 @@ public RubyNode visitDotNode(DotParseNode node) { @Override public RubyNode visitEncodingNode(EncodingParseNode node) { SourceIndexLength sourceSection = node.getPosition(); - final RubyNode ret = new ObjectLiteralNode( - Encodings.getBuiltInEncoding(node.getEncoding().getIndex())); + final RubyNode ret = new ObjectLiteralNode(Encodings.getBuiltInEncoding(node.getEncoding())); ret.unsafeSetSourceSection(sourceSection); return addNewlineIfNeeded(node, ret); } @@ -1566,7 +1564,8 @@ public RubyNode visitEvStrNode(EvStrParseNode node) { if (node.getBody() == null) { // "#{}" final SourceIndexLength sourceSection = node.getPosition(); - ret = new ObjectLiteralNode(language.getFrozenStringLiteral(RopeConstants.EMPTY_ASCII_8BIT_ROPE)); + ret = new ObjectLiteralNode( + language.getFrozenStringLiteral(TStringConstants.EMPTY_BINARY, Encodings.BINARY)); ret.unsafeSetSourceSection(sourceSection); } else { ret = node.getBody().accept(this); @@ -2139,20 +2138,20 @@ public RubyNode 
visitMatch2Node(Match2ParseNode node) { if (node.getReceiverNode() instanceof RegexpParseNode) { final RegexpParseNode regexpNode = (RegexpParseNode) node.getReceiverNode(); - final byte[] bytes = regexpNode.getValue().getBytes(); + final TStringWithEncoding source = regexpNode.getValue(); + final InternalByteArray sourceByteArray = source.getInternalByteArray(); final Regex regex; try { regex = new Regex( - bytes, - 0, - bytes.length, + sourceByteArray.getArray(), + sourceByteArray.getOffset(), + sourceByteArray.getEnd(), regexpNode.getOptions().toOptions(), - regexpNode.getEncoding(), + regexpNode.getRubyEncoding().jcoding, Syntax.RUBY, new RegexWarnDeferredCallback(rubyWarnings)); } catch (Exception e) { - String errorMessage = ClassicRegexp - .getRegexErrorMessage(regexpNode.getValue(), e, regexpNode.getOptions()); + String errorMessage = ClassicRegexp.getRegexErrorMessage(source.tstring, e, regexpNode.getOptions()); final RubyContext context = RubyLanguage.getCurrentContext(); throw new RaiseException(context, context.getCoreExceptions().regexpError(errorMessage, currentNode)); } @@ -2729,11 +2728,11 @@ public RubyNode visitRedoNode(RedoParseNode node) { @Override public RubyNode visitRegexpNode(RegexpParseNode node) { - final Rope rope = node.getValue(); - final RubyEncoding encoding = Encodings.getBuiltInEncoding(rope.getEncoding().getIndex()); + final TStringWithEncoding source = node.getValue(); final RegexpOptions options = node.getOptions().setLiteral(true); try { - final RubyRegexp regexp = RubyRegexp.create(language, rope, encoding, options, currentNode); + final RubyRegexp regexp = RubyRegexp.create(language, source.tstring, source.encoding, options, + currentNode); final ObjectLiteralNode literalNode = new ObjectLiteralNode(regexp); literalNode.unsafeSetSourceSection(node.getPosition()); return addNewlineIfNeeded(node, literalNode); @@ -2950,27 +2949,23 @@ public RubyNode visitSplatNode(SplatParseNode node) { @Override public RubyNode 
visitStrNode(StrParseNode node) { - final Rope nodeRope = node.getValue(); final RubyNode ret; - if (node.isFrozen()) { - final ImmutableRubyString frozenString = language - .getFrozenStringLiteral(nodeRope.getBytes(), nodeRope.getEncoding(), node.getCodeRange()); + var frozenString = language.getFrozenStringLiteral(node.getValue(), node.encoding); ret = new FrozenStringLiteralNode(frozenString, FrozenStrings.EXPRESSION); } else { - final LeafRope cachedRope = language.ropeCache - .getRope(nodeRope.getBytes(), nodeRope.getEncoding(), node.getCodeRange()); - ret = new StringLiteralNode(cachedRope); + var cachedTString = language.tstringCache.getTString(node.getValue(), node.encoding); + ret = new StringLiteralNode(cachedTString, node.encoding); } + ret.unsafeSetSourceSection(node.getPosition()); return addNewlineIfNeeded(node, ret); } @Override public RubyNode visitSymbolNode(SymbolParseNode node) { - final RubyNode ret = new ObjectLiteralNode(language.getSymbol( - node.getRope(), - Encodings.getBuiltInEncoding(node.getRope().getEncoding().getIndex()))); + var encoding = Encodings.getBuiltInEncoding(node.getEncoding()); + final RubyNode ret = new ObjectLiteralNode(language.getSymbol(node.getTString(), encoding)); ret.unsafeSetSourceSection(node.getPosition()); return addNewlineIfNeeded(node, ret); } @@ -3072,7 +3067,7 @@ private RubyNode translateWhileNode(WhileParseNode node, boolean conditionInvers public RubyNode visitXStrNode(XStrParseNode node) { final ParseNode argsNode = buildArrayNode( node.getPosition(), - new StrParseNode(node.getPosition(), node.getValue())); + new StrParseNode(node.getPosition(), node.getValue(), node.encoding)); final ParseNode callNode = new FCallParseNode(node.getPosition(), "`", argsNode, null); final RubyNode ret = callNode.accept(this); return addNewlineIfNeeded(node, ret); diff --git a/src/main/java/org/truffleruby/parser/ParsingParameters.java b/src/main/java/org/truffleruby/parser/ParsingParameters.java index 
5fcbcc7863aa..1a3ace96a921 100644 --- a/src/main/java/org/truffleruby/parser/ParsingParameters.java +++ b/src/main/java/org/truffleruby/parser/ParsingParameters.java @@ -12,18 +12,18 @@ import com.oracle.truffle.api.nodes.Node; import com.oracle.truffle.api.source.Source; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.TStringWithEncoding; public final class ParsingParameters { /** For exceptions during parsing */ private final Node currentNode; - private final Rope rope; + private final TStringWithEncoding tstringWithEnc; private final Source source; - public ParsingParameters(Node currentNode, Rope rope, Source source) { + public ParsingParameters(Node currentNode, TStringWithEncoding tstringWithEnc, Source source) { this.currentNode = currentNode; - this.rope = rope; + this.tstringWithEnc = tstringWithEnc; this.source = source; } @@ -35,8 +35,8 @@ public String getPath() { return RubyLanguage.getPath(source); } - public Rope getRope() { - return rope; + public TStringWithEncoding getTStringWithEnc() { + return tstringWithEnc; } public Source getSource() { diff --git a/src/main/java/org/truffleruby/parser/RubySource.java b/src/main/java/org/truffleruby/parser/RubySource.java index e85de6fbeb9d..f8f0292ad8c8 100644 --- a/src/main/java/org/truffleruby/parser/RubySource.java +++ b/src/main/java/org/truffleruby/parser/RubySource.java @@ -12,11 +12,13 @@ import java.util.Objects; import com.oracle.truffle.api.source.SourceSection; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.Rope; import com.oracle.truffle.api.source.Source; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.string.TStringWithEncoding; public class RubySource { @@ -25,7 +27,8 @@ public class RubySource { * {@link RubyLanguage#getPath(Source)}. 
Kept separate as we might want to change Source#getName() for non-file * Sources in the future (but then we'll need to still use this path in Ruby backtraces). */ private final String sourcePath; - private final Rope sourceRope; + private final TruffleString code; + private final RubyEncoding encoding; private final boolean isEval; private final int lineOffset; @@ -33,20 +36,21 @@ public RubySource(Source source, String sourcePath) { this(source, sourcePath, null, false); } - public RubySource(Source source, String sourcePath, Rope sourceRope) { - this(source, sourcePath, sourceRope, false); + public RubySource(Source source, String sourcePath, TStringWithEncoding code) { + this(source, sourcePath, code, false); } - public RubySource(Source source, String sourcePath, Rope sourceRope, boolean isEval) { - this(source, sourcePath, sourceRope, isEval, 0); + public RubySource(Source source, String sourcePath, TStringWithEncoding code, boolean isEval) { + this(source, sourcePath, code, isEval, 0); } - public RubySource(Source source, String sourcePath, Rope sourceRope, boolean isEval, int lineOffset) { + public RubySource(Source source, String sourcePath, TStringWithEncoding code, boolean isEval, int lineOffset) { assert RubyLanguage.getPath(source).equals(sourcePath) : RubyLanguage.getPath(source) + " vs " + sourcePath; this.source = Objects.requireNonNull(source); //intern() to improve footprint this.sourcePath = Objects.requireNonNull(sourcePath).intern(); - this.sourceRope = sourceRope; + this.code = code != null ? code.tstring : null; + this.encoding = code != null ? 
code.encoding : null; this.isEval = isEval; this.lineOffset = lineOffset; } @@ -59,8 +63,22 @@ public String getSourcePath() { return sourcePath; } - public Rope getRope() { - return sourceRope; + public boolean hasTruffleString() { + return code != null; + } + + public TruffleString getTruffleString() { + return code; + } + + public TStringWithEncoding getTStringWithEncoding() { + assert hasTruffleString(); + return new TStringWithEncoding(code, encoding); + } + + public RubyEncoding getEncoding() { + assert hasTruffleString(); + return encoding; } public boolean isEval() { diff --git a/src/main/java/org/truffleruby/parser/RubyWarnings.java b/src/main/java/org/truffleruby/parser/RubyWarnings.java index a603f7d2e811..681cca90c588 100644 --- a/src/main/java/org/truffleruby/parser/RubyWarnings.java +++ b/src/main/java/org/truffleruby/parser/RubyWarnings.java @@ -34,11 +34,9 @@ import java.io.IOException; import java.nio.charset.StandardCharsets; -import org.jcodings.specific.UTF8Encoding; import org.joni.WarnCallback; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.string.StringOperations; import org.truffleruby.language.control.RaiseException; @@ -108,9 +106,9 @@ private void printWarning(String message) { RubyContext context = RubyLanguage.getCurrentContext(); if (context.getCoreLibrary().isLoaded()) { final Object warning = context.getCoreLibrary().warningModule; - final Rope messageRope = StringOperations.encodeRope(message, UTF8Encoding.INSTANCE); - final RubyString messageString = StringOperations - .createUTF8String(context, context.getLanguageSlow(), messageRope); + final RubyString messageString = StringOperations.createUTF8String(context, context.getLanguageSlow(), + message); + DispatchNode.getUncached().call(warning, "warn", messageString); } else { try { diff --git 
a/src/main/java/org/truffleruby/parser/TranslatorDriver.java b/src/main/java/org/truffleruby/parser/TranslatorDriver.java index b9b3f52b49c5..fc86b43bafd5 100644 --- a/src/main/java/org/truffleruby/parser/TranslatorDriver.java +++ b/src/main/java/org/truffleruby/parser/TranslatorDriver.java @@ -44,7 +44,6 @@ import com.oracle.truffle.api.RootCallTarget; import com.oracle.truffle.api.TruffleSafepoint; import com.oracle.truffle.api.frame.FrameDescriptor; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.aot.ParserCache; @@ -162,12 +161,6 @@ public RootCallTarget parse(RubySource rubySource, ParserContext parserContext, parserConfiguration.setFrozenStringLiteral(true); } - if (rubySource.getRope() != null) { - parserConfiguration.setDefaultEncoding(rubySource.getRope().getEncoding()); - } else { - parserConfiguration.setDefaultEncoding(UTF8Encoding.INSTANCE); - } - // Parse to the JRuby AST final RootParseNode node; @@ -384,7 +377,7 @@ private String getMethodName(ParserContext parserContext, MaterializedFrame pare public static RootParseNode parseToJRubyAST(RubyContext context, RubySource rubySource, StaticScope blockScope, ParserConfiguration configuration, RubyDeferredWarnings rubyWarnings) { - LexerSource lexerSource = new LexerSource(rubySource, configuration.getDefaultEncoding()); + LexerSource lexerSource = new LexerSource(rubySource); // We only need to pass in current scope if we are evaluating as a block (which // is only done for evals). We need to pass this in so that we can appropriately scope // down to captured scopes when we are parsing. 
diff --git a/src/main/java/org/truffleruby/parser/ast/BackRefParseNode.java b/src/main/java/org/truffleruby/parser/ast/BackRefParseNode.java index 89e3adf32d05..eebdcbbe1e30 100644 --- a/src/main/java/org/truffleruby/parser/ast/BackRefParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/BackRefParseNode.java @@ -33,10 +33,7 @@ import java.util.List; -import org.jcodings.specific.USASCIIEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -64,8 +61,9 @@ public T accept(NodeVisitor iVisitor) { return iVisitor.visitBackRefNode(this); } - public Rope getByteName() { - return RopeOperations.create(new byte[]{ '$', (byte) type }, USASCIIEncoding.INSTANCE, CodeRange.CR_7BIT); + public TruffleString getByteName() { + return TruffleString.fromByteArrayUncached(new byte[]{ '$', (byte) type }, TruffleString.Encoding.US_ASCII, + false); } /** Gets the type diff --git a/src/main/java/org/truffleruby/parser/ast/ClassVarAsgnParseNode.java b/src/main/java/org/truffleruby/parser/ast/ClassVarAsgnParseNode.java index 95e91e58188d..aba528bd8df6 100644 --- a/src/main/java/org/truffleruby/parser/ast/ClassVarAsgnParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/ClassVarAsgnParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -44,10 +44,10 @@ public class ClassVarAsgnParseNode extends AssignableParseNode implements INameN /** @param name id of the class variable to assign to * @param valueNode ParseNode used to compute the new value when the assignment is evaled */ 
- public ClassVarAsgnParseNode(SourceIndexLength position, Rope name, ParseNode valueNode) { + public ClassVarAsgnParseNode(SourceIndexLength position, TruffleString name, ParseNode valueNode) { super(position, valueNode); - this.name = name.getJavaString(); + this.name = name.toJavaStringUncached(); } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/ClassVarParseNode.java b/src/main/java/org/truffleruby/parser/ast/ClassVarParseNode.java index 168bac299cf5..79a8755fda13 100644 --- a/src/main/java/org/truffleruby/parser/ast/ClassVarParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/ClassVarParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -42,8 +42,8 @@ public class ClassVarParseNode extends ParseNode implements INameNode, SideEffectFree { private String name; - public ClassVarParseNode(SourceIndexLength position, Rope name) { - this(position, name.getJavaString()); + public ClassVarParseNode(SourceIndexLength position, TruffleString name) { + this(position, name.toJavaStringUncached()); } public ClassVarParseNode(SourceIndexLength position, String name) { diff --git a/src/main/java/org/truffleruby/parser/ast/Colon2ConstParseNode.java b/src/main/java/org/truffleruby/parser/ast/Colon2ConstParseNode.java index abc787e9984c..2b4585c8df85 100644 --- a/src/main/java/org/truffleruby/parser/ast/Colon2ConstParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/Colon2ConstParseNode.java @@ -26,12 +26,12 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.parser.ast; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; /** @author enebo */ public class Colon2ConstParseNode extends 
Colon2ParseNode { - public Colon2ConstParseNode(SourceIndexLength position, ParseNode leftNode, Rope name) { + public Colon2ConstParseNode(SourceIndexLength position, ParseNode leftNode, TruffleString name) { super(position, leftNode, name); assert leftNode != null : "Colon2ConstParseNode cannot have null leftNode"; diff --git a/src/main/java/org/truffleruby/parser/ast/Colon2ImplicitParseNode.java b/src/main/java/org/truffleruby/parser/ast/Colon2ImplicitParseNode.java index 0a2b70713ea7..8d0dd5b0e38c 100644 --- a/src/main/java/org/truffleruby/parser/ast/Colon2ImplicitParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/Colon2ImplicitParseNode.java @@ -28,13 +28,13 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.parser.ast; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; /** Represents a bare class declaration (e.g. class Foo/module Foo). This is slightly misnamed since it contains no * double colons (::), but our cname production needs to be a common type. In JRuby 2, we will rename this. 
*/ public class Colon2ImplicitParseNode extends Colon2ParseNode { - public Colon2ImplicitParseNode(SourceIndexLength position, Rope name) { + public Colon2ImplicitParseNode(SourceIndexLength position, TruffleString name) { super(position, null, name); } } diff --git a/src/main/java/org/truffleruby/parser/ast/Colon2ParseNode.java b/src/main/java/org/truffleruby/parser/ast/Colon2ParseNode.java index 30273503d394..8a8e1fa971f2 100644 --- a/src/main/java/org/truffleruby/parser/ast/Colon2ParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/Colon2ParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -42,7 +42,7 @@ public abstract class Colon2ParseNode extends Colon3ParseNode implements INameNode { protected final ParseNode leftNode; - public Colon2ParseNode(SourceIndexLength position, ParseNode leftNode, Rope name) { + public Colon2ParseNode(SourceIndexLength position, ParseNode leftNode, TruffleString name) { super(position, name); this.leftNode = leftNode; } diff --git a/src/main/java/org/truffleruby/parser/ast/Colon3ParseNode.java b/src/main/java/org/truffleruby/parser/ast/Colon3ParseNode.java index c86d5cf7a46e..3c218b7aca40 100644 --- a/src/main/java/org/truffleruby/parser/ast/Colon3ParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/Colon3ParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -43,9 +43,9 @@ public class Colon3ParseNode extends ParseNode implements INameNode { protected String name; - public Colon3ParseNode(SourceIndexLength 
position, Rope name) { + public Colon3ParseNode(SourceIndexLength position, TruffleString name) { super(position); - this.name = name.getJavaString(); + this.name = name.toJavaStringUncached(); } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/ConstDeclParseNode.java b/src/main/java/org/truffleruby/parser/ast/ConstDeclParseNode.java index a71c1219ab58..f2ff2d5d6811 100644 --- a/src/main/java/org/truffleruby/parser/ast/ConstDeclParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/ConstDeclParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -44,11 +44,15 @@ public class ConstDeclParseNode extends AssignableParseNode implements INameNode private final INameNode constNode; // TODO: Split this into two sub-classes so that name and constNode can be specified separately. - public ConstDeclParseNode(SourceIndexLength position, Rope name, INameNode constNode, ParseNode valueNode) { + public ConstDeclParseNode( + SourceIndexLength position, + TruffleString name, + INameNode constNode, + ParseNode valueNode) { super(position, valueNode); assert constNode != null || (name != null && !name.isEmpty()); - this.name = name == null ? null : name.getJavaString(); + this.name = name == null ? 
null : name.toJavaStringUncached(); this.constNode = constNode; } diff --git a/src/main/java/org/truffleruby/parser/ast/ConstParseNode.java b/src/main/java/org/truffleruby/parser/ast/ConstParseNode.java index ef2bea43e41f..7f3747008bb7 100644 --- a/src/main/java/org/truffleruby/parser/ast/ConstParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/ConstParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -42,8 +42,8 @@ public class ConstParseNode extends ParseNode implements INameNode { private String name; - public ConstParseNode(SourceIndexLength position, Rope name) { - this(position, name.getJavaString()); + public ConstParseNode(SourceIndexLength position, TruffleString name) { + this(position, name.toJavaStringUncached()); } public ConstParseNode(SourceIndexLength position, String name) { diff --git a/src/main/java/org/truffleruby/parser/ast/DefnParseNode.java b/src/main/java/org/truffleruby/parser/ast/DefnParseNode.java index 18051e3537e3..19c199047d1d 100644 --- a/src/main/java/org/truffleruby/parser/ast/DefnParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/DefnParseNode.java @@ -34,7 +34,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -44,7 +44,7 @@ public class DefnParseNode extends MethodDefParseNode implements INameNode { public DefnParseNode( SourceIndexLength position, - Rope name, + TruffleString name, ArgsParseNode argsNode, StaticScope scope, ParseNode bodyNode) { diff --git a/src/main/java/org/truffleruby/parser/ast/DefsParseNode.java 
b/src/main/java/org/truffleruby/parser/ast/DefsParseNode.java index db319a4c0b65..754c135983b3 100644 --- a/src/main/java/org/truffleruby/parser/ast/DefsParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/DefsParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -46,7 +46,7 @@ public class DefsParseNode extends MethodDefParseNode implements INameNode { public DefsParseNode( SourceIndexLength position, ParseNode receiverNode, - Rope name, + TruffleString name, ArgsParseNode argsNode, StaticScope scope, ParseNode bodyNode) { diff --git a/src/main/java/org/truffleruby/parser/ast/FileParseNode.java b/src/main/java/org/truffleruby/parser/ast/FileParseNode.java index 5b9fd1787656..1c7a9a7bbc4f 100644 --- a/src/main/java/org/truffleruby/parser/ast/FileParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/FileParseNode.java @@ -28,12 +28,13 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.parser.ast; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.language.SourceIndexLength; /** Represents __FILE__ nodes */ public class FileParseNode extends StrParseNode implements SideEffectFree { - public FileParseNode(SourceIndexLength position, Rope value) { - super(position, value); + public FileParseNode(SourceIndexLength position, TruffleString value, RubyEncoding encoding) { + super(position, value, encoding); } } diff --git a/src/main/java/org/truffleruby/parser/ast/GlobalAsgnParseNode.java b/src/main/java/org/truffleruby/parser/ast/GlobalAsgnParseNode.java index 3e936f9e5af7..fb4e4172fda6 100644 --- a/src/main/java/org/truffleruby/parser/ast/GlobalAsgnParseNode.java +++ 
b/src/main/java/org/truffleruby/parser/ast/GlobalAsgnParseNode.java @@ -35,7 +35,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -44,10 +44,10 @@ public class GlobalAsgnParseNode extends AssignableParseNode implements INameNode { private String name; - public GlobalAsgnParseNode(SourceIndexLength position, Rope name, ParseNode valueNode) { + public GlobalAsgnParseNode(SourceIndexLength position, TruffleString name, ParseNode valueNode) { super(position, valueNode); - this.name = name.getJavaString(); + this.name = name.toJavaStringUncached(); } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/GlobalVarParseNode.java b/src/main/java/org/truffleruby/parser/ast/GlobalVarParseNode.java index 16d8baee843f..068792617ea1 100644 --- a/src/main/java/org/truffleruby/parser/ast/GlobalVarParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/GlobalVarParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -42,8 +42,8 @@ public class GlobalVarParseNode extends ParseNode implements INameNode { private String name; - public GlobalVarParseNode(SourceIndexLength position, Rope name) { - this(position, name.getJavaString()); + public GlobalVarParseNode(SourceIndexLength position, TruffleString name) { + this(position, name.toJavaStringUncached()); } public GlobalVarParseNode(SourceIndexLength position, String name) { diff --git a/src/main/java/org/truffleruby/parser/ast/InstAsgnParseNode.java b/src/main/java/org/truffleruby/parser/ast/InstAsgnParseNode.java index 
c16785c64621..052d65e1b9f9 100644 --- a/src/main/java/org/truffleruby/parser/ast/InstAsgnParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/InstAsgnParseNode.java @@ -34,7 +34,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -45,10 +45,10 @@ public class InstAsgnParseNode extends AssignableParseNode implements INameNode /** @param name the name of the instance variable * @param valueNode the value of the variable **/ - public InstAsgnParseNode(SourceIndexLength position, Rope name, ParseNode valueNode) { + public InstAsgnParseNode(SourceIndexLength position, TruffleString name, ParseNode valueNode) { super(position, valueNode); - this.name = name.getJavaString(); + this.name = name.toJavaStringUncached(); } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/InstVarParseNode.java b/src/main/java/org/truffleruby/parser/ast/InstVarParseNode.java index 80f827ccf9ff..b96e8cd75bf1 100644 --- a/src/main/java/org/truffleruby/parser/ast/InstVarParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/InstVarParseNode.java @@ -34,7 +34,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -43,8 +43,8 @@ public class InstVarParseNode extends ParseNode implements INameNode, SideEffectFree { private String name; - public InstVarParseNode(SourceIndexLength position, Rope name) { - this(position, name.getJavaString()); + public InstVarParseNode(SourceIndexLength position, TruffleString name) { + this(position, name.toJavaStringUncached()); } public InstVarParseNode(SourceIndexLength position, String 
name) { diff --git a/src/main/java/org/truffleruby/parser/ast/LiteralParseNode.java b/src/main/java/org/truffleruby/parser/ast/LiteralParseNode.java index a5d798dd6bcc..2880b9f14946 100644 --- a/src/main/java/org/truffleruby/parser/ast/LiteralParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/LiteralParseNode.java @@ -28,7 +28,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -39,10 +39,10 @@ public class LiteralParseNode extends ParseNode implements InvisibleNode { private String name; - public LiteralParseNode(SourceIndexLength position, Rope name) { + public LiteralParseNode(SourceIndexLength position, TruffleString name) { super(position); - this.name = name.getJavaString(); + this.name = name.toJavaStringUncached(); } public String getName() { diff --git a/src/main/java/org/truffleruby/parser/ast/MethodDefParseNode.java b/src/main/java/org/truffleruby/parser/ast/MethodDefParseNode.java index 8b1d663658e7..c0e208c3319f 100644 --- a/src/main/java/org/truffleruby/parser/ast/MethodDefParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/MethodDefParseNode.java @@ -32,7 +32,7 @@ * Base class for DefnParseNode and DefsParseNode */ -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.INameNode; import org.truffleruby.parser.scope.StaticScope; @@ -45,7 +45,7 @@ public abstract class MethodDefParseNode extends ParseNode implements INameNode, public MethodDefParseNode( SourceIndexLength position, - Rope name, + TruffleString name, ArgsParseNode argsNode, StaticScope scope, ParseNode bodyNode) { @@ -53,7 +53,7 @@ public MethodDefParseNode( assert bodyNode != null : "bodyNode must not be null"; - this.name = name.getJavaString(); + this.name = 
name.toJavaStringUncached(); this.argsNode = argsNode; this.scope = scope; this.bodyNode = bodyNode; diff --git a/src/main/java/org/truffleruby/parser/ast/OpAsgnConstDeclParseNode.java b/src/main/java/org/truffleruby/parser/ast/OpAsgnConstDeclParseNode.java index 10df4eb3ab09..71d173de9a2b 100644 --- a/src/main/java/org/truffleruby/parser/ast/OpAsgnConstDeclParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/OpAsgnConstDeclParseNode.java @@ -28,17 +28,17 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.visitor.NodeVisitor; /** A::B ||= 1 */ public class OpAsgnConstDeclParseNode extends ParseNode implements BinaryOperatorParseNode { private ParseNode lhs; - private Rope operator; + private TruffleString operator; private ParseNode rhs; - public OpAsgnConstDeclParseNode(SourceIndexLength position, ParseNode lhs, Rope operator, ParseNode rhs) { + public OpAsgnConstDeclParseNode(SourceIndexLength position, ParseNode lhs, TruffleString operator, ParseNode rhs) { super(position); this.lhs = lhs; @@ -57,7 +57,7 @@ public ParseNode getSecondNode() { } public String getOperator() { - return operator.getJavaString(); + return operator.toJavaStringUncached(); } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/RegexpParseNode.java b/src/main/java/org/truffleruby/parser/ast/RegexpParseNode.java index 69a73ed94a5b..35b57cb0274a 100644 --- a/src/main/java/org/truffleruby/parser/ast/RegexpParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/RegexpParseNode.java @@ -33,27 +33,27 @@ import java.util.List; -import org.jcodings.Encoding; +import org.truffleruby.core.encoding.RubyEncoding; import org.truffleruby.core.regexp.RegexpOptions; -import org.truffleruby.core.rope.Rope; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.SourceIndexLength; import 
org.truffleruby.parser.ast.types.ILiteralNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; /** Represents a simple regular expression literal. */ public class RegexpParseNode extends ParseNode implements ILiteralNode { - private final Rope value; + private final TStringWithEncoding value; private final RegexpOptions options; - public RegexpParseNode(SourceIndexLength position, Rope value, RegexpOptions options) { + public RegexpParseNode(SourceIndexLength position, TStringWithEncoding value, RegexpOptions options) { super(position); this.value = value; this.options = options; } - public Encoding getEncoding() { - return value.getEncoding(); + public RubyEncoding getRubyEncoding() { + return value.encoding; } @Override @@ -71,10 +71,8 @@ public RegexpOptions getOptions() { return options; } - /** Gets the value. - * - * @return Returns a Rope */ - public Rope getValue() { + /** Gets the regexp source string. */ + public TStringWithEncoding getValue() { return value; } diff --git a/src/main/java/org/truffleruby/parser/ast/StrParseNode.java b/src/main/java/org/truffleruby/parser/ast/StrParseNode.java index 504297e4144d..3e19a71d86c8 100644 --- a/src/main/java/org/truffleruby/parser/ast/StrParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/StrParseNode.java @@ -34,44 +34,42 @@ import java.util.List; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.ILiteralNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; /** Representing a simple String literal. 
*/ public class StrParseNode extends ParseNode implements ILiteralNode, SideEffectFree { - private Rope value; - private final CodeRange codeRange; + private TruffleString value; + public final RubyEncoding encoding; private boolean frozen; - public StrParseNode(SourceIndexLength position, Rope value) { - this(position, value, value.getCodeRange()); + public StrParseNode(SourceIndexLength position, TStringWithEncoding tStringWithEnc) { + this(position, tStringWithEnc.tstring, tStringWithEnc.encoding); } - public StrParseNode(SourceIndexLength position, Rope value, CodeRange codeRange) { + public StrParseNode(SourceIndexLength position, TruffleString value, RubyEncoding encoding) { super(position); this.value = value; - this.codeRange = codeRange; + this.encoding = encoding; } public StrParseNode(SourceIndexLength position, StrParseNode head, StrParseNode tail) { super(position); - Rope headBL = head.getValue(); - Rope tailBL = tail.getValue(); - - RopeBuilder myValue = new RopeBuilder(); - myValue.setEncoding(headBL.getEncoding()); - myValue.append(headBL); - myValue.append(tailBL); + TStringBuilder myValue = new TStringBuilder(); + myValue.setEncoding(head.encoding); + myValue.append(head.value, head.encoding); + myValue.append(tail.value, tail.encoding); frozen = head.isFrozen() && tail.isFrozen(); - value = myValue.toRope(); - codeRange = value.getCodeRange(); + value = myValue.toTString(); + encoding = head.encoding; } @Override @@ -90,15 +88,12 @@ public T accept(NodeVisitor iVisitor) { /** Gets the value. * * @return Returns a String */ - public Rope getValue() { + public TruffleString getValue() { return value; } - /** Get the string's coderange. 
- * - * @return the string's coderange */ - public CodeRange getCodeRange() { - return codeRange; + public TStringWithEncoding getTStringWithEncoding() { + return new TStringWithEncoding(value, encoding); } @Override @@ -114,7 +109,8 @@ public void setFrozen(boolean frozen) { this.frozen = frozen; } - public void setValue(Rope value) { + public void setValue(TruffleString value) { + assert value.isCompatibleTo(encoding.tencoding); this.value = value; } } diff --git a/src/main/java/org/truffleruby/parser/ast/SymbolParseNode.java b/src/main/java/org/truffleruby/parser/ast/SymbolParseNode.java index b107bb924fb8..278fecc61946 100644 --- a/src/main/java/org/truffleruby/parser/ast/SymbolParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/SymbolParseNode.java @@ -34,12 +34,13 @@ import java.util.List; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; import org.jcodings.specific.USASCIIEncoding; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.StringGuards; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.ILiteralNode; import org.truffleruby.parser.ast.types.INameNode; @@ -49,34 +50,36 @@ public class SymbolParseNode extends ParseNode implements ILiteralNode, INameNode, SideEffectFree { private final String name; - private final Rope rope; + private final TruffleString tstring; + private final Encoding encoding; // Interned ident path (e.g. [':', ident]). 
- public SymbolParseNode(SourceIndexLength position, String name, Encoding encoding, CodeRange cr) { + public SymbolParseNode(SourceIndexLength position, String name, Encoding encoding) { super(position); this.name = name; // Assumed all names are already intern'd by lexer. - assert cr != CodeRange.CR_UNKNOWN; - - if (cr == CodeRange.CR_7BIT) { + RubyEncoding rubyEncoding = Encodings.getBuiltInEncoding(encoding); + this.tstring = TStringUtils.fromJavaString(name, rubyEncoding); + if (StringGuards.is7BitUncached(tstring, rubyEncoding)) { encoding = USASCIIEncoding.INSTANCE; } - - this.rope = StringOperations.encodeRope(name, encoding, cr); + this.encoding = encoding; } // String path (e.g. [':', str_beg, str_content, str_end]) - public SymbolParseNode(SourceIndexLength position, Rope value) { + public SymbolParseNode(SourceIndexLength position, TruffleString value, RubyEncoding rubyEncoding) { super(position); - if (value.isAsciiOnly()) { - rope = RopeOperations.withEncoding(value, USASCIIEncoding.INSTANCE); + if (StringGuards.is7BitUncached(value, rubyEncoding)) { + tstring = value.switchEncodingUncached(Encodings.US_ASCII.tencoding); + rubyEncoding = Encodings.US_ASCII; } else { - rope = value; + tstring = value; } + this.encoding = rubyEncoding.jcoding; - //intern() to allow identity checks for caching - this.name = RopeOperations.decodeRope(rope).intern(); + // intern() to allow identity checks for caching + this.name = tstring.toJavaStringUncached().intern(); } @Override @@ -96,8 +99,24 @@ public String getName() { return name; } - public Rope getRope() { - return rope; + public TruffleString getTString() { + return tstring; + } + + public Encoding getEncoding() { + return encoding; + } + + public RubyEncoding getRubyEncoding() { + return Encodings.getBuiltInEncoding(encoding); + } + + public boolean valueEquals(ILiteralNode o) { + if (!(o instanceof SymbolParseNode)) { + return false; + } + SymbolParseNode other = (SymbolParseNode) o; + return 
tstring.equals(other.tstring) && encoding == other.encoding; } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/VAliasParseNode.java b/src/main/java/org/truffleruby/parser/ast/VAliasParseNode.java index e36cac068370..735d5b77cae7 100644 --- a/src/main/java/org/truffleruby/parser/ast/VAliasParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/VAliasParseNode.java @@ -33,7 +33,7 @@ import java.util.List; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.visitor.NodeVisitor; @@ -42,10 +42,10 @@ public class VAliasParseNode extends ParseNode { private String oldName; private String newName; - public VAliasParseNode(SourceIndexLength position, Rope newName, Rope oldName) { + public VAliasParseNode(SourceIndexLength position, TruffleString newName, TruffleString oldName) { super(position); - this.oldName = oldName.getJavaString(); - this.newName = newName.getJavaString(); + this.oldName = oldName.toJavaStringUncached(); + this.newName = newName.toJavaStringUncached(); } @Override diff --git a/src/main/java/org/truffleruby/parser/ast/XStrParseNode.java b/src/main/java/org/truffleruby/parser/ast/XStrParseNode.java index 6d988893f900..11878e41ccca 100644 --- a/src/main/java/org/truffleruby/parser/ast/XStrParseNode.java +++ b/src/main/java/org/truffleruby/parser/ast/XStrParseNode.java @@ -33,23 +33,23 @@ import java.util.List; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.ast.types.ILiteralNode; import org.truffleruby.parser.ast.visitor.NodeVisitor; /** A 
Backtick(`) string */ public class XStrParseNode extends ParseNode implements ILiteralNode { - private final Rope value; - private CodeRange coderange; + private final TruffleString value; + public final RubyEncoding encoding; - public XStrParseNode(SourceIndexLength position, Rope value, CodeRange coderange) { - // FIXME: Shouldn't this have codeRange like StrParseNode? + public XStrParseNode(SourceIndexLength position, StrParseNode strParseNode) { super(position); - this.value = (value == null ? RopeConstants.EMPTY_US_ASCII_ROPE : value); - this.coderange = coderange; + this.value = strParseNode == null ? TStringConstants.EMPTY_US_ASCII : strParseNode.getValue(); + this.encoding = strParseNode == null ? Encodings.US_ASCII : strParseNode.encoding; } @Override @@ -68,14 +68,10 @@ public T accept(NodeVisitor iVisitor) { /** Gets the value. * * @return Returns a String */ - public Rope getValue() { + public TruffleString getValue() { return value; } - public CodeRange getCodeRange() { - return coderange; - } - @Override public List childNodes() { return EMPTY_LIST; diff --git a/src/main/java/org/truffleruby/parser/lexer/HeredocTerm.java b/src/main/java/org/truffleruby/parser/lexer/HeredocTerm.java index f00cc7de0492..b917bc8bb6f5 100644 --- a/src/main/java/org/truffleruby/parser/lexer/HeredocTerm.java +++ b/src/main/java/org/truffleruby/parser/lexer/HeredocTerm.java @@ -35,10 +35,9 @@ import static org.truffleruby.parser.lexer.RubyLexer.STR_FUNC_INDENT; import static org.truffleruby.parser.lexer.RubyLexer.STR_FUNC_TERM; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.TStringBuilder; import org.truffleruby.parser.parser.RubyParser; /** A lexing unit for scanning a heredoc element. 
Example: @@ -55,7 +54,7 @@ */ public final class HeredocTerm extends StrTerm { /** End marker delimiting heredoc boundary. */ - private final Rope nd_lit; + private final TruffleString nd_lit; /** Indicates whether string interpolation (expansion) should be performed, and the identation of the end marker. */ private final int flags; @@ -67,9 +66,9 @@ public final class HeredocTerm extends StrTerm { final int line; /** Portion of the line where the end marker is declarer, from right after the marker until the end of the line. */ - final Rope lastLine; + final TruffleString lastLine; - public HeredocTerm(Rope marker, int func, int nth, int line, Rope lastLine) { + public HeredocTerm(TruffleString marker, int func, int nth, int line, TruffleString lastLine) { this.nd_lit = marker; this.flags = func; this.nth = nth; @@ -82,8 +81,8 @@ public int getFlags() { return flags; } - protected int error(RubyLexer lexer, Rope eos) { - lexer.compile_error("can't find string \"" + RopeOperations.decodeRope(eos) + "\" anywhere before EOF"); + protected int error(RubyLexer lexer, TruffleString eos) { + lexer.compile_error("can't find string \"" + eos.toJavaStringUncached() + "\" anywhere before EOF"); return -1; } @@ -96,7 +95,7 @@ private int restore(RubyLexer lexer) { @Override public int parseString(RubyLexer lexer) { - RopeBuilder str = null; + TStringBuilder str = null; boolean indent = (flags & STR_FUNC_INDENT) != 0; int c = lexer.nextc(); @@ -105,7 +104,7 @@ public int parseString(RubyLexer lexer) { } // Found end marker for this heredoc, at the start of a line - if (lexer.was_bol() && lexer.whole_match_p(this.nd_lit, indent)) { + if (lexer.was_bol() && lexer.whole_match_p(this.nd_lit, lexer.tencoding, indent)) { lexer.heredoc_restore(this); // will also skip over the end marker lexer.setStrTerm(null); lexer.setState(EXPR_END); @@ -116,7 +115,7 @@ public int parseString(RubyLexer lexer) { // heredocs without string interpolation do { // iterate on lines, while end marker not 
found - final Rope lbuf = lexer.lexb; + final TruffleString lbuf = lexer.lexb; int pend = lexer.lex_pend; // Remove trailing newline, it will be appended later in normalized form (single \n). @@ -143,12 +142,14 @@ public int parseString(RubyLexer lexer) { lexer.setHeredocLineIndent(0); } + var bytes = lbuf.getInternalByteArrayUncached(lexer.getTEncoding()); if (str != null) { - str.append(lbuf.getBytes(), 0, pend); + str.append(bytes.getArray(), bytes.getOffset(), pend); } else { // lazy initialization of string builder - final RopeBuilder builder = RopeBuilder.createRopeBuilder(lbuf.getBytes(), 0, pend); - builder.setEncoding(lbuf.getEncoding()); + final TStringBuilder builder = TStringBuilder.create(bytes.getArray(), bytes.getOffset(), + pend); + builder.setEncoding(lexer.encoding); str = builder; } @@ -166,12 +167,12 @@ public int parseString(RubyLexer lexer) { if (lexer.nextc() == -1) { return error(lexer, nd_lit); } - } while (!lexer.whole_match_p(nd_lit, indent)); + } while (!lexer.whole_match_p(nd_lit, lexer.tencoding, indent)); } else { // heredoc with string interpolation - RopeBuilder tok = new RopeBuilder(); - tok.setEncoding(lexer.getEncoding()); + TStringBuilder tok = new TStringBuilder(); + tok.setEncoding(lexer.encoding); if (c == '#') { // interpolated variable or block begin @@ -226,7 +227,7 @@ public int parseString(RubyLexer lexer) { return error(lexer, nd_lit); } // NOTE: The end marker is not processed here, but in the next call to HeredocTerm#parseString - } while (!lexer.whole_match_p(nd_lit, indent)); + } while (!lexer.whole_match_p(nd_lit, lexer.tencoding, indent)); str = tok; } diff --git a/src/main/java/org/truffleruby/parser/lexer/LexerSource.java b/src/main/java/org/truffleruby/parser/lexer/LexerSource.java index c52b07b02445..ac1d3db87805 100644 --- a/src/main/java/org/truffleruby/parser/lexer/LexerSource.java +++ b/src/main/java/org/truffleruby/parser/lexer/LexerSource.java @@ -36,13 +36,12 @@ ***** END LICENSE BLOCK *****/ package 
org.truffleruby.parser.lexer; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; - -import java.nio.charset.StandardCharsets; - +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.parser.RubySource; import org.truffleruby.parser.parser.ParserRopeOperations; @@ -50,28 +49,37 @@ public class LexerSource { - private final ParserRopeOperations parserRopeOperations = new ParserRopeOperations(); + public ParserRopeOperations parserRopeOperations; private final Source source; private final String sourcePath; - private final boolean fromRope; + private final boolean fromTruffleString; - private Rope sourceBytes; + private TruffleString sourceBytes; + private final int sourceByteLength; + private RubyEncoding encoding; private int byteOffset; private final int lineOffset; - public LexerSource(RubySource rubySource, Encoding encoding) { + public LexerSource(RubySource rubySource) { this.source = rubySource.getSource(); this.sourcePath = rubySource.getSourcePath(); - fromRope = rubySource.getRope() != null; + fromTruffleString = rubySource.hasTruffleString(); - if (fromRope) { - this.sourceBytes = rubySource.getRope(); + final RubyEncoding rubyEncoding; + if (fromTruffleString) { + rubyEncoding = rubySource.getEncoding(); + this.sourceBytes = rubySource.getTruffleString(); } else { - // TODO CS 5-Sep-17 can we get the bytes directly rather than using getCharacters -> toString -> getBytes? 
- this.sourceBytes = RopeOperations - .create(source.getCharacters().toString().getBytes(StandardCharsets.UTF_8), encoding, CR_UNKNOWN); + rubyEncoding = Encodings.UTF_8; + // TODO CS 5-Sep-17 can we get the bytes directly rather than using getCharacters -> toString -> getBytes? + var sourceString = source.getCharacters().toString(); + // this.sourceBytes = TStringUtils.fromByteArray(sourceString.getBytes(StandardCharsets.UTF_8), TruffleString.Encoding.UTF_8); + this.sourceBytes = TStringUtils.utf8TString(sourceString); } + this.sourceByteLength = sourceBytes.byteLength(rubyEncoding.tencoding); + this.encoding = rubyEncoding; + parserRopeOperations = new ParserRopeOperations(this.encoding); this.lineOffset = rubySource.getLineOffset(); } @@ -84,26 +92,33 @@ public String getSourcePath() { } public Encoding getEncoding() { - return sourceBytes.getEncoding(); + return encoding.jcoding; + } + + public RubyEncoding getRubyEncoding() { + return encoding; } - public void setEncoding(Encoding encoding) { - sourceBytes = parserRopeOperations.withEncoding(sourceBytes, encoding); + public void setEncoding(Encoding jcoding) { + var rubyEncoding = Encodings.getBuiltInEncoding(jcoding); + this.sourceBytes = sourceBytes.forceEncodingUncached(this.encoding.tencoding, rubyEncoding.tencoding); + this.encoding = rubyEncoding; + this.parserRopeOperations = new ParserRopeOperations(this.encoding); } public int getOffset() { return byteOffset; } - public Rope gets() { - if (byteOffset >= sourceBytes.byteLength()) { + public TruffleString gets() { + if (byteOffset >= sourceByteLength) { return null; } int lineEnd = nextNewLine() + 1; if (lineEnd == 0) { - lineEnd = sourceBytes.byteLength(); + lineEnd = sourceByteLength; } final int start = byteOffset; @@ -111,28 +126,21 @@ public Rope gets() { byteOffset = lineEnd; - final Rope line = parserRopeOperations.makeShared(sourceBytes, start, length); - assert line.getEncoding() == sourceBytes.getEncoding(); - return line; + return 
parserRopeOperations.makeShared(sourceBytes, start, length); } private int nextNewLine() { - int n = byteOffset; - - final byte[] bytes = sourceBytes.getBytes(); - while (n < bytes.length) { - if (bytes[n] == '\n') { - return n; - } - - n++; + int index = sourceBytes.byteIndexOfAnyByteUncached(byteOffset, sourceByteLength, + TStringConstants.NEWLINE_BYTE_ARRAY, encoding.tencoding); + if (index < 0) { + return -1; + } else { + return index; } - - return -1; } - public boolean isFromRope() { - return fromRope; + public boolean isFromTruffleString() { + return fromTruffleString; } public int getLineOffset() { diff --git a/src/main/java/org/truffleruby/parser/lexer/MagicCommentHandler.java b/src/main/java/org/truffleruby/parser/lexer/MagicCommentHandler.java index 7098e621002d..27f689b9a36d 100644 --- a/src/main/java/org/truffleruby/parser/lexer/MagicCommentHandler.java +++ b/src/main/java/org/truffleruby/parser/lexer/MagicCommentHandler.java @@ -12,10 +12,10 @@ */ package org.truffleruby.parser.lexer; -import org.truffleruby.core.rope.Rope; +import com.oracle.truffle.api.strings.TruffleString; public interface MagicCommentHandler { - boolean onMagicComment(String name, Rope value); + boolean onMagicComment(String name, TruffleString value); } diff --git a/src/main/java/org/truffleruby/parser/lexer/RubyLexer.java b/src/main/java/org/truffleruby/parser/lexer/RubyLexer.java index 0bd074570a67..d5cd4e9418a1 100644 --- a/src/main/java/org/truffleruby/parser/lexer/RubyLexer.java +++ b/src/main/java/org/truffleruby/parser/lexer/RubyLexer.java @@ -43,9 +43,7 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.parser.lexer; -import static org.truffleruby.core.rope.CodeRange.CR_7BIT; -import static org.truffleruby.core.rope.CodeRange.CR_BROKEN; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; +import static com.oracle.truffle.api.strings.TruffleString.Encoding.US_ASCII; import static org.truffleruby.core.string.StringSupport.isAsciiSpace; import 
java.math.BigDecimal; @@ -57,8 +55,10 @@ import java.util.function.BiConsumer; import com.oracle.truffle.api.TruffleSafepoint; +import com.oracle.truffle.api.strings.InternalByteArray; +import com.oracle.truffle.api.strings.TruffleString; +import com.oracle.truffle.api.strings.TruffleString.ErrorHandling; import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.USASCIIEncoding; import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; @@ -66,12 +66,14 @@ import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.DummyNode; import org.truffleruby.core.array.ArrayUtils; -import org.truffleruby.core.rope.BytesKey; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.EncodingManager; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.BytesKey; +import org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.core.string.StringSupport; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.language.control.RaiseException; @@ -101,8 +103,6 @@ */ public class RubyLexer implements MagicCommentHandler { - private final ParserRopeOperations parserRopeOperations = new ParserRopeOperations(); - private BignumParseNode newBignumNode(String value, int radix) { return new BignumParseNode(getPosition(), new BigInteger(value, radix)); } @@ -182,7 +182,7 @@ public enum Keyword { __ENCODING__("__ENCODING__", RubyParser.keyword__ENCODING__, EXPR_END); public final String name; - public final Rope bytes; + public 
final TruffleString bytes; public final int id0; public final int id1; public final int state; @@ -196,7 +196,8 @@ private abstract static class Maps { final Map fromBytes = new HashMap<>(); for (Keyword keyword : Keyword.values()) { fromString.put(keyword.name, keyword); - fromBytes.put(new BytesKey(keyword.bytes.getBytes(), null), keyword); + fromBytes.put(new BytesKey(TStringUtils.getBytesOrFail(keyword.bytes, Encodings.US_ASCII), null), + keyword); } FROM_STRING = Collections.unmodifiableMap(fromString); FROM_BYTES = Collections.unmodifiableMap(fromBytes); @@ -209,7 +210,7 @@ private abstract static class Maps { Keyword(String name, int id, int modifier, int state) { this.name = name; - this.bytes = RopeOperations.encodeAscii(name, USASCIIEncoding.INSTANCE); + this.bytes = TStringUtils.usAsciiString(name); this.id0 = id; this.id1 = modifier; this.state = state; @@ -220,8 +221,8 @@ public static Keyword getKeyword(String str) { return Keyword.Maps.FROM_STRING.get(str); } - public static Keyword getKeyword(Rope rope) { - return Keyword.Maps.FROM_BYTES.get(new BytesKey(rope.getBytes(), null)); + public static Keyword getKeyword(TruffleString rope, RubyEncoding encoding) { + return Keyword.Maps.FROM_BYTES.get(new BytesKey(TStringUtils.getBytesOrCopy(rope, encoding), null)); } // Used for tiny smidgen of grammar in lexer (see setParserSupport()) @@ -231,10 +232,10 @@ public static Keyword getKeyword(Rope rope) { private RubyDeferredWarnings warnings; public int tokenize_ident(int result) { - Rope value = createTokenRope(); + TruffleString value = createTokenRope(); if (isLexState(last_state, EXPR_DOT | EXPR_FNAME) && - parserSupport.getCurrentScope().isDefined(value.getJavaString().intern()) >= 0) { + parserSupport.getCurrentScope().isDefined(value.toJavaStringUncached().intern()) >= 0) { setState(EXPR_END); } @@ -260,6 +261,9 @@ public void reset() { // nextc will increment for the first character on the first line ruby_sourceline--; + this.encoding = 
src.getRubyEncoding(); + this.tencoding = this.encoding.tencoding; + parser_prepare(); } @@ -269,7 +273,7 @@ public int nextc() { return EOF; } - final Rope line = src.gets(); + final TruffleString line = src.gets(); if (line == null) { eofp = true; lex_goto_eol(); @@ -285,7 +289,7 @@ public int nextc() { updateLineOffset(); line_count++; lex_pbeg = lex_p = 0; - lex_pend = lex_p + line.byteLength(); + lex_pend = lex_p + line.byteLength(tencoding); lexb = line; flush(); } @@ -318,7 +322,7 @@ public void heredoc_dedent(ParseNode root) { // Other types of string parse nodes do not need dedentation (e.g. EvStrParseNode) if (root instanceof StrParseNode) { StrParseNode str = (StrParseNode) root; - str.setValue(dedent_string(str.getValue(), indent)); + str.setValue(dedent_string(str.getTStringWithEncoding(), indent)); } else if (root instanceof ListParseNode) { ListParseNode list = (ListParseNode) root; int length = list.size(); @@ -332,7 +336,7 @@ public void heredoc_dedent(ParseNode root) { currentLine = line; if (child instanceof StrParseNode) { final StrParseNode childStrNode = (StrParseNode) child; - childStrNode.setValue(dedent_string(childStrNode.getValue(), indent)); + childStrNode.setValue(dedent_string(childStrNode.getTStringWithEncoding(), indent)); } } } @@ -349,10 +353,10 @@ public void compile_error(SyntaxException.PID pid, String message) { /** Continue parsing after parsing a heredoc: restore the rest of line after the heredoc start marker, also sets * {@link #heredoc_end} to the line where the heredoc ends, so that we can skip the already parsed heredoc. 
*/ void heredoc_restore(HeredocTerm here) { - Rope line = here.lastLine; + TruffleString line = here.lastLine; lexb = line; lex_pbeg = 0; - lex_pend = lex_pbeg + line.byteLength(); + lex_pend = lex_pbeg + line.byteLength(tencoding); lex_p = lex_pbeg + here.nth; heredoc_end = ruby_sourceline; ruby_sourceline = here.line; @@ -403,7 +407,7 @@ public void updateLineOffset() { } } - protected void setCompileOptionFlag(String name, Rope value) { + protected void setCompileOptionFlag(String name, TruffleString value) { if (tokenSeen) { warnings.warning( getFile(), @@ -426,19 +430,12 @@ protected void setCompileOptionFlag(String name, Rope value) { } } - private static final Rope TRUE = RopeOperations - .create(new byte[]{ 't', 'r', 'u', 'e' }, ASCIIEncoding.INSTANCE, CR_7BIT); - private static final Rope FALSE = RopeOperations - .create(new byte[]{ 'f', 'a', 'l', 's', 'e' }, ASCIIEncoding.INSTANCE, CR_7BIT); - - protected int asTruth(String name, Rope value) { - int result = RopeOperations.caseInsensitiveCmp(value, TRUE); - if (result == 0) { + protected int asTruth(String name, TruffleString value) { + if (value.toJavaStringUncached().equalsIgnoreCase("true")) { return 1; } - result = RopeOperations.caseInsensitiveCmp(value, FALSE); - if (result == 0) { + if (value.toJavaStringUncached().equalsIgnoreCase("false")) { return 0; } @@ -446,23 +443,24 @@ protected int asTruth(String name, Rope value) { return -1; } - protected void setTokenInfo(String name, Rope value) { + protected void setTokenInfo(String name, TruffleString value) { } - protected void setEncoding(Rope name) { + protected void setEncoding(TruffleString name) { final RubyContext context = parserSupport.getConfiguration().getContext(); - final Encoding newEncoding = parserSupport.getEncoding(name); + var nameString = name.toJavaStringUncached(); + final Encoding newEncoding = EncodingManager.getEncoding(nameString); if (newEncoding == null) { - throw argumentError(context, "unknown encoding name: " + 
RopeOperations.decodeRope(name)); + throw argumentError(context, "unknown encoding name: " + nameString); } if (!newEncoding.isAsciiCompatible()) { - throw argumentError(context, RopeOperations.decodeRope(name) + " is not ASCII compatible"); + throw argumentError(context, nameString + " is not ASCII compatible"); } - if (!src.isFromRope() && !isUTF8Subset(newEncoding)) { + if (!src.isFromTruffleString() && !isUTF8Subset(newEncoding)) { /* The source we are lexing came in via a String (or Reader, or File) from the Polyglot API, so we only have * the String - we don't have any access to the original bytes, so we cannot re-interpret them in another * encoding without risking errors. */ @@ -479,7 +477,7 @@ protected void setEncoding(Rope name) { context, String.format( "%s cannot be used as an encoding for a %s as it is not UTF-8 or a subset of UTF-8", - RopeOperations.decodeRope(name), + nameString, description)); } @@ -571,27 +569,25 @@ private int getIntegerToken(String value, int radix, int suffix) { return considerComplex(RubyParser.tINTEGER, suffix); } - public StrParseNode createStr(RopeBuilder buffer, int flags) { - return createStr(buffer.toRope(), flags); + public StrParseNode createStr(TStringBuilder buffer, int flags) { + return createStr(buffer.toTString(), buffer.getRubyEncoding(), flags); } // STR_NEW3/parser_str_new - public StrParseNode createStr(Rope buffer, int flags) { - Encoding bufferEncoding = buffer.getEncoding(); - CodeRange codeRange = buffer.getCodeRange(); + public StrParseNode createStr(TruffleString bufferTString, RubyEncoding bufferEncoding, int flags) { + TStringWithEncoding buffer = new TStringWithEncoding(bufferTString, bufferEncoding); - if ((flags & STR_FUNC_REGEXP) == 0 && bufferEncoding.isAsciiCompatible()) { - // If we have characters outside 7-bit range and we are still ascii then change to ascii-8bit - if (codeRange == CodeRange.CR_7BIT) { + if ((flags & STR_FUNC_REGEXP) == 0 && bufferEncoding.isAsciiCompatible) { + // If we 
have characters outside 7-bit range and we are still ascii then change to binary + if (buffer.isAsciiOnly()) { // Do nothing like MRI - } else if (getEncoding() == USASCIIEncoding.INSTANCE && - bufferEncoding != UTF8Encoding.INSTANCE) { - codeRange = associateEncoding(buffer, ASCIIEncoding.INSTANCE, codeRange); - buffer = parserRopeOperations.withEncoding(buffer, ASCIIEncoding.INSTANCE); + } else if (encoding == Encodings.US_ASCII && bufferEncoding != Encodings.UTF_8) { + assert !buffer.isAsciiOnly(); + buffer = buffer.forceEncoding(Encodings.BINARY); } } - StrParseNode newStr = new StrParseNode(getPosition(), buffer, codeRange); + StrParseNode newStr = new StrParseNode(getPosition(), buffer); if (parserSupport.getConfiguration().isFrozenStringLiteral()) { newStr.setFrozen(true); @@ -600,20 +596,6 @@ public StrParseNode createStr(Rope buffer, int flags) { return newStr; } - public static CodeRange associateEncoding(Rope buffer, Encoding newEncoding, CodeRange codeRange) { - Encoding bufferEncoding = buffer.getEncoding(); - - if (newEncoding == bufferEncoding) { - return codeRange; - } - - if (codeRange != CodeRange.CR_7BIT || !newEncoding.isAsciiCompatible()) { - return CodeRange.CR_UNKNOWN; - } - - return codeRange; - } - /** What type/kind of quote are we dealing with? 
* * @param c first character the the quote construct @@ -725,7 +707,7 @@ private int hereDocumentIdentifier() { indent = Integer.MAX_VALUE; } - Rope markerValue; // the value that marks the end of the heredoc + TruffleString markerValue; // the value that marks the end of the heredoc if (c == '\'' || c == '"' || c == '`') { // the marker is quoted @@ -791,12 +773,12 @@ private int hereDocumentIdentifier() { lex_strterm = new HeredocTerm(markerValue, func, len, ruby_sourceline, lexb); if (term == '`') { - yaccValue = RopeConstants.BACKTICK; + yaccValue = TStringConstants.BACKTICK; flush(); return RubyParser.tXSTRING_BEG; // marks the beggining of a backtick string in the parser } - yaccValue = RopeConstants.QQ; // double quote + yaccValue = TStringConstants.QQ; // double quote heredoc_indent = indent; // 0 if [<<-], MAX_VALUE if [<<~] heredoc_line_indent = 0; flush(); @@ -861,7 +843,8 @@ private int yylex() { // verbose is not known at this point and we don't want to remove the tokenSeen check because it would // affect lexer performance. 
if (!tokenSeen) { - if (!parser_magic_comment(lexb, lex_p, lex_pend - lex_p, parserRopeOperations, this)) { + if (!parser_magic_comment(new TStringWithEncoding(lexb, encoding), lex_p, lex_pend - lex_p, + src.parserRopeOperations, this)) { if (comment_at_top()) { set_file_encoding(lex_p, lex_pend); } @@ -932,9 +915,9 @@ private int yylex() { // documentation nodes if (was_bol()) { if (strncmp( - parserRopeOperations.makeShared(lexb, lex_p, lex_pend - lex_p), + src.parserRopeOperations.makeShared(lexb, lex_p, lex_pend - lex_p), BEGIN_DOC_MARKER, - BEGIN_DOC_MARKER.byteLength()) && + BEGIN_DOC_MARKER.byteLength(TruffleString.Encoding.BYTES)) && Character.isWhitespace(p(lex_p + 5))) { for (;;) { lex_goto_eol(); @@ -951,9 +934,9 @@ private int yylex() { } if (strncmp( - parserRopeOperations.makeShared(lexb, lex_p, lex_pend - lex_p), + src.parserRopeOperations.makeShared(lexb, lex_p, lex_pend - lex_p), END_DOC_MARKER, - END_DOC_MARKER.byteLength()) && + END_DOC_MARKER.byteLength(TruffleString.Encoding.BYTES)) && (lex_p + 3 == lex_pend || Character.isWhitespace(p(lex_p + 3)))) { break; } @@ -970,22 +953,22 @@ private int yylex() { if (c == '=') { c = nextc(); if (c == '=') { - yaccValue = RopeConstants.EQ_EQ_EQ; + yaccValue = TStringConstants.EQ_EQ_EQ; return RubyParser.tEQQ; } pushback(c); - yaccValue = RopeConstants.EQ_EQ; + yaccValue = TStringConstants.EQ_EQ; return RubyParser.tEQ; } if (c == '~') { - yaccValue = RopeConstants.EQ_TILDE; + yaccValue = TStringConstants.EQ_TILDE; return RubyParser.tMATCH; } else if (c == '>') { - yaccValue = RopeConstants.EQ_GT; + yaccValue = TStringConstants.EQ_GT; return RubyParser.tASSOC; } pushback(c); - yaccValue = RopeConstants.EQ; + yaccValue = TStringConstants.EQ; return '='; case '<': @@ -1036,7 +1019,7 @@ private int yylex() { case ';': commandStart = true; setState(EXPR_BEG); - yaccValue = RopeConstants.SEMICOLON; + yaccValue = TStringConstants.SEMICOLON; return ';'; case ',': return comma(c); @@ -1055,7 +1038,7 @@ private int 
yylex() { continue; } pushback(c); - yaccValue = RopeConstants.BACKSLASH; + yaccValue = TStringConstants.BACKSLASH; return '\\'; case '%': return percent(spaceSeen); @@ -1064,7 +1047,7 @@ private int yylex() { case '@': return at(); case '_': - if (was_bol() && whole_match_p(END_MARKER, false)) { + if (was_bol() && whole_match_p(END_MARKER, TruffleString.Encoding.BYTES, false)) { endPosition = src.getOffset(); eofp = true; @@ -1078,9 +1061,9 @@ private int yylex() { } } - private int identifierToken(int result, Rope value) { + private int identifierToken(int result, TruffleString value) { if (result == RubyParser.tIDENTIFIER && !isLexState(last_state, EXPR_DOT | EXPR_FNAME) && - parserSupport.getCurrentScope().isDefined(value.getJavaString().intern()) >= 0) { + parserSupport.getCurrentScope().isDefined(value.toJavaStringUncached().intern()) >= 0) { setState(EXPR_END | EXPR_LABEL); } @@ -1095,20 +1078,20 @@ private int ampersand(boolean spaceSeen) { case '&': setState(EXPR_BEG); if ((c = nextc()) == '=') { - yaccValue = RopeConstants.AMPERSAND_AMPERSAND; + yaccValue = TStringConstants.AMPERSAND_AMPERSAND; setState(EXPR_BEG); return RubyParser.tOP_ASGN; } pushback(c); - yaccValue = RopeConstants.AMPERSAND_AMPERSAND; + yaccValue = TStringConstants.AMPERSAND_AMPERSAND; return RubyParser.tANDOP; case '=': - yaccValue = RopeConstants.AMPERSAND; + yaccValue = TStringConstants.AMPERSAND; setState(EXPR_BEG); return RubyParser.tOP_ASGN; case '.': setState(EXPR_DOT); - yaccValue = RopeConstants.AMPERSAND_DOT; + yaccValue = TStringConstants.AMPERSAND_DOT; return RubyParser.tANDDOT; } pushback(c); @@ -1132,29 +1115,32 @@ private int ampersand(boolean spaceSeen) { setState(isAfterOperator() ? 
EXPR_ARG : EXPR_BEG); - yaccValue = RopeConstants.AMPERSAND; + yaccValue = TStringConstants.AMPERSAND; return c; } - private static boolean hasShebangLine(byte[] bytes) { - return bytes.length > 2 && bytes[0] == '#' && bytes[1] == '!'; + private static boolean hasShebangLine(InternalByteArray bytes) { + return bytes.getLength() > 2 && bytes.get(0) == '#' && bytes.get(1) == '!'; } - private static int newLineIndex(byte[] bytes, int start) { - for (int i = start; i < bytes.length; i++) { - if (bytes[i] == '\n') { - return i; - } + private static int newLineIndex(InternalByteArray bytes, int start) { + int index = com.oracle.truffle.api.ArrayUtils.indexOf( + bytes.getArray(), + bytes.getOffset() + start, + bytes.getEnd(), + (byte) '\n'); + if (index < 0) { + return bytes.getLength(); + } else { + return index - bytes.getOffset(); } - - return bytes.length; } /** Peak in source to see if there is a magic comment. This is used by eval() & friends to know the actual encoding * of the source code, and be able to convert to a Java String faithfully. 
*/ - public static void parseMagicComment(Rope source, BiConsumer magicCommentHandler) { - final byte[] bytes = source.getBytes(); - final int length = source.byteLength(); + public static void parseMagicComment(TStringWithEncoding source, BiConsumer magicCommentHandler) { + var bytes = source.getInternalByteArray(); + final int length = bytes.getLength(); int start = 0; if (hasShebangLine(bytes)) { @@ -1162,11 +1148,11 @@ public static void parseMagicComment(Rope source, BiConsumer magic } // Skip leading spaces but don't jump to another line - while (start < length && isAsciiSpace(bytes[start]) && bytes[start] != '\n') { + while (start < length && isAsciiSpace(bytes.get(start)) && bytes.get(start) != '\n') { start++; } - if (start < length && bytes[start] == '#') { + if (start < length && bytes.get(start) == '#') { start++; final int magicLineStart = start; @@ -1176,17 +1162,19 @@ public static void parseMagicComment(Rope source, BiConsumer magic } int magicLineLength = endOfMagicLine - magicLineStart; - parser_magic_comment(source, magicLineStart, magicLineLength, new ParserRopeOperations(), (name, value) -> { - magicCommentHandler.accept(name, value); - return isKnownMagicComment(name); - }); + RubyEncoding rubyEncoding = source.getEncoding(); + parser_magic_comment(source, magicLineStart, magicLineLength, + new ParserRopeOperations(rubyEncoding), + (name, value) -> { + magicCommentHandler.accept(name, value.toJavaStringUncached()); + return isKnownMagicComment(name); + }); } } // MRI: parser_magic_comment - private static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, int magicLineLength, + private static boolean parser_magic_comment(TStringWithEncoding magicLine, int magicLineOffset, int magicLineLength, ParserRopeOperations parserRopeOperations, MagicCommentHandler magicCommentHandler) { - boolean emacsStyle = false; int i = magicLineOffset; int end = magicLineOffset + magicLineLength; @@ -1215,7 +1203,7 @@ private static boolean 
parser_magic_comment(Rope magicLine, int magicLineOffset, // Ignore leading whitespace or '":; while (i < end) { - byte c = magicLine.get(i); + int c = magicLine.get(i); if (isIgnoredMagicLineCharacter(c) || isAsciiSpace(c)) { i++; @@ -1228,7 +1216,7 @@ private static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, // Consume anything except [\s'":;] while (i < end) { - byte c = magicLine.get(i); + int c = magicLine.get(i); if (isIgnoredMagicLineCharacter(c) || isAsciiSpace(c)) { break; @@ -1249,7 +1237,7 @@ private static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, } // Expect ':' between name and value - final byte sep = magicLine.get(i); + final int sep = magicLine.get(i); if (sep == ':') { i++; } else { @@ -1288,7 +1276,7 @@ private static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, } else { valueBegin = i; while (i < end) { - byte c = magicLine.get(i); + int c = magicLine.get(i); if (c != '"' && c != ';' && !isAsciiSpace(c)) { i++; } else { @@ -1300,7 +1288,8 @@ private static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, if (emacsStyle) { // Ignore trailing whitespace or ; - while (i < end && (magicLine.get(i) == ';' || isAsciiSpace(magicLine.get(i)))) { + while (i < end && (magicLine.get(i) == ';' || + isAsciiSpace(magicLine.get(i)))) { i++; } } else { @@ -1314,10 +1303,8 @@ private static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, } } - final String name = RopeOperations - .decodeRopeSegment(magicLine, nameBegin, nameEnd - nameBegin) - .replace('-', '_'); - final Rope value = parserRopeOperations.makeShared(magicLine, valueBegin, valueEnd - valueBegin); + final String name = magicLine.substring(nameBegin, nameEnd - nameBegin).toJavaString().replace('-', '_'); + final TruffleString value = magicLine.substringAsTString(valueBegin, valueEnd - valueBegin); if (!magicCommentHandler.onMagicComment(name, value)) { return false; @@ -1327,7 +1314,7 @@ private 
static boolean parser_magic_comment(Rope magicLine, int magicLineOffset, return true; } - private static boolean isIgnoredMagicLineCharacter(byte c) { + private static boolean isIgnoredMagicLineCharacter(int c) { switch (c) { case '\'': case '"': @@ -1340,14 +1327,14 @@ private static boolean isIgnoredMagicLineCharacter(byte c) { } /* MRI: magic_comment_marker Find -*-, as in emacs "file local variable" (special comment at the top of the file) */ - private static int findEmacsStyleMarker(Rope str, int begin, int end) { - final byte[] bytes = str.getBytes(); + private static int findEmacsStyleMarker(TStringWithEncoding str, int begin, int end) { + var bytes = str.getInternalByteArray(); int i = begin; while (i < end) { - switch (bytes[i]) { + switch (bytes.get(i)) { case '-': - if (i >= 2 && bytes[i - 1] == '*' && bytes[i - 2] == '-') { + if (i >= 2 && bytes.get(i - 1) == '*' && bytes.get(i - 2) == '-') { return i + 1; } i += 2; @@ -1357,9 +1344,9 @@ private static int findEmacsStyleMarker(Rope str, int begin, int end) { return -1; } - if (bytes[i + 1] != '-') { + if (bytes.get(i + 1) != '-') { i += 4; - } else if (bytes[i - 1] != '-') { + } else if (bytes.get(i - 1) != '-') { i += 2; } else { return i + 2; @@ -1374,7 +1361,7 @@ private static int findEmacsStyleMarker(Rope str, int begin, int end) { } @Override - public boolean onMagicComment(String name, Rope value) { + public boolean onMagicComment(String name, TruffleString value) { if (isMagicEncodingComment(name)) { magicCommentEncoding(value); return true; @@ -1449,7 +1436,7 @@ private int at() { } private int backtick(boolean commandState) { - yaccValue = RopeConstants.BACKTICK; + yaccValue = TStringConstants.BACKTICK; if (isLexState(lex_state, EXPR_FNAME)) { setState(EXPR_ENDFN); @@ -1471,7 +1458,7 @@ private int bang() { if (isAfterOperator()) { setState(EXPR_ARG); if (c == '@') { - yaccValue = RopeConstants.BANG; + yaccValue = TStringConstants.BANG; return RubyParser.tBANG; } } else { @@ -1480,16 +1467,16 
@@ private int bang() { switch (c) { case '=': - yaccValue = RopeConstants.BANG_EQ; + yaccValue = TStringConstants.BANG_EQ; return RubyParser.tNEQ; case '~': - yaccValue = RopeConstants.BANG_TILDE; + yaccValue = TStringConstants.BANG_TILDE; return RubyParser.tNMATCH; default: // Just a plain bang pushback(c); - yaccValue = RopeConstants.BANG; + yaccValue = TStringConstants.BANG; return RubyParser.tBANG; } @@ -1499,14 +1486,14 @@ private int caret() { int c = nextc(); if (c == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.CARET; + yaccValue = TStringConstants.CARET; return RubyParser.tOP_ASGN; } setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG); pushback(c); - yaccValue = RopeConstants.CARET; + yaccValue = TStringConstants.CARET; return RubyParser.tCARET; } @@ -1516,18 +1503,18 @@ private int colon(boolean spaceSeen) { if (c == ':') { if (isBEG() || isLexState(lex_state, EXPR_CLASS) || (isARG() && spaceSeen)) { setState(EXPR_BEG); - yaccValue = RopeConstants.COLON_COLON; + yaccValue = TStringConstants.COLON_COLON; return RubyParser.tCOLON3; } setState(EXPR_DOT); - yaccValue = RopeConstants.COLON_COLON; + yaccValue = TStringConstants.COLON_COLON; return RubyParser.tCOLON2; } if (isEND() || Character.isWhitespace(c) || c == '#') { pushback(c); setState(EXPR_BEG); - yaccValue = RopeConstants.COLON; + yaccValue = TStringConstants.COLON; warn_balanced(c, spaceSeen, ":", "symbol literal"); return ':'; } @@ -1545,13 +1532,13 @@ private int colon(boolean spaceSeen) { } setState(EXPR_FNAME); - yaccValue = RopeConstants.COLON; + yaccValue = TStringConstants.COLON; return RubyParser.tSYMBEG; } private int comma(int c) { setState(EXPR_BEG | EXPR_LABEL); - yaccValue = RopeConstants.COMMA; + yaccValue = TStringConstants.COMMA; return c; } @@ -1615,8 +1602,7 @@ private int dollar() { case '<': /* $<: reading filename */ case '>': /* $>: default output handle */ case '\"': /* $": already loaded files */ - yaccValue = RopeOperations - .create(new byte[]{ '$', (byte) c }, 
USASCIIEncoding.INSTANCE, CodeRange.CR_7BIT); + yaccValue = TruffleString.fromByteArrayUncached(new byte[]{ '$', (byte) c }, US_ASCII, false); return RubyParser.tGVAR; case '-': @@ -1640,8 +1626,7 @@ private int dollar() { case '+': /* $+: string matches last paren. */ // Explicit reference to these vars as symbols... if (isLexState(last_state, EXPR_FNAME)) { - yaccValue = RopeOperations - .create(new byte[]{ '$', (byte) c }, USASCIIEncoding.INSTANCE, CodeRange.CR_7BIT); + yaccValue = TruffleString.fromByteArrayUncached(new byte[]{ '$', (byte) c }, US_ASCII, false); return RubyParser.tGVAR; } @@ -1667,7 +1652,7 @@ private int dollar() { } int ref; - String refAsString = createTokenRope().getJavaString(); + String refAsString = createTokenRope().toJavaStringUncached(); try { ref = Integer.parseInt(refAsString.substring(1).intern()); @@ -1679,7 +1664,7 @@ private int dollar() { yaccValue = new NthRefParseNode(getPosition(), ref); return RubyParser.tNTH_REF; case '0': - return identifierToken(RubyParser.tGVAR, RopeConstants.DOLLAR_ZERO); + return identifierToken(RubyParser.tGVAR, TStringConstants.DOLLAR_ZERO); default: if (!isIdentifierChar(c)) { if (c == EOF || isSpace(c)) { @@ -1710,11 +1695,11 @@ private int dot() { setState(EXPR_BEG); if ((c = nextc()) == '.') { if ((c = nextc()) == '.') { - yaccValue = RopeConstants.DOT_DOT_DOT; + yaccValue = TStringConstants.DOT_DOT_DOT; return isBeg ? RubyParser.tBDOT3 : RubyParser.tDOT3; } pushback(c); - yaccValue = RopeConstants.DOT_DOT; + yaccValue = TStringConstants.DOT_DOT; return isBeg ? RubyParser.tBDOT2 : RubyParser.tDOT2; } @@ -1726,14 +1711,14 @@ private int dot() { } setState(EXPR_DOT); - yaccValue = RopeConstants.DOT; + yaccValue = TStringConstants.DOT; return RubyParser.tDOT; } private int doubleQuote(boolean commandState) { int label = isLabelPossible(commandState) ? 
str_label : 0; lex_strterm = new StringTerm(str_dquote | label, '\0', '"', ruby_sourceline); - yaccValue = RopeConstants.QQ; + yaccValue = TStringConstants.QQ; return RubyParser.tSTRING_BEG; } @@ -1745,22 +1730,22 @@ private int greaterThan() { switch (c) { case '=': - yaccValue = RopeConstants.GT_EQ; + yaccValue = TStringConstants.GT_EQ; return RubyParser.tGEQ; case '>': if ((c = nextc()) == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.GT_GT; + yaccValue = TStringConstants.GT_GT; return RubyParser.tOP_ASGN; } pushback(c); - yaccValue = RopeConstants.GT_GT; + yaccValue = TStringConstants.GT_GT; return RubyParser.tRSHFT; default: pushback(c); - yaccValue = RopeConstants.GT; + yaccValue = TStringConstants.GT; return RubyParser.tGT; } } @@ -1797,7 +1782,7 @@ private int identifier(int c, boolean commandState) { int result = 0; last_state = lex_state; - Rope tempVal; + TruffleString tempVal; if (lastBangOrPredicate) { result = RubyParser.tFID; tempVal = createTokenRope(); @@ -1837,7 +1822,7 @@ private int identifier(int c, boolean commandState) { } if (lex_state != EXPR_DOT) { - Keyword keyword = getKeyword(tempVal); // Is it is a keyword? + Keyword keyword = getKeyword(tempVal, encoding); // Is it is a keyword? 
if (keyword != null) { int state = lex_state; // Save state at time keyword is encountered @@ -1889,15 +1874,15 @@ private int leftBracket(boolean spaceSeen) { if ((c = nextc()) == ']') { if (peek('=')) { nextc(); - yaccValue = RopeConstants.LBRACKET_RBRACKET_EQ; + yaccValue = TStringConstants.LBRACKET_RBRACKET_EQ; return RubyParser.tASET; } - yaccValue = RopeConstants.LBRACKET_RBRACKET; + yaccValue = TStringConstants.LBRACKET_RBRACKET; return RubyParser.tAREF; } pushback(c); setState(getState() | EXPR_LABEL); - yaccValue = RopeConstants.LBRACKET; + yaccValue = TStringConstants.LBRACKET; return '['; } else if (isBEG() || (isARG() && (spaceSeen || isLexState(lex_state, EXPR_LABELED)))) { c = RubyParser.tLBRACK; @@ -1906,7 +1891,7 @@ private int leftBracket(boolean spaceSeen) { setState(EXPR_BEG | EXPR_LABEL); conditionState.stop(); cmdArgumentState.stop(); - yaccValue = RopeConstants.LBRACKET; + yaccValue = TStringConstants.LBRACKET; return c; } @@ -1919,7 +1904,7 @@ private int leftCurly() { parenNest--; conditionState.stop(); cmdArgumentState.stop(); - yaccValue = RopeConstants.LCURLY; + yaccValue = TStringConstants.LCURLY; return RubyParser.tLAMBEG; } @@ -1992,24 +1977,24 @@ private int lessThan(boolean spaceSeen) { switch (c) { case '=': if ((c = nextc()) == '>') { - yaccValue = RopeConstants.LT_EQ_GT; + yaccValue = TStringConstants.LT_EQ_GT; return RubyParser.tCMP; } pushback(c); - yaccValue = RopeConstants.LT_EQ; + yaccValue = TStringConstants.LT_EQ; return RubyParser.tLEQ; case '<': if ((c = nextc()) == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.LT_LT; + yaccValue = TStringConstants.LT_LT; return RubyParser.tOP_ASGN; } pushback(c); - yaccValue = RopeConstants.LT_LT; + yaccValue = TStringConstants.LT_LT; warn_balanced(c, spaceSeen, "<<", "here document"); return RubyParser.tLSHFT; default: - yaccValue = RopeConstants.LT; + yaccValue = TStringConstants.LT; pushback(c); return RubyParser.tLT; } @@ -2021,27 +2006,27 @@ private int minus(boolean 
spaceSeen) { if (isAfterOperator()) { setState(EXPR_ARG); if (c == '@') { - yaccValue = RopeConstants.MINUS_AT; + yaccValue = TStringConstants.MINUS_AT; return RubyParser.tUMINUS; } pushback(c); - yaccValue = RopeConstants.MINUS; + yaccValue = TStringConstants.MINUS; return RubyParser.tMINUS; } if (c == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.MINUS; + yaccValue = TStringConstants.MINUS; return RubyParser.tOP_ASGN; } if (c == '>') { setState(EXPR_ENDFN); - yaccValue = RopeConstants.MINUS_GT; + yaccValue = TStringConstants.MINUS_GT; return RubyParser.tLAMBDA; } if (isBEG() || (isSpaceArg(c, spaceSeen) && arg_ambiguous())) { setState(EXPR_BEG); pushback(c); - yaccValue = RopeConstants.MINUS_AT; + yaccValue = TStringConstants.MINUS_AT; if (Character.isDigit(c)) { return RubyParser.tUMINUS_NUM; } @@ -2049,7 +2034,7 @@ private int minus(boolean spaceSeen) { } setState(EXPR_BEG); pushback(c); - yaccValue = RopeConstants.MINUS; + yaccValue = TStringConstants.MINUS; warn_balanced(c, spaceSeen, "-", "unary operator"); return RubyParser.tMINUS; } @@ -2063,7 +2048,7 @@ private int percent(boolean spaceSeen) { if (c == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.PERCENT; + yaccValue = TStringConstants.PERCENT; return RubyParser.tOP_ASGN; } @@ -2074,7 +2059,7 @@ private int percent(boolean spaceSeen) { setState(isAfterOperator() ? 
EXPR_ARG : EXPR_BEG); pushback(c); - yaccValue = RopeConstants.PERCENT; + yaccValue = TStringConstants.PERCENT; warn_balanced(c, spaceSeen, "%", "string literal"); return RubyParser.tPERCENT; } @@ -2087,21 +2072,21 @@ private int pipe() { setState(EXPR_BEG); if ((c = nextc()) == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.OR_OR; + yaccValue = TStringConstants.OR_OR; return RubyParser.tOP_ASGN; } pushback(c); - yaccValue = RopeConstants.OR_OR; + yaccValue = TStringConstants.OR_OR; return RubyParser.tOROP; case '=': setState(EXPR_BEG); - yaccValue = RopeConstants.OR; + yaccValue = TStringConstants.OR; return RubyParser.tOP_ASGN; default: setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG | EXPR_LABEL); pushback(c); - yaccValue = RopeConstants.OR; + yaccValue = TStringConstants.OR; return RubyParser.tPIPE; } } @@ -2111,17 +2096,17 @@ private int plus(boolean spaceSeen) { if (isAfterOperator()) { setState(EXPR_ARG); if (c == '@') { - yaccValue = RopeConstants.PLUS_AT; + yaccValue = TStringConstants.PLUS_AT; return RubyParser.tUPLUS; } pushback(c); - yaccValue = RopeConstants.PLUS; + yaccValue = TStringConstants.PLUS; return RubyParser.tPLUS; } if (c == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.PLUS; + yaccValue = TStringConstants.PLUS; return RubyParser.tOP_ASGN; } @@ -2132,13 +2117,13 @@ private int plus(boolean spaceSeen) { c = '+'; return parseNumber(c); } - yaccValue = RopeConstants.PLUS_AT; + yaccValue = TStringConstants.PLUS_AT; return RubyParser.tUPLUS; } setState(EXPR_BEG); pushback(c); - yaccValue = RopeConstants.PLUS; + yaccValue = TStringConstants.PLUS; warn_balanced(c, spaceSeen, "+", "unary operator"); return RubyParser.tPLUS; } @@ -2148,7 +2133,7 @@ private int questionMark() { if (isEND()) { setState(EXPR_VALUE); - yaccValue = RopeConstants.QUESTION; + yaccValue = TStringConstants.QUESTION; return '?'; } @@ -2187,7 +2172,7 @@ private int questionMark() { } pushback(c); setState(EXPR_VALUE); - yaccValue = RopeConstants.QUESTION; + 
yaccValue = TStringConstants.QUESTION; return '?'; } @@ -2199,13 +2184,13 @@ private int questionMark() { newtok(true); pushback(c); setState(EXPR_VALUE); - yaccValue = RopeConstants.QUESTION; + yaccValue = TStringConstants.QUESTION; return '?'; } else if (c == '\\') { if (peek('u')) { nextc(); // Eat 'u' - RopeBuilder oneCharBL = new RopeBuilder(); - oneCharBL.setEncoding(getEncoding()); + TStringBuilder oneCharBL = new TStringBuilder(); + oneCharBL.setEncoding(encoding); c = readUTFEscape(oneCharBL, false, false); @@ -2216,7 +2201,7 @@ private int questionMark() { } setState(EXPR_END); - yaccValue = new StrParseNode(getPosition(), oneCharBL.toRope()); + yaccValue = new StrParseNode(getPosition(), oneCharBL.toTStringWithEnc()); return RubyParser.tCHAR; } else { @@ -2226,7 +2211,7 @@ private int questionMark() { newtok(true); } - yaccValue = new StrParseNode(getPosition(), RopeConstants.ASCII_8BIT_SINGLE_BYTE_ROPES[c]); + yaccValue = new StrParseNode(getPosition(), TStringConstants.BINARY_SINGLE_BYTE[c], Encodings.BINARY); setState(EXPR_END); return RubyParser.tCHAR; } @@ -2236,7 +2221,7 @@ private int rightBracket() { conditionState.restart(); cmdArgumentState.restart(); setState(EXPR_END); - yaccValue = RopeConstants.RBRACKET; + yaccValue = TStringConstants.RBRACKET; return RubyParser.tRBRACK; } @@ -2244,7 +2229,7 @@ private int rightCurly() { conditionState.restart(); cmdArgumentState.restart(); setState(EXPR_END); - yaccValue = RopeConstants.RCURLY; + yaccValue = TStringConstants.RCURLY; int tok = braceNest == 0 ? RubyParser.tSTRING_DEND : RubyParser.tRCURLY; braceNest--; return tok; @@ -2255,14 +2240,14 @@ private int rightParen() { conditionState.restart(); cmdArgumentState.restart(); setState(EXPR_ENDFN); - yaccValue = RopeConstants.RPAREN; + yaccValue = TStringConstants.RPAREN; return RubyParser.tRPAREN; } private int singleQuote(boolean commandState) { int label = isLabelPossible(commandState) ? 
str_label : 0; lex_strterm = new StringTerm(str_squote | label, '\0', '\'', ruby_sourceline); - yaccValue = RopeConstants.Q; + yaccValue = TStringConstants.Q; return RubyParser.tSTRING_BEG; } @@ -2270,7 +2255,7 @@ private int singleQuote(boolean commandState) { private int slash(boolean spaceSeen) { if (isBEG()) { lex_strterm = new StringTerm(str_regexp, '\0', '/', ruby_sourceline); - yaccValue = RopeConstants.SLASH; + yaccValue = TStringConstants.SLASH; return RubyParser.tREGEXP_BEG; } @@ -2278,20 +2263,20 @@ private int slash(boolean spaceSeen) { if (c == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.SLASH; + yaccValue = TStringConstants.SLASH; return RubyParser.tOP_ASGN; } pushback(c); if (isSpaceArg(c, spaceSeen)) { arg_ambiguous(); lex_strterm = new StringTerm(str_regexp, '\0', '/', ruby_sourceline); - yaccValue = RopeConstants.SLASH; + yaccValue = TStringConstants.SLASH; return RubyParser.tREGEXP_BEG; } setState(isAfterOperator() ? EXPR_ARG : EXPR_BEG); - yaccValue = RopeConstants.SLASH; + yaccValue = TStringConstants.SLASH; warn_balanced(c, spaceSeen, "/", "regexp literal"); return RubyParser.tDIVIDE; } @@ -2303,12 +2288,12 @@ private int star(boolean spaceSeen) { case '*': if ((c = nextc()) == '=') { setState(EXPR_BEG); - yaccValue = RopeConstants.STAR_STAR; + yaccValue = TStringConstants.STAR_STAR; return RubyParser.tOP_ASGN; } pushback(c); // not a '=' put it back - yaccValue = RopeConstants.STAR_STAR; + yaccValue = TStringConstants.STAR_STAR; if (isSpaceArg(c, spaceSeen)) { warnings.warning( @@ -2325,7 +2310,7 @@ private int star(boolean spaceSeen) { break; case '=': setState(EXPR_BEG); - yaccValue = RopeConstants.STAR; + yaccValue = TStringConstants.STAR; return RubyParser.tOP_ASGN; default: pushback(c); @@ -2341,7 +2326,7 @@ private int star(boolean spaceSeen) { warn_balanced(c, spaceSeen, "*", "argument prefix"); c = RubyParser.tSTAR2; } - yaccValue = RopeConstants.STAR; + yaccValue = TStringConstants.STAR; } setState(isAfterOperator() ? 
EXPR_ARG : EXPR_BEG); @@ -2360,7 +2345,7 @@ private int tilde() { setState(EXPR_BEG); } - yaccValue = RopeConstants.TILDE; + yaccValue = TStringConstants.TILDE; return RubyParser.tTILDE; } @@ -2610,7 +2595,7 @@ private int getNumberToken(String number, boolean seen_e, boolean seen_point, in // Note: parser_tokadd_utf8 variant just for regexp literal parsing. This variant is to be // called when string_literal and regexp_literal. - public void readUTFEscapeRegexpLiteral(RopeBuilder buffer) { + public void readUTFEscapeRegexpLiteral(TStringBuilder buffer) { buffer.append('\\'); buffer.append('u'); @@ -2634,7 +2619,7 @@ public void readUTFEscapeRegexpLiteral(RopeBuilder buffer) { } // MRI: parser_tokadd_utf8 sans regexp literal parsing - public int readUTFEscape(RopeBuilder buffer, boolean stringLiteral, boolean symbolLiteral) { + public int readUTFEscape(TStringBuilder buffer, boolean stringLiteral, boolean symbolLiteral) { int codepoint; int c; @@ -2664,9 +2649,9 @@ public int readUTFEscape(RopeBuilder buffer, boolean stringLiteral, boolean symb return codepoint; } - private void readUTF8EscapeIntoBuffer(int codepoint, RopeBuilder buffer, boolean stringLiteral) { + private void readUTF8EscapeIntoBuffer(int codepoint, TStringBuilder buffer, boolean stringLiteral) { if (codepoint >= 0x80) { - buffer.setEncoding(UTF8Encoding.INSTANCE); + buffer.setEncoding(Encodings.UTF_8); if (stringLiteral) { tokaddmbc(codepoint, buffer); } @@ -2745,7 +2730,7 @@ public int readEscape() { /** Read up to count hexadecimal digits and store those digits in a token numberBuffer. If strict is provided then * count number of hex digits must be present. If no digits can be read a syntax exception will be thrown. This will * also return the codepoint as a value so codepoint ranges can be checked. 
*/ - private char scanHexLiteral(RopeBuilder buffer, int count, boolean strict, String errorMessage) { + private char scanHexLiteral(TStringBuilder buffer, int count, boolean strict, String errorMessage) { int i = 0; char hexValue = '\0'; @@ -2826,7 +2811,9 @@ private int scanHex(int count, boolean strict, String errorMessage) { // --- LINE + POSITION --- /** The current line being parsed */ - Rope lexb = null; + TruffleString lexb = null; + public RubyEncoding encoding = null; + public TruffleString.Encoding tencoding = null; // There use to be a variable called lex_lastline, but it was always identical to lexb. /** Always 0, except when parsing a UTF-8 BOM in parser_prepare() */ @@ -2869,8 +2856,6 @@ private int scanHex(int count, boolean strict, String errorMessage) { private int tokp = 0; /** Value of last token which had a value associated with it. */ private Object yaccValue; - /** The character code range for the last token. */ - private CodeRange tokenCR; /** Snapshot of {@link #ruby_sourceline} for the last token. */ private int ruby_sourceline_when_tokline_created; /** Source span for the whole line of the last token. */ @@ -2902,7 +2887,7 @@ private int scanHex(int count, boolean strict, String errorMessage) { public boolean commandStart; protected StackState conditionState = new StackState(); protected StackState cmdArgumentState = new StackState(); - private Rope current_arg; + private TruffleString current_arg; public boolean inKwarg = false; protected int last_cr_line; private int leftParenBegin = 0; @@ -2926,31 +2911,21 @@ protected boolean comment_at_top() { } /** Returns a rope for the current token, spanning from {@link #tokp} to {@link #lex_p}. 
*/ - public Rope createTokenByteArrayView() { - return parserRopeOperations.makeShared(lexb, tokp, lex_p - tokp); - } - - @Deprecated - public String createTokenString(int start) { - return RopeOperations.decodeRopeSegment(lexb, start, lex_p - start); - } - - @Deprecated - public String createTokenString() { - return createTokenString(tokp); + public TruffleString createTokenByteArrayView() { + return src.parserRopeOperations.makeShared(lexb, tokp, lex_p - tokp); } - public Rope createTokenRope(int start) { - return parserRopeOperations.makeShared(lexb, start, lex_p - start); + public TruffleString createTokenRope(int start) { + return lexb.substringByteIndexUncached(start, lex_p - start, tencoding, true); } - public Rope createTokenRope() { + public TruffleString createTokenRope() { return createTokenRope(tokp); } /** Returns a substring rope equivalent equivalent to the given rope (which contains a single line), dedented by the * given width. */ - private Rope dedent_string(Rope string, int width) { + private TruffleString dedent_string(TStringWithEncoding string, int width) { int len = string.byteLength(); int i, col = 0; @@ -2968,7 +2943,7 @@ private Rope dedent_string(Rope string, int width) { } } - return parserRopeOperations.makeShared(string, i, len - i); + return string.substringAsTString(i, len - i); } /** Sets the token start position ({@link #tokp}) to the current position ({@link #lex_p}). 
*/ @@ -2988,16 +2963,21 @@ public StackState getConditionState() { return conditionState; } - public Rope getCurrentArg() { + public TruffleString getCurrentArg() { return current_arg; } public String getCurrentLine() { - return RopeOperations.decodeRope(lexb); + return lexb.toJavaStringUncached(); } public Encoding getEncoding() { - return src.getEncoding(); + assert encoding.jcoding == src.getEncoding(); + return encoding.jcoding; + } + + public TruffleString.Encoding getTEncoding() { + return tencoding; } public String getFile() { @@ -3020,25 +3000,6 @@ public int getState() { return lex_state; } - public CodeRange getTokenCR() { - if (tokenCR != null) { - return tokenCR; - } else { - // The CR is null if the yaccValue is hard-coded inside the lexer, rather than determined by a token scan. - // This can happen, for instance, if the lexer is consuming tokens that might correspond to operators and - // then determines the characters are actually part of an identifier (see lessThan for such - // a case). - - if (lexb.isAsciiOnly()) { - // We don't know which substring of lexb was used for the token at this point, but if the source string - // is CR_7BIT, all substrings must be CR_7BIT by definition. - return CR_7BIT; - } else { - return CR_UNKNOWN; - } - } - } - public int incrementParenNest() { parenNest++; @@ -3183,7 +3144,7 @@ public void lex_goto_eol() { lex_p = lex_pend; } - protected void magicCommentEncoding(Rope encoding) { + protected void magicCommentEncoding(TruffleString encoding) { if (!comment_at_top()) { return; } @@ -3206,9 +3167,6 @@ public void newtok(boolean unreadOnce) { tokline = getPosition(); ruby_sourceline_when_tokline_created = ruby_sourceline; - // We assume all idents are valid (or 7BIT if ASCII-compatible), until they aren't. - tokenCR = src.getEncoding().isAsciiCompatible() ? CodeRange.CR_7BIT : CodeRange.CR_VALID; - tokp = lex_p - (unreadOnce ? 
1 : 0); } @@ -3272,7 +3230,7 @@ public void parser_prepare() { } public int p(int offset) { - return lexb.getBytes()[offset] & 0xff; + return lexb.readByteUncached(offset, tencoding); } public boolean peek(int c) { @@ -3286,30 +3244,20 @@ protected boolean peek(int c, int n) { public int precise_mbclen() { // A broken string has at least one character with an invalid byte sequence. It doesn't matter which one we // report as invalid because the error reported to the user will only note the start position of the string. - if (lexb.getCodeRange() == CR_BROKEN) { - return -1; + if (!lexb.isValidUncached(tencoding)) { + compile_error("invalid multibyte char (" + getEncoding() + ")"); } // A substring of a single-byte optimizable string is always single-byte optimizable, so there's no need // to actually perform the substring operation. - if (lexb.isSingleByteOptimizable()) { + if (TStringUtils.isSingleByteOptimizable(lexb, encoding)) { return 1; } // we subtract one since we have read past first byte by time we are calling this. final int start = lex_p - 1; - final int end = lex_pend; - final int length = end - start; - - // Otherwise, take the substring and see if that new string is single-byte optimizable. - final Rope rope = parserRopeOperations.makeShared(lexb, start, length); - if (rope.isSingleByteOptimizable()) { - return 1; - } - // Barring all else, we must inspect the bytes for the substring. 
- return StringSupport - .characterLength(src.getEncoding(), rope.getCodeRange(), rope.getBytes(), 0, rope.byteLength()); + return lexb.byteLengthOfCodePointUncached(start, tencoding, ErrorHandling.RETURN_NEGATIVE); } public void pushback(int c) { @@ -3365,13 +3313,16 @@ protected char scanOct(int count) { return value; } - public void setCurrentArg(Rope current_arg) { + public void setCurrentArg(TruffleString current_arg) { this.current_arg = current_arg; } - public void setEncoding(Encoding encoding) { - src.setEncoding(encoding); - lexb = parserRopeOperations.withEncoding(lexb, encoding); + public void setEncoding(Encoding jcoding) { + src.setEncoding(jcoding); + var prevEncoding = this.encoding; + this.encoding = Encodings.getBuiltInEncoding(jcoding); + this.tencoding = this.encoding.tencoding; + lexb = lexb.forceEncodingUncached(prevEncoding.tencoding, tencoding); } protected void set_file_encoding(int str, int send) { @@ -3418,7 +3369,8 @@ protected void set_file_encoding(int str, int send) { } continue; } - if (RopeOperations.caseInsensitiveCmp(parserRopeOperations.makeShared(lexb, str - 6, 6), CODING) == 0) { + if (src.parserRopeOperations.makeShared(lexb, str - 6, 6).toJavaStringUncached() + .equalsIgnoreCase("coding")) { break; } } @@ -3444,7 +3396,7 @@ protected void set_file_encoding(int str, int send) { int beg = str; while ((p(str) == '-' || p(str) == '_' || Character.isLetterOrDigit(p(str))) && ++str < send) { } - setEncoding(parserRopeOperations.makeShared(lexb, beg, str - beg)); + setEncoding(src.parserRopeOperations.makeShared(lexb, beg, str - beg)); } public void setHeredocLineIndent(int heredoc_line_indent) { @@ -3474,20 +3426,18 @@ public void setValue(Object yaccValue) { this.yaccValue = yaccValue; } - protected boolean strncmp(Rope one, Rope two, int length) { - if (one.byteLength() != two.byteLength() && (one.byteLength() < length || two.byteLength() < length)) { - return false; - } - - return ArrayUtils.regionEquals(one.getBytes(), 0, 
two.getBytes(), 0, length); + protected boolean strncmp(TruffleString one, TruffleString two, int length) { + return one.regionEqualByteIndexUncached(0, two, 0, length, tencoding); } - public void tokAdd(int first_byte, RopeBuilder buffer) { + public void tokAdd(int first_byte, TStringBuilder buffer) { buffer.append((byte) first_byte); } - public void tokCopy(int length, RopeBuilder buffer) { - buffer.append(ArrayUtils.extractRange(lexb.getBytes(), lex_p - length, lex_p)); + public void tokCopy(int length, TStringBuilder buffer) { + var bytes = lexb.getInternalByteArrayUncached(tencoding); + buffer.append(ArrayUtils.extractRange(bytes.getArray(), bytes.getOffset() + lex_p - length, + bytes.getOffset() + lex_p)); } public boolean tokadd_ident(int c) { @@ -3512,11 +3462,8 @@ public boolean tokadd_ident(int c) { * token it will just get the bytes directly from source directly. */ public boolean tokadd_mbchar(int firstByte) { int length = precise_mbclen(); - if (length <= 0) { compile_error("invalid multibyte char (" + getEncoding() + ")"); - } else if (length > 1 || (tokenCR == CR_7BIT && !isASCII(firstByte))) { - tokenCR = CodeRange.CR_VALID; } lex_p += length - 1; // we already read first byte so advance pointer for remainder @@ -3525,7 +3472,7 @@ public boolean tokadd_mbchar(int firstByte) { } // mri: parser_tokadd_mbchar - public boolean tokadd_mbchar(int firstByte, RopeBuilder buffer) { + public boolean tokadd_mbchar(int firstByte, TStringBuilder buffer) { int length = precise_mbclen(); if (length <= 0) { @@ -3543,7 +3490,7 @@ public boolean tokadd_mbchar(int firstByte, RopeBuilder buffer) { /** This looks deceptively like tokadd_mbchar(int, ByteArrayView) but it differs in that it uses the bytelists * encoding and the first parameter is a full codepoint and not the first byte of a mbc sequence. 
*/ - public void tokaddmbc(int codepoint, RopeBuilder buffer) { + public void tokaddmbc(int codepoint, TStringBuilder buffer) { Encoding encoding = buffer.getEncoding(); int length = encoding.codeToMbcLength(codepoint); final byte[] bytes = Arrays.copyOf(buffer.getBytes(), buffer.getLength() + length); @@ -3593,16 +3540,16 @@ public boolean update_heredoc_indent(int c) { } } - public void validateFormalIdentifier(Rope identifier) { + public void validateFormalIdentifier(TruffleString identifier) { if (isFirstCodepointUppercase(identifier)) { compile_error("formal argument cannot be a constant"); } - int first = identifier.get(0) & 0xFF; + int first = identifier.readByteUncached(0, tencoding); switch (first) { case '@': - if (identifier.get(1) == '@') { + if (identifier.readByteUncached(1, tencoding) == '@') { compile_error("formal argument cannot be a class variable"); } else { compile_error("formal argument cannot be an instance variable"); @@ -3614,7 +3561,7 @@ public void validateFormalIdentifier(Rope identifier) { default: // This mechanism feels a tad dicey but at this point we are dealing with a valid // method name at least so we should not need to check the entire string... - byte last = identifier.get(identifier.byteLength() - 1); + int last = identifier.readByteUncached(identifier.byteLength(tencoding) - 1, tencoding); if (last == '=' || last == '?' || last == '!') { compile_error("formal argument must be local variable"); @@ -3643,8 +3590,8 @@ public boolean was_bol() { /** Indicates whether the current line matches the given marker, after stripping away leading whitespace if * {@code indent} is true. Does not advance the input position ({@link #lex_p}). 
*/ - boolean whole_match_p(Rope eos, boolean indent) { - int len = eos.byteLength(); + boolean whole_match_p(TruffleString eos, TruffleString.Encoding enc, boolean indent) { + int len = eos.byteLength(enc); int p = lex_pbeg; if (indent) { @@ -3668,7 +3615,7 @@ boolean whole_match_p(Rope eos, boolean indent) { } } - return strncmp(eos, parserRopeOperations.makeShared(lexb, p, len), len); + return strncmp(eos, src.parserRopeOperations.makeShared(lexb, p, len), len); } public static final int TAB_WIDTH = 8; @@ -3697,14 +3644,14 @@ boolean whole_match_p(Rope eos, boolean indent) { public static final int EOF = -1; // 0 in MRI - public static final Rope END_MARKER = RopeOperations - .create(new byte[]{ '_', '_', 'E', 'N', 'D', '_', '_' }, ASCIIEncoding.INSTANCE, CR_7BIT); - public static final Rope BEGIN_DOC_MARKER = RopeOperations - .create(new byte[]{ 'b', 'e', 'g', 'i', 'n' }, ASCIIEncoding.INSTANCE, CR_7BIT); - public static final Rope END_DOC_MARKER = RopeOperations - .create(new byte[]{ 'e', 'n', 'd' }, ASCIIEncoding.INSTANCE, CR_7BIT); - public static final Rope CODING = RopeOperations - .create(new byte[]{ 'c', 'o', 'd', 'i', 'n', 'g' }, ASCIIEncoding.INSTANCE, CR_7BIT); + public static final TruffleString END_MARKER = TruffleString.fromByteArrayUncached( + new byte[]{ '_', '_', 'E', 'N', 'D', '_', '_' }, TruffleString.Encoding.BYTES, false); + public static final TruffleString BEGIN_DOC_MARKER = TruffleString.fromByteArrayUncached( + new byte[]{ 'b', 'e', 'g', 'i', 'n' }, TruffleString.Encoding.BYTES, false); + public static final TruffleString END_DOC_MARKER = TruffleString.fromByteArrayUncached( + new byte[]{ 'e', 'n', 'd' }, TruffleString.Encoding.BYTES, false); + public static final TruffleString CODING = TruffleString.fromByteArrayUncached( + new byte[]{ 'c', 'o', 'd', 'i', 'n', 'g' }, TruffleString.Encoding.BYTES, false); public static final int SUFFIX_R = 1 << 0; public static final int SUFFIX_I = 1 << 1; @@ -3771,15 +3718,14 @@ protected boolean 
isSpaceArg(int c, boolean spaceSeen) { /** Encoding-aware (including multi-byte encodings) check of first codepoint of a given rope, usually to determine * if it is a constant */ - private boolean isFirstCodepointUppercase(Rope rope) { - Encoding ropeEncoding = rope.encoding; - int firstByte = rope.get(0) & 0xFF; + private boolean isFirstCodepointUppercase(TruffleString rope) { + Encoding ropeEncoding = encoding.jcoding; + int firstByte = rope.readByteUncached(0, tencoding); if (ropeEncoding.isAsciiCompatible() && isASCII(firstByte)) { return StringSupport.isAsciiUppercase((byte) firstByte); } else { - byte[] ropeBytes = rope.getBytes(); - int firstCharacter = ropeEncoding.mbcToCode(ropeBytes, 0, ropeBytes.length); + int firstCharacter = rope.codePointAtByteIndexUncached(0, tencoding, ErrorHandling.BEST_EFFORT); return ropeEncoding.isUpper(firstCharacter); } } diff --git a/src/main/java/org/truffleruby/parser/lexer/StringTerm.java b/src/main/java/org/truffleruby/parser/lexer/StringTerm.java index d6544118a5e0..a18152697a80 100644 --- a/src/main/java/org/truffleruby/parser/lexer/StringTerm.java +++ b/src/main/java/org/truffleruby/parser/lexer/StringTerm.java @@ -46,11 +46,12 @@ import static org.truffleruby.parser.lexer.RubyLexer.isOctChar; import org.jcodings.Encoding; +import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.regexp.RegexpOptions; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeBuilder; -import org.truffleruby.core.rope.RopeConstants; +import org.truffleruby.core.string.TStringBuilder; +import org.truffleruby.core.string.TStringWithEncoding; import org.truffleruby.core.string.KCode; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.parser.ast.RegexpParseNode; import org.truffleruby.parser.parser.RubyParser; @@ -84,9 +85,9 @@ public int getFlags() { return flags; } - protected RopeBuilder createRopeBuilder(RubyLexer lexer) { - RopeBuilder builder = new RopeBuilder(); - 
builder.setEncoding(lexer.getEncoding()); + protected TStringBuilder createRopeBuilder(RubyLexer lexer) { + TStringBuilder builder = new TStringBuilder(); + builder.setEncoding(lexer.encoding); return builder; } @@ -102,9 +103,9 @@ private int endFound(RubyLexer lexer) { if ((flags & STR_FUNC_REGEXP) != 0) { RegexpOptions options = parseRegexpFlags(lexer); - Rope regexpRope = RopeConstants.EMPTY_US_ASCII_ROPE; lexer.setState(EXPR_END | EXPR_ENDARG); - lexer.setValue(new RegexpParseNode(lexer.getPosition(), regexpRope, options)); + lexer.setValue(new RegexpParseNode(lexer.getPosition(), + new TStringWithEncoding(TStringConstants.EMPTY_US_ASCII, Encodings.US_ASCII), options)); return RubyParser.tREGEXP_END; } @@ -157,7 +158,7 @@ public int parseString(RubyLexer lexer) { return ' '; } - RopeBuilder buffer = createRopeBuilder(lexer); + TStringBuilder buffer = createRopeBuilder(lexer); lexer.newtok(true); if ((flags & STR_FUNC_EXPAND) != 0 && c == '#') { int token = lexer.peekVariableName(RubyParser.tSTRING_DVAR, RubyParser.tSTRING_DBEG); @@ -237,7 +238,7 @@ private void mixedEscape(RubyLexer lexer, Encoding foundEncoding, Encoding parse } // mri: parser_tokadd_string - public int parseStringIntoBuffer(RubyLexer lexer, RopeBuilder buffer, Encoding enc[]) { + public int parseStringIntoBuffer(RubyLexer lexer, TStringBuilder buffer, Encoding enc[]) { boolean qwords = (flags & STR_FUNC_QWORDS) != 0; boolean expand = (flags & STR_FUNC_EXPAND) != 0; boolean escape = (flags & STR_FUNC_ESCAPE) != 0; @@ -286,7 +287,7 @@ public int parseStringIntoBuffer(RubyLexer lexer, RopeBuilder buffer, Encoding e // note the newline and the backslash have been consumed and haven't been added to the buffer! 
c = '\\'; if (enc != null) { - buffer.setEncoding(lexer.getEncoding()); + buffer.setEncoding(lexer.encoding); } return c; } @@ -430,7 +431,7 @@ private boolean simple_re_meta(int c) { // Was a goto in original ruby lexer @SuppressWarnings("fallthrough") - private void escaped(RubyLexer lexer, RopeBuilder buffer) { + private void escaped(RubyLexer lexer, TStringBuilder buffer) { int c; switch (c = lexer.nextc()) { @@ -445,7 +446,7 @@ private void escaped(RubyLexer lexer, RopeBuilder buffer) { } @SuppressWarnings("fallthrough") - private void parseEscapeIntoBuffer(RubyLexer lexer, RopeBuilder buffer) { + private void parseEscapeIntoBuffer(RubyLexer lexer, TStringBuilder buffer) { int c; switch (c = lexer.nextc()) { diff --git a/src/main/java/org/truffleruby/parser/parser/ParserRopeOperations.java b/src/main/java/org/truffleruby/parser/parser/ParserRopeOperations.java index 63b285d4e400..5f4c1d8848bf 100644 --- a/src/main/java/org/truffleruby/parser/parser/ParserRopeOperations.java +++ b/src/main/java/org/truffleruby/parser/parser/ParserRopeOperations.java @@ -9,18 +9,20 @@ */ package org.truffleruby.parser.parser; -import org.jcodings.Encoding; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeNodesFactory; +import com.oracle.truffle.api.strings.AbstractTruffleString; +import com.oracle.truffle.api.strings.TruffleString; +import org.truffleruby.core.encoding.RubyEncoding; public class ParserRopeOperations { - public Rope withEncoding(Rope rope, Encoding encoding) { - return RopeNodesFactory.WithEncodingNodeGen.getUncached().executeWithEncoding(rope, encoding); + private final TruffleString.Encoding tencoding; + + public ParserRopeOperations(RubyEncoding encoding) { + tencoding = encoding.tencoding; } - public Rope makeShared(Rope rope, int sharedStart, int sharedLength) { - return RopeNodesFactory.SubstringNodeGen.getUncached().executeSubstring(rope, sharedStart, sharedLength); + public TruffleString makeShared(AbstractTruffleString rope, 
int sharedStart, int sharedLength) { + return rope.substringByteIndexUncached(sharedStart, sharedLength, tencoding, true); } } diff --git a/src/main/java/org/truffleruby/parser/parser/ParserSupport.java b/src/main/java/org/truffleruby/parser/parser/ParserSupport.java index 74854066d687..7f6a7aa6c9ee 100644 --- a/src/main/java/org/truffleruby/parser/parser/ParserSupport.java +++ b/src/main/java/org/truffleruby/parser/parser/ParserSupport.java @@ -35,14 +35,12 @@ ***** END LICENSE BLOCK *****/ package org.truffleruby.parser.parser; -import static org.truffleruby.core.rope.CodeRange.CR_BROKEN; - import java.math.BigInteger; import java.util.ArrayList; import java.util.List; +import com.oracle.truffle.api.strings.TruffleString; import org.jcodings.Encoding; -import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.EUCJPEncoding; import org.jcodings.specific.SJISEncoding; import org.jcodings.specific.USASCIIEncoding; @@ -50,16 +48,13 @@ import org.truffleruby.Layouts; import org.truffleruby.RubyLanguage; import org.truffleruby.SuppressFBWarnings; -import org.truffleruby.core.encoding.EncodingManager; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.regexp.ClassicRegexp; import org.truffleruby.core.regexp.RegexpOptions; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.rope.RopeWithEncoding; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.TStringWithEncoding; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.language.control.DeferredRaiseException; import org.truffleruby.language.control.RaiseException; @@ -159,8 +154,8 @@ public class ParserSupport { public static final String 
FORWARD_ARGS_REST_VAR = Layouts.TEMP_PREFIX + "forward_rest"; /** The local variable to store ... keyword arguments in */ public static final String FORWARD_ARGS_KWREST_VAR = Layouts.TEMP_PREFIX + "forward_kwrest"; - public static final Rope FORWARD_ARGS_KWREST_VAR_ROPE = StringOperations.encodeRope(FORWARD_ARGS_KWREST_VAR, - USASCIIEncoding.INSTANCE); + public static final TruffleString FORWARD_ARGS_KWREST_VAR_TSTRING = TStringUtils + .usAsciiString(FORWARD_ARGS_KWREST_VAR); /** The local variable to store the block from ... in */ public static final String FORWARD_ARGS_BLOCK_VAR = Layouts.TEMP_PREFIX + "forward_block"; @@ -183,7 +178,6 @@ public class ParserSupport { private final String file; private final RubyDeferredWarnings warnings; - private final ParserRopeOperations parserRopeOperations = new ParserRopeOperations(); public ParserSupport(LexerSource source, RubyDeferredWarnings warnings) { this.file = source.getSourcePath(); @@ -246,8 +240,8 @@ public ParseNode gettable2(ParseNode node) { case DASGNNODE: // LOCALVAR case LOCALASGNNODE: String name = ((INameNode) node).getName(); - final Rope currentArg = lexer.getCurrentArg(); - if (currentArg != null && name.equals(currentArg.getJavaString())) { + final TruffleString currentArg = lexer.getCurrentArg(); + if (currentArg != null && name.equals(currentArg.toJavaStringUncached())) { warn(node.getPosition(), "circular argument reference - " + name); } checkDeclarationForNumberedParameterMisuse(name, node); @@ -289,22 +283,23 @@ public static boolean isNumberedParameter(String name) { return name.length() == 2 && name.charAt(0) == '_' && '1' <= name.charAt(1) && name.charAt(1) <= '9'; } - public void checkMethodName(Rope rope) { - String name = rope.getJavaString(); + public void checkMethodName(TruffleString tstring) { + String name = tstring.toJavaStringUncached(); + if (isNumberedParameter(name)) { warnNumberedParameterLikeDeclaration(lexer.getPosition(), name); } } - public ParseNode declareIdentifier(Rope 
rope) { - return declareIdentifier(rope.getJavaString()); + public ParseNode declareIdentifier(TruffleString rope) { + return declareIdentifier(rope.toJavaStringUncached()); } // Despite the confusing name, called for every identifier use in expressions. public ParseNode declareIdentifier(String string) { String name = string.intern(); - final Rope currentArg = lexer.getCurrentArg(); - if (currentArg != null && name.equals(currentArg.getJavaString())) { + final TruffleString currentArg = lexer.getCurrentArg(); + if (currentArg != null && name.equals(currentArg.toJavaStringUncached())) { warn(lexer.getPosition(), "circular argument reference - " + name); } @@ -340,8 +335,8 @@ public ParseNode declareIdentifier(String string) { } // We know it has to be tLABEL or tIDENTIFIER so none of the other assignable logic is needed - public AssignableParseNode assignableLabelOrIdentifier(Rope name, ParseNode value) { - return assignableLabelOrIdentifier(name.getJavaString().intern(), value); + public AssignableParseNode assignableLabelOrIdentifier(TruffleString name, ParseNode value) { + return assignableLabelOrIdentifier(name.toJavaStringUncached().intern(), value); } public AssignableParseNode assignableLabelOrIdentifier(String name, ParseNode value) { @@ -350,7 +345,7 @@ public AssignableParseNode assignableLabelOrIdentifier(String name, ParseNode va } // We know it has to be tLABEL or tIDENTIFIER so none of the other assignable logic is needed - public AssignableParseNode assignableKeyword(Rope name, ParseNode value) { + public AssignableParseNode assignableKeyword(TruffleString name, ParseNode value) { // JRuby does some extra kwarg tracking when it sees an assignable keyword. We track kwargs in a different // manner and thus don't require a special method for it. However, keeping this method in ParserSupport helps // reduce the differences with the JRuby grammar. 
@@ -423,24 +418,24 @@ public ParseNode appendToBlock(ParseNode head, ParseNode tail) { } // We know it has to be tLABEL or tIDENTIFIER so none of the other assignable logic is needed - public AssignableParseNode assignableInCurr(Rope name, ParseNode value) { - String nameString = name.getJavaString().intern(); + public AssignableParseNode assignableInCurr(TruffleString name, ParseNode value) { + String nameString = name.toJavaStringUncached().intern(); checkDeclarationForNumberedParameterMisuse(nameString, value); currentScope.addVariableThisScope(nameString); return currentScope.assign(lexer.getPosition(), nameString, makeNullNil(value)); } - public ParseNode getOperatorCallNode(ParseNode firstNode, Rope operator) { + public ParseNode getOperatorCallNode(ParseNode firstNode, TruffleString operator) { value_expr(lexer, firstNode); - return new CallParseNode(firstNode.getPosition(), firstNode, operator.getJavaString(), null, null); + return new CallParseNode(firstNode.getPosition(), firstNode, operator.toJavaStringUncached(), null, null); } - public ParseNode getOperatorCallNode(ParseNode firstNode, Rope operator, ParseNode secondNode) { + public ParseNode getOperatorCallNode(ParseNode firstNode, TruffleString operator, ParseNode secondNode) { return getOperatorCallNode(firstNode, operator, secondNode, null); } - public ParseNode getOperatorCallNode(ParseNode firstNode, Rope operator, ParseNode secondNode, + public ParseNode getOperatorCallNode(ParseNode firstNode, TruffleString operator, ParseNode secondNode, SourceIndexLength defaultPosition) { if (defaultPosition != null) { firstNode = checkForNilNode(firstNode, defaultPosition); @@ -453,7 +448,7 @@ public ParseNode getOperatorCallNode(ParseNode firstNode, Rope operator, ParseNo return new CallParseNode( firstNode.getPosition(), firstNode, - operator.getJavaString(), + operator.toJavaStringUncached(), new ArrayParseNode(secondNode.getPosition(), secondNode), null); } @@ -468,7 +463,7 @@ public ParseNode 
getMatchNode(ParseNode firstNode, ParseNode secondNode) { return new Match3ParseNode(firstNode.getPosition(), firstNode, secondNode); } - return getOperatorCallNode(firstNode, RopeConstants.EQ_TILDE, secondNode); + return getOperatorCallNode(firstNode, TStringConstants.EQ_TILDE, secondNode); } /** Define an array set condition so we can return lhs @@ -487,17 +482,17 @@ public ParseNode aryset(ParseNode receiver, ParseNode index) { * @param receiver object which contains attribute * @param name of the attribute being set * @return an AttrAssignParseNode */ - public ParseNode attrset(ParseNode receiver, Rope name) { - return attrset(receiver, RopeConstants.DOT, name); + public ParseNode attrset(ParseNode receiver, TruffleString name) { + return attrset(receiver, TStringConstants.DOT, name); } - public ParseNode attrset(ParseNode receiver, Rope callType, Rope name) { + public ParseNode attrset(ParseNode receiver, TruffleString callType, TruffleString name) { value_expr(lexer, receiver); return new_attrassign( receiver.getPosition(), receiver, - name.getJavaString() + "=", + name.toJavaStringUncached() + "=", null, isLazy(callType)); } @@ -869,7 +864,7 @@ private ParseNode getFlipConditionNode(ParseNode node) { if (node instanceof FixnumParseNode) { warnUnlessEOption(node, "integer literal in conditional range"); - return getOperatorCallNode(node, RopeConstants.EQ_EQ, new GlobalVarParseNode(node.getPosition(), "$.")); + return getOperatorCallNode(node, TStringConstants.EQ_EQ, new GlobalVarParseNode(node.getPosition(), "$.")); } return node; @@ -1027,14 +1022,14 @@ public InParseNode newInNode(SourceIndexLength position, ParseNode expressionNod } // FIXME: Currently this is passing in position of receiver - public ParseNode new_opElementAsgnNode(ParseNode receiverNode, Rope operatorName, ParseNode argsNode, + public ParseNode new_opElementAsgnNode(ParseNode receiverNode, TruffleString operatorName, ParseNode argsNode, ParseNode valueNode) { SourceIndexLength position = 
lexer.tokline; // FIXME: ruby_sourceline in new lexer. ParseNode newNode = new OpElementAsgnParseNode( position, receiverNode, - operatorName.getJavaString(), + operatorName.toJavaStringUncached(), argsNode, valueNode); @@ -1044,22 +1039,24 @@ public ParseNode new_opElementAsgnNode(ParseNode receiverNode, Rope operatorName } // JRuby would return a RubySymbol but we don't want to create RubySymbols so early, and don't need the Symbol - public Rope symbolID(Rope identifier) { + public TruffleString symbolID(TruffleString identifier) { return identifier; } - public ParseNode newOpAsgn(SourceIndexLength position, ParseNode receiverNode, Rope callType, ParseNode valueNode, - Rope variableName, Rope operatorName) { + public ParseNode newOpAsgn(SourceIndexLength position, ParseNode receiverNode, TruffleString callType, + ParseNode valueNode, + TruffleString variableName, TruffleString operatorName) { return new OpAsgnParseNode( position, receiverNode, valueNode, - variableName.getJavaString(), - operatorName.getJavaString(), + variableName.toJavaStringUncached(), + operatorName.toJavaStringUncached(), isLazy(callType)); } - public ParseNode newOpConstAsgn(SourceIndexLength position, ParseNode lhs, Rope operatorName, ParseNode rhs) { + public ParseNode newOpConstAsgn(SourceIndexLength position, ParseNode lhs, TruffleString operatorName, + ParseNode rhs) { // FIXME: Maybe need to fixup position? 
if (lhs != null) { return new OpAsgnConstDeclParseNode(position, lhs, operatorName, rhs); @@ -1068,8 +1065,8 @@ public ParseNode newOpConstAsgn(SourceIndexLength position, ParseNode lhs, Rope } } - public boolean isLazy(Rope callType) { - return callType == RopeConstants.AMPERSAND_DOT; + public boolean isLazy(TruffleString callType) { + return callType == TStringConstants.AMPERSAND_DOT; } public ParseNode new_attrassign(SourceIndexLength position, ParseNode receiver, String name, ParseNode args, @@ -1077,7 +1074,8 @@ public ParseNode new_attrassign(SourceIndexLength position, ParseNode receiver, return new AttrAssignParseNode(position, receiver, name, args, isLazy); } - public ParseNode new_call(ParseNode receiver, Rope callType, Rope name, ParseNode argsNode, ParseNode iter) { + public ParseNode new_call(ParseNode receiver, TruffleString callType, TruffleString name, ParseNode argsNode, + ParseNode iter) { if (argsNode instanceof BlockPassParseNode) { if (iter != null) { lexer.compile_error(PID.BLOCK_ARG_AND_BLOCK_GIVEN, "Both block arg and actual block given."); @@ -1087,7 +1085,7 @@ public ParseNode new_call(ParseNode receiver, Rope callType, Rope name, ParseNod return new CallParseNode( position(receiver, argsNode), receiver, - name.getJavaString(), + name.toJavaStringUncached(), blockPass.getArgsNode(), blockPass, isLazy(callType)); @@ -1096,18 +1094,18 @@ public ParseNode new_call(ParseNode receiver, Rope callType, Rope name, ParseNod return new CallParseNode( position(receiver, argsNode), receiver, - name.getJavaString(), + name.toJavaStringUncached(), argsNode, iter, isLazy(callType)); } - public ParseNode new_call(ParseNode receiver, Rope name, ParseNode argsNode, ParseNode iter) { - return new_call(receiver, RopeConstants.DOT, name, argsNode, iter); + public ParseNode new_call(ParseNode receiver, TruffleString name, ParseNode argsNode, ParseNode iter) { + return new_call(receiver, TStringConstants.DOT, name, argsNode, iter); } - public Colon2ParseNode 
new_colon2(SourceIndexLength position, ParseNode leftNode, Rope name) { + public Colon2ParseNode new_colon2(SourceIndexLength position, ParseNode leftNode, TruffleString name) { if (leftNode == null) { return new Colon2ImplicitParseNode(position, name); } @@ -1115,7 +1113,7 @@ public Colon2ParseNode new_colon2(SourceIndexLength position, ParseNode leftNode return new Colon2ConstParseNode(position, leftNode, name); } - public Colon3ParseNode new_colon3(SourceIndexLength position, Rope name) { + public Colon3ParseNode new_colon3(SourceIndexLength position, TruffleString name) { return new Colon3ParseNode(position, name); } @@ -1142,8 +1140,8 @@ public void fixpos(ParseNode node, ParseNode orig) { node.setPosition(orig.getPosition()); } - public ParseNode new_fcall(Rope operation) { - return new FCallParseNode(lexer.tokline, operation.getJavaString()); + public ParseNode new_fcall(TruffleString operation) { + return new FCallParseNode(lexer.tokline, operation.toJavaStringUncached()); } public ParseNode new_super(SourceIndexLength position, ParseNode args) { @@ -1237,18 +1235,10 @@ public ParseNodeTuple createKeyValue(ParseNode key, ParseNode value) { return new ParseNodeTuple(key, value); } - public ParseNode asSymbol(SourceIndexLength position, String value) { - final SymbolParseNode symbolParseNode = new SymbolParseNode( - position, - value, - lexer.getEncoding(), - lexer.getTokenCR()); - checkSymbolCodeRange(symbolParseNode); - return symbolParseNode; - } - - public ParseNode asSymbol(SourceIndexLength position, Rope value) { - final SymbolParseNode symbolParseNode = new SymbolParseNode(position, value); + public ParseNode asSymbol(SourceIndexLength position, TruffleString value) { + var tstringWithCorrectEncoding = value.switchEncodingUncached(lexer.encoding.tencoding); + final SymbolParseNode symbolParseNode = new SymbolParseNode(position, tstringWithCorrectEncoding, + lexer.encoding); checkSymbolCodeRange(symbolParseNode); return symbolParseNode; } @@ -1256,7 
+1246,9 @@ public ParseNode asSymbol(SourceIndexLength position, Rope value) { public ParseNode asSymbol(SourceIndexLength position, ParseNode value) { final ParseNode parseNode; if (value instanceof StrParseNode) { - final SymbolParseNode symbolParseNode = new SymbolParseNode(position, ((StrParseNode) value).getValue()); + var strParseNode = (StrParseNode) value; + final SymbolParseNode symbolParseNode = new SymbolParseNode(position, strParseNode.getValue(), + strParseNode.encoding); checkSymbolCodeRange(symbolParseNode); parseNode = symbolParseNode; } else { @@ -1266,7 +1258,7 @@ public ParseNode asSymbol(SourceIndexLength position, ParseNode value) { } private void checkSymbolCodeRange(SymbolParseNode symbolParseNode) { - if (symbolParseNode.getRope().getCodeRange() == CR_BROKEN) { + if (!symbolParseNode.getTString().isValidUncached(symbolParseNode.getRubyEncoding().tencoding)) { throw new RaiseException( RubyLanguage.getCurrentContext(), getConfiguration().getContext().getCoreExceptions().encodingError("invalid encoding symbol", null)); @@ -1299,7 +1291,7 @@ public ParseNode literal_concat(ParseNode head, ParseNode tail) { StrParseNode front = (StrParseNode) head; // string_contents always makes an empty strnode...which is sometimes valid but // never if it ever is in literal_concat. 
- if (front.getValue().byteLength() > 0) { + if (!front.getValue().isEmpty()) { return new StrParseNode(head.getPosition(), front, (StrParseNode) tail); } else { return tail; @@ -1323,7 +1315,7 @@ public ParseNode literal_concat(ParseNode head, ParseNode tail) { if (head instanceof StrParseNode) { //Do not add an empty string node - if (((StrParseNode) head).getValue().byteLength() == 0) { + if (((StrParseNode) head).getValue().isEmpty()) { head = createDStrNode(head.getPosition()); } else { head = createDStrNode(head.getPosition()).add(head); @@ -1434,7 +1426,7 @@ public ParseNode new_args(SourceIndexLength position, ListParseNode pre, ListPar } public ArgsTailHolder new_args_tail(SourceIndexLength position, ListParseNode keywordArg, - Rope keywordRestArgNameRope, BlockArgParseNode blockArg) { + TruffleString keywordRestArgNameRope, BlockArgParseNode blockArg) { if (keywordRestArgNameRope == null) { return new ArgsTailHolder(position, keywordArg, null, blockArg); } else if (keywordRestArgNameRope == RubyLexer.Keyword.NIL.bytes) { // def m(**nil) @@ -1446,7 +1438,7 @@ public ArgsTailHolder new_args_tail(SourceIndexLength position, ListParseNode ke if (keywordRestArgNameRope.isEmpty()) { restKwargsName = Layouts.TEMP_PREFIX + "kwrest"; } else { - restKwargsName = keywordRestArgNameRope.getJavaString().intern(); + restKwargsName = keywordRestArgNameRope.toJavaStringUncached().intern(); } int slot = currentScope.exists(restKwargsName); @@ -1499,7 +1491,7 @@ private int matchesExistingIndex(ParseNode currentNode, List encounte final ParseNode parseNode = encounteredKeys.get(i); // TODO BJF 27-Nov-17 Handle additional literal nodes, consider interface with valueEquals if (parseNode instanceof SymbolParseNode && currentNode instanceof SymbolParseNode) { - if (((SymbolParseNode) parseNode).getRope().equals(((SymbolParseNode) currentNode).getRope())) { + if (((SymbolParseNode) parseNode).valueEquals((SymbolParseNode) currentNode)) { return i; } } @@ -1545,8 +1537,8 @@ 
public void warning(SourceIndexLength position, String message) { } // ENEBO: Totally weird naming (in MRI is not allocated and is a local var name) [1.9] - public boolean is_local_id(Rope name) { - return lexer.isIdentifierChar(name.get(0) & 0xFF); + public boolean is_local_id(TruffleString name) { + return lexer.isIdentifierChar(name.readByteUncached(0, lexer.tencoding)); } // 1.9 @@ -1562,9 +1554,9 @@ public ListParseNode list_append(ParseNode list, ParseNode item) { } // 1.9 - public ParseNode new_bv(Rope identifier) { + public ParseNode new_bv(TruffleString identifier) { if (!is_local_id(identifier)) { - getterIdentifierError(lexer.getPosition(), identifier.getJavaString()); + getterIdentifierError(lexer.getPosition(), identifier.toJavaStringUncached()); } shadowing_lvar(identifier); @@ -1573,8 +1565,8 @@ public ParseNode new_bv(Rope identifier) { // 1.9 @SuppressFBWarnings("ES") - public ArgumentParseNode arg_var(Rope rope) { - return arg_var(rope.getJavaString()); + public ArgumentParseNode arg_var(TruffleString rope) { + return arg_var(rope.toJavaStringUncached()); } // Called with parameter names @@ -1601,7 +1593,7 @@ public ArgumentParseNode arg_var(String string) { return new ArgumentParseNode(lexer.getPosition(), name, current.addVariableThisScope(name)); } - public Rope formal_argument(Rope identifier) { + public TruffleString formal_argument(TruffleString identifier) { lexer.validateFormalIdentifier(identifier); return shadowing_lvar(identifier); @@ -1609,10 +1601,10 @@ public Rope formal_argument(Rope identifier) { // 1.9 @SuppressFBWarnings("ES") - public Rope shadowing_lvar(Rope rope) { - String name = rope.getJavaString().intern(); + public TruffleString shadowing_lvar(TruffleString tstring) { + String name = tstring.toJavaStringUncached().intern(); if (name == "_") { - return rope; + return tstring; } StaticScope current = getCurrentScope(); @@ -1620,7 +1612,7 @@ public Rope shadowing_lvar(Rope rope) { yyerror("duplicated argument name"); } - 
return rope; + return tstring; } // 1.9 @@ -1674,25 +1666,24 @@ public ParseNode arg_append(ParseNode node1, ParseNode node2) { } // MRI: reg_fragment_check - public Rope regexpFragmentCheck(RegexpParseNode end, Rope value) { - final RopeWithEncoding ropeWithEncoding = setRegexpEncoding(end, value); + public TStringWithEncoding regexpFragmentCheck(RegexpParseNode end, TStringWithEncoding value) { + final TStringWithEncoding strEnc = setRegexpEncoding(end, value); try { - ClassicRegexp.preprocessCheck(ropeWithEncoding); + ClassicRegexp.preprocessCheck(strEnc); } catch (DeferredRaiseException dre) { throw compile_error(dre.getException(getConfiguration().getContext()).getMessage()); } catch (RaiseException re) { throw compile_error(re.getMessage()); } - return ropeWithEncoding.getRope(); + return strEnc; } private void allocateNamedLocals(RegexpParseNode regexpNode) { - ClassicRegexp pattern = null; + final ClassicRegexp pattern; try { pattern = new ClassicRegexp( configuration.getContext(), regexpNode.getValue(), - Encodings.getBuiltInEncoding(regexpNode.getEncoding().getIndex()), regexpNode.getOptions()); } catch (DeferredRaiseException dre) { throw dre.getException(RubyLanguage.getCurrentContext()); @@ -1718,10 +1709,6 @@ private void allocateNamedLocals(RegexpParseNode regexpNode) { } } - private boolean is7BitASCII(Rope value) { - return value.isAsciiOnly(); - } - // TODO: Put somewhere more consolidated (similar) private char optionsEncodingChar(Encoding optionEncoding) { if (optionEncoding == USASCIIEncoding.INSTANCE) { @@ -1757,57 +1744,48 @@ public RuntimeException compile_error(String message) { // mri: rb_compile_error position.toSourceSection(lexer.getSource()))); } - protected void compileError(Encoding optionEncoding, Encoding encoding) { + protected void compileError(RubyEncoding optionEncoding, RubyEncoding encoding) { lexer.compile_error( PID.REGEXP_ENCODING_MISMATCH, - "regexp encoding option '" + optionsEncodingChar(optionEncoding) + + "regexp 
encoding option '" + + optionsEncodingChar(optionEncoding == null ? null : optionEncoding.jcoding) + "' differs from source encoding '" + encoding + "'"); } - public Encoding getEncoding(Rope name) { - return EncodingManager.getEncoding(name); - } - // MRI: reg_fragment_setenc_gen - public RopeWithEncoding setRegexpEncoding(RegexpParseNode end, Rope value) { + public TStringWithEncoding setRegexpEncoding(RegexpParseNode end, TStringWithEncoding value) { RegexpOptions options = end.getOptions(); options = options.setup(); - Encoding optionsEncoding = options.getEncoding(); - RubyEncoding encoding = Encodings.getBuiltInEncoding(value.getEncoding().getIndex()); + final RubyEncoding optionsEncoding = options.getEncoding() == null + ? null + : Encodings.getBuiltInEncoding(options.getEncoding()); + final RubyEncoding encoding = value.encoding; // Change encoding to one specified by regexp options as long as the string is compatible. if (optionsEncoding != null) { - if (optionsEncoding != value.getEncoding() && !is7BitASCII(value)) { - compileError(optionsEncoding, value.getEncoding()); + if (optionsEncoding != encoding && !value.isAsciiOnly()) { + compileError(optionsEncoding, encoding); } - value = parserRopeOperations.withEncoding(value, optionsEncoding); - encoding = Encodings.getBuiltInEncoding(optionsEncoding.getIndex()); + value = value.forceEncoding(optionsEncoding); } else if (options.isEncodingNone()) { - if (value.getEncoding() == ASCIIEncoding.INSTANCE && !is7BitASCII(value)) { - compileError(null, value.getEncoding()); + if (encoding == Encodings.BINARY && !value.isAsciiOnly()) { + compileError(null, encoding); } - value = parserRopeOperations.withEncoding(value, ASCIIEncoding.INSTANCE); - encoding = Encodings.BINARY; + value = value.forceEncoding(Encodings.BINARY); } else if (lexer.getEncoding() == USASCIIEncoding.INSTANCE) { - if (!is7BitASCII(value)) { - value = parserRopeOperations.withEncoding(value, USASCIIEncoding.INSTANCE); // This will raise later - 
encoding = Encodings.US_ASCII; + if (!value.isAsciiOnly()) { + value = value.forceEncoding(Encodings.US_ASCII); // This will raise later } else { - value = parserRopeOperations.withEncoding(value, ASCIIEncoding.INSTANCE); - encoding = Encodings.BINARY; + value = value.forceEncoding(Encodings.BINARY); } } - return new RopeWithEncoding(value, encoding); + return value; } - protected ClassicRegexp checkRegexpSyntax(Rope value, RegexpOptions options) { + protected ClassicRegexp checkRegexpSyntax(TStringWithEncoding value, RegexpOptions options) { try { // This is only for syntax checking but this will as a side effect create an entry in the regexp cache. - return new ClassicRegexp( - getConfiguration().getContext(), - value, - Encodings.getBuiltInEncoding(value.getEncoding().getIndex()), - options); + return new ClassicRegexp(getConfiguration().getContext(), value, options); } catch (DeferredRaiseException dre) { throw compile_error(dre.getException(getConfiguration().getContext()).getMessage()); } catch (RaiseException re) { @@ -1816,19 +1794,20 @@ protected ClassicRegexp checkRegexpSyntax(Rope value, RegexpOptions options) { } public ParseNode newRegexpNode(SourceIndexLength position, ParseNode contents, RegexpParseNode end) { - RegexpOptions options = end.getOptions().setup(); - Encoding encoding = lexer.getEncoding(); + final RegexpOptions options = end.getOptions().setup(); + final Encoding encoding = lexer.getEncoding(); if (contents == null) { - Rope newValue = RopeConstants.EMPTY_US_ASCII_ROPE; + TStringWithEncoding newValue = new TStringWithEncoding(TStringConstants.EMPTY_US_ASCII, + Encodings.US_ASCII); if (encoding != null) { - newValue = parserRopeOperations.withEncoding(newValue, encoding); + newValue = newValue.forceEncoding(Encodings.getBuiltInEncoding(encoding)); } newValue = regexpFragmentCheck(end, newValue); return new RegexpParseNode(position, newValue, options.withoutOnce()); } else if (contents instanceof StrParseNode) { - Rope meat = 
((StrParseNode) contents).getValue(); + TStringWithEncoding meat = ((StrParseNode) contents).getTStringWithEncoding(); meat = regexpFragmentCheck(end, meat); checkRegexpSyntax(meat, options.withoutOnce()); return new RegexpParseNode(contents.getPosition(), meat, options.withoutOnce()); @@ -1838,8 +1817,7 @@ public ParseNode newRegexpNode(SourceIndexLength position, ParseNode contents, R for (int i = 0; i < dStrNode.size(); i++) { ParseNode fragment = dStrNode.get(i); if (fragment instanceof StrParseNode) { - Rope frag = ((StrParseNode) fragment).getValue(); - regexpFragmentCheck(end, frag); + regexpFragmentCheck(end, ((StrParseNode) fragment).getTStringWithEncoding()); } } @@ -1850,10 +1828,9 @@ public ParseNode newRegexpNode(SourceIndexLength position, ParseNode contents, R } // EvStrParseNode: #{val}: no fragment check, but at least set encoding - Rope master = createMaster(options); + TStringWithEncoding master = createMaster(options); master = regexpFragmentCheck(end, master); - encoding = master.getEncoding(); - DRegexpParseNode node = new DRegexpParseNode(position, options, encoding); + DRegexpParseNode node = new DRegexpParseNode(position, options, master.encoding.jcoding); node.add(new StrParseNode(contents.getPosition(), master)); node.add(contents); return node; @@ -1862,9 +1839,10 @@ public ParseNode newRegexpNode(SourceIndexLength position, ParseNode contents, R // Create the magical empty 'master' string which will be encoded with // regexp options encoding so dregexps can end up starting with the // right encoding. - private Rope createMaster(RegexpOptions options) { + private TStringWithEncoding createMaster(RegexpOptions options) { final Encoding encoding = options.getEncoding(); - return RopeOperations.emptyRope(encoding == null ? ASCIIEncoding.INSTANCE : encoding); + final RubyEncoding enc = encoding == null ? 
Encodings.BINARY : Encodings.getBuiltInEncoding(encoding); + return new TStringWithEncoding(enc.tencoding.getEmpty(), enc); } public KeywordArgParseNode keyword_arg(SourceIndexLength position, AssignableParseNode assignable) { @@ -1894,7 +1872,7 @@ public ParseNode new_defined(SourceIndexLength position, ParseNode something) { return new DefinedParseNode(position, makeNullNil(something)); } - public static final Rope INTERNAL_ID = RopeConstants.EMPTY_US_ASCII_ROPE; + public static final TruffleString INTERNAL_ID = TStringConstants.EMPTY_US_ASCII; public SourceIndexLength extendedUntil(SourceIndexLength start, SourceIndexLength end) { return new SourceIndexLength(start.getCharIndex(), end.getCharEnd() - start.getCharIndex()); diff --git a/src/main/java/org/truffleruby/parser/parser/RubyParser.java b/src/main/java/org/truffleruby/parser/parser/RubyParser.java index 765b017ba89c..80c0514ea154 100644 --- a/src/main/java/org/truffleruby/parser/parser/RubyParser.java +++ b/src/main/java/org/truffleruby/parser/parser/RubyParser.java @@ -40,15 +40,14 @@ // line 2 "RubyParser.y" package org.truffleruby.parser.parser; -import org.jcodings.Encoding; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; + import org.truffleruby.Layouts; import org.truffleruby.SuppressFBWarnings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.RubyDeferredWarnings; import org.truffleruby.parser.ast.ArgsParseNode; @@ -136,7 +135,6 @@ import org.truffleruby.parser.lexer.StrTerm; 
import org.truffleruby.parser.lexer.SyntaxException.PID; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; import static org.truffleruby.parser.lexer.RubyLexer.EXPR_BEG; import static org.truffleruby.parser.lexer.RubyLexer.EXPR_END; import static org.truffleruby.parser.lexer.RubyLexer.EXPR_ENDARG; @@ -159,7 +157,7 @@ public RubyParser(LexerSource source, RubyDeferredWarnings warnings) { this.lexer = new RubyLexer(support, source, warnings); support.setLexer(lexer); } -// line 127 "-" +// line 125 "-" // %token constants public static final int keyword_class = 257; public static final int keyword_module = 258; @@ -1163,11 +1161,11 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[21] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new VAliasParseNode(((SourceIndexLength)yyVals[-2+yyTop]), support.symbolID(((Rope)yyVals[-1+yyTop])), support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new VAliasParseNode(((SourceIndexLength)yyVals[-2+yyTop]), support.symbolID(((TruffleString)yyVals[-1+yyTop])), support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[22] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new VAliasParseNode(((SourceIndexLength)yyVals[-2+yyTop]), support.symbolID(((Rope)yyVals[-1+yyTop])), support.symbolID(((BackRefParseNode)yyVals[0+yyTop]).getByteName())); + yyVal = new VAliasParseNode(((SourceIndexLength)yyVals[-2+yyTop]), support.symbolID(((TruffleString)yyVals[-1+yyTop])), support.symbolID(((BackRefParseNode)yyVals[0+yyTop]).getByteName())); return yyVal; }; states[23] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1246,11 +1244,11 @@ public Object yyparse (RubyLexer yyLex) { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); SourceIndexLength pos = ((AssignableParseNode)yyVals[-2+yyTop]).getPosition(); - Rope asgnOp = ((Rope)yyVals[-1+yyTop]); - if (asgnOp == RopeConstants.OR_OR) { + TruffleString asgnOp = ((TruffleString)yyVals[-1+yyTop]); + if (asgnOp == TStringConstants.OR_OR) 
{ ((AssignableParseNode)yyVals[-2+yyTop]).setValueNode(((ParseNode)yyVals[0+yyTop])); yyVal = new OpAsgnOrParseNode(pos, support.gettable2(((AssignableParseNode)yyVals[-2+yyTop])), ((AssignableParseNode)yyVals[-2+yyTop])); - } else if (asgnOp == RopeConstants.AMPERSAND_AMPERSAND) { + } else if (asgnOp == TStringConstants.AMPERSAND_AMPERSAND) { ((AssignableParseNode)yyVals[-2+yyTop]).setValueNode(((ParseNode)yyVals[0+yyTop])); yyVal = new OpAsgnAndParseNode(pos, support.gettable2(((AssignableParseNode)yyVals[-2+yyTop])), ((AssignableParseNode)yyVals[-2+yyTop])); } else { @@ -1262,27 +1260,27 @@ public Object yyparse (RubyLexer yyLex) { }; states[39] = (support, lexer, yyVal, yyVals, yyTop) -> { /* FIXME: arg_concat logic missing for opt_call_args*/ - yyVal = support.new_opElementAsgnNode(((ParseNode)yyVals[-5+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop])); + yyVal = support.new_opElementAsgnNode(((ParseNode)yyVals[-5+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop])); return yyVal; }; states[40] = (support, lexer, yyVal, yyVals, yyTop) -> { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); - yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[41] = (support, lexer, yyVal, yyVals, yyTop) -> { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); - yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-2+yyTop]), 
((Rope)yyVals[-1+yyTop])); + yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[42] = (support, lexer, yyVal, yyVals, yyTop) -> { SourceIndexLength pos = ((ParseNode)yyVals[-4+yyTop]).getPosition(); - yyVal = support.newOpConstAsgn(pos, support.new_colon2(pos, ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-2+yyTop])), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop])); + yyVal = support.newOpConstAsgn(pos, support.new_colon2(pos, ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-2+yyTop])), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop])); return yyVal; }; states[43] = (support, lexer, yyVal, yyVals, yyTop) -> { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); - yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[44] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1308,11 +1306,11 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[51] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[0+yyTop])), RopeConstants.BANG); + yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[0+yyTop])), TStringConstants.BANG); return yyVal; }; states[52] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[0+yyTop])), 
((Rope)yyVals[-1+yyTop])); + yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[0+yyTop])), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[54] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1320,7 +1318,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[58] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[59] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1328,7 +1326,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[60] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_fcall(((Rope)yyVals[0+yyTop])); + yyVal = support.new_fcall(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[61] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1342,19 +1340,19 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[63] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[64] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); + yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); return yyVal; }; 
states[65] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[66] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); + yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); return yyVal; }; states[67] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1450,24 +1448,24 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[92] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.assignableLabelOrIdentifier(((Rope)yyVals[0+yyTop]), null); + yyVal = support.assignableLabelOrIdentifier(((TruffleString)yyVals[0+yyTop]), null); return yyVal; }; states[93] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new InstAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new InstAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[94] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new GlobalAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new GlobalAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[95] = (support, lexer, yyVal, yyVals, yyTop) -> { if (support.isInDef()) support.compile_error("dynamic constant assignment"); - yyVal = new ConstDeclParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), null, NilImplicitParseNode.NIL); + yyVal = new 
ConstDeclParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), null, NilImplicitParseNode.NIL); return yyVal; }; states[96] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new ClassVarAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ClassVarAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[97] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1510,15 +1508,15 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[105] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[106] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[107] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[108] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1526,7 +1524,7 @@ public Object yyparse (RubyLexer yyLex) { SourceIndexLength position = support.getPosition(((ParseNode)yyVals[-2+yyTop])); - yyVal = new ConstDeclParseNode(position, (Rope) null, support.new_colon2(position, ((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon2(position, ((ParseNode)yyVals[-2+yyTop]), 
((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[109] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1536,7 +1534,7 @@ public Object yyparse (RubyLexer yyLex) { SourceIndexLength position = lexer.tokline; - yyVal = new ConstDeclParseNode(position, (Rope) null, support.new_colon3(position, ((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon3(position, ((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[110] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1544,25 +1542,25 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[111] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.assignableLabelOrIdentifier(((Rope)yyVals[0+yyTop]), null); + yyVal = support.assignableLabelOrIdentifier(((TruffleString)yyVals[0+yyTop]), null); return yyVal; }; states[112] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new InstAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new InstAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[113] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new GlobalAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new GlobalAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[114] = (support, lexer, yyVal, yyVals, yyTop) -> { if (support.isInDef()) support.compile_error("dynamic constant assignment"); - yyVal = new ConstDeclParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), null, NilImplicitParseNode.NIL); + yyVal = new ConstDeclParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), null, NilImplicitParseNode.NIL); return yyVal; }; states[115] = 
(support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new ClassVarAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ClassVarAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[116] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1605,15 +1603,15 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[124] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[125] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[126] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.attrset(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[127] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1623,7 +1621,7 @@ public Object yyparse (RubyLexer yyLex) { SourceIndexLength position = support.getPosition(((ParseNode)yyVals[-2+yyTop])); - yyVal = new ConstDeclParseNode(position, (Rope) null, support.new_colon2(position, ((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon2(position, ((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[128] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1633,7 +1631,7 @@ public Object 
yyparse (RubyLexer yyLex) { SourceIndexLength position = lexer.tokline; - yyVal = new ConstDeclParseNode(position, (Rope) null, support.new_colon3(position, ((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon3(position, ((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[129] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1645,49 +1643,49 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[131] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[132] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_colon3(lexer.tokline, ((Rope)yyVals[0+yyTop])); + yyVal = support.new_colon3(lexer.tokline, ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[133] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_colon2(lexer.tokline, null, ((Rope)yyVals[0+yyTop])); + yyVal = support.new_colon2(lexer.tokline, null, ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[134] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_colon2(support.getPosition(((ParseNode)yyVals[-2+yyTop])), ((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.new_colon2(support.getPosition(((ParseNode)yyVals[-2+yyTop])), ((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[135] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[136] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[137] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[138] = (support, lexer, yyVal, 
yyVals, yyTop) -> { lexer.setState(EXPR_ENDFN); - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[139] = (support, lexer, yyVal, yyVals, yyTop) -> { lexer.setState(EXPR_ENDFN); - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[140] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new LiteralParseNode(lexer.getPosition(), support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new LiteralParseNode(lexer.getPosition(), support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[141] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new LiteralParseNode(lexer.getPosition(), support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new LiteralParseNode(lexer.getPosition(), support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[142] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -1711,123 +1709,123 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[147] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[148] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[149] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[150] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[151] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[152] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[153] = (support, lexer, yyVal, yyVals, yyTop) 
-> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[154] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[155] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[156] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[157] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[158] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[159] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[160] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[161] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[162] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[163] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[164] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[165] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[166] = (support, lexer, yyVal, yyVals, 
yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[167] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[168] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[169] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[170] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[171] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[172] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[173] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[174] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[175] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[176] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[177] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2008,11 +2006,11 @@ public Object yyparse (RubyLexer yyLex) { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); SourceIndexLength pos = ((AssignableParseNode)yyVals[-2+yyTop]).getPosition(); - Rope asgnOp = ((Rope)yyVals[-1+yyTop]); - if (asgnOp == RopeConstants.OR_OR) { + TruffleString 
asgnOp = ((TruffleString)yyVals[-1+yyTop]); + if (asgnOp == TStringConstants.OR_OR) { ((AssignableParseNode)yyVals[-2+yyTop]).setValueNode(((ParseNode)yyVals[0+yyTop])); yyVal = new OpAsgnOrParseNode(pos, support.gettable2(((AssignableParseNode)yyVals[-2+yyTop])), ((AssignableParseNode)yyVals[-2+yyTop])); - } else if (asgnOp == RopeConstants.AMPERSAND_AMPERSAND) { + } else if (asgnOp == TStringConstants.AMPERSAND_AMPERSAND) { ((AssignableParseNode)yyVals[-2+yyTop]).setValueNode(((ParseNode)yyVals[0+yyTop])); yyVal = new OpAsgnAndParseNode(pos, support.gettable2(((AssignableParseNode)yyVals[-2+yyTop])), ((AssignableParseNode)yyVals[-2+yyTop])); } else { @@ -2024,32 +2022,32 @@ public Object yyparse (RubyLexer yyLex) { }; states[221] = (support, lexer, yyVal, yyVals, yyTop) -> { /* FIXME: arg_concat missing for opt_call_args*/ - yyVal = support.new_opElementAsgnNode(((ParseNode)yyVals[-5+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop])); + yyVal = support.new_opElementAsgnNode(((ParseNode)yyVals[-5+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop])); return yyVal; }; states[222] = (support, lexer, yyVal, yyVals, yyTop) -> { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); - yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[223] = (support, lexer, yyVal, yyVals, yyTop) -> { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); - yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), 
((Rope)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[224] = (support, lexer, yyVal, yyVals, yyTop) -> { value_expr(lexer, ((ParseNode)yyVals[0+yyTop])); - yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.newOpAsgn(support.getPosition(((ParseNode)yyVals[-4+yyTop])), ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[225] = (support, lexer, yyVal, yyVals, yyTop) -> { SourceIndexLength pos = support.getPosition(((ParseNode)yyVals[-4+yyTop])); - yyVal = support.newOpConstAsgn(pos, support.new_colon2(pos, ((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-2+yyTop])), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop])); + yyVal = support.newOpConstAsgn(pos, support.new_colon2(pos, ((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-2+yyTop])), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop])); return yyVal; }; states[226] = (support, lexer, yyVal, yyVals, yyTop) -> { SourceIndexLength pos = lexer.getPosition(); - yyVal = support.newOpConstAsgn(pos, new Colon3ParseNode(pos, support.symbolID(((Rope)yyVals[-2+yyTop]))), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop])); + yyVal = support.newOpConstAsgn(pos, new Colon3ParseNode(pos, support.symbolID(((TruffleString)yyVals[-2+yyTop]))), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop])); return yyVal; }; states[227] = (support, lexer, yyVal, 
yyVals, yyTop) -> { @@ -2101,55 +2099,55 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[234] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[235] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[236] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[237] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[238] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; 
states[239] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[240] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(support.getOperatorCallNode(((NumericParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()), ((Rope)yyVals[-3+yyTop])); + yyVal = support.getOperatorCallNode(support.getOperatorCallNode(((NumericParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()), ((TruffleString)yyVals[-3+yyTop])); return yyVal; }; states[241] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[242] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[243] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[244] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), 
lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[245] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[246] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[247] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2157,15 +2155,15 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[248] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[249] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[250] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), 
((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[251] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2179,23 +2177,23 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[252] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[253] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[0+yyTop])), ((Rope)yyVals[-1+yyTop])); + yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[0+yyTop])), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[254] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[0+yyTop]), ((Rope)yyVals[-1+yyTop])); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[0+yyTop]), ((TruffleString)yyVals[-1+yyTop])); return yyVal; }; states[255] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[256] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = 
support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[257] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2220,28 +2218,28 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[262] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[263] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[264] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[265] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[266] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[267] = (support, lexer, yyVal, yyVals, yyTop) -> { - support.warning(lexer.getPosition(), "comparison '" + ((Rope)yyVals[-1+yyTop]).getJavaString() + "' after comparison"); - yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); + support.warning(lexer.getPosition(), "comparison '" + ((TruffleString)yyVals[-1+yyTop]).toJavaStringUncached() + "' after comparison"); + yyVal = support.getOperatorCallNode(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), lexer.getPosition()); return yyVal; }; states[268] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ 
-2428,7 +2426,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[314] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_fcall(((Rope)yyVals[0+yyTop])); + yyVal = support.new_fcall(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[315] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2474,11 +2472,11 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[323] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_colon2(support.getPosition(((ParseNode)yyVals[-2+yyTop])), ((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop])); + yyVal = support.new_colon2(support.getPosition(((ParseNode)yyVals[-2+yyTop])), ((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[324] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_colon3(lexer.tokline, ((Rope)yyVals[0+yyTop])); + yyVal = support.new_colon3(lexer.tokline, ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[325] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2515,11 +2513,11 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[332] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[-1+yyTop])), RopeConstants.BANG); + yyVal = support.getOperatorCallNode(support.getConditionNode(((ParseNode)yyVals[-1+yyTop])), TStringConstants.BANG); return yyVal; }; states[333] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.getOperatorCallNode(NilImplicitParseNode.NIL, RopeConstants.BANG); + yyVal = support.getOperatorCallNode(NilImplicitParseNode.NIL, TStringConstants.BANG); return yyVal; }; states[334] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2653,7 +2651,7 @@ public Object yyparse (RubyLexer yyLex) { support.pushLocalScope(); yyVal = lexer.getCurrentArg(); lexer.setCurrentArg(null); - support.checkMethodName(((Rope)yyVals[0+yyTop])); + 
support.checkMethodName(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[359] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2664,10 +2662,10 @@ public Object yyparse (RubyLexer yyLex) { states[360] = (support, lexer, yyVal, yyVals, yyTop) -> { ParseNode body = support.makeNullNil(((ParseNode)yyVals[-1+yyTop])); - yyVal = new DefnParseNode(support.extendedUntil(((SourceIndexLength)yyVals[-6+yyTop]), ((SourceIndexLength)yyVals[0+yyTop])), support.symbolID(((Rope)yyVals[-5+yyTop])), (ArgsParseNode) yyVals[-2+yyTop], support.getCurrentScope(), body); + yyVal = new DefnParseNode(support.extendedUntil(((SourceIndexLength)yyVals[-6+yyTop]), ((SourceIndexLength)yyVals[0+yyTop])), support.symbolID(((TruffleString)yyVals[-5+yyTop])), (ArgsParseNode) yyVals[-2+yyTop], support.getCurrentScope(), body); support.popCurrentScope(); support.setInDef(((Boolean)yyVals[-3+yyTop]).booleanValue()); - lexer.setCurrentArg(((Rope)yyVals[-4+yyTop])); + lexer.setCurrentArg(((TruffleString)yyVals[-4+yyTop])); return yyVal; }; states[361] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2681,17 +2679,17 @@ public Object yyparse (RubyLexer yyLex) { lexer.setState(EXPR_ENDFN|EXPR_LABEL); /* force for args */ yyVal = lexer.getCurrentArg(); lexer.setCurrentArg(null); - support.checkMethodName(((Rope)yyVals[0+yyTop])); + support.checkMethodName(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[363] = (support, lexer, yyVal, yyVals, yyTop) -> { ParseNode body = ((ParseNode)yyVals[-1+yyTop]); if (body == null) body = NilImplicitParseNode.NIL; - yyVal = new DefsParseNode(support.extendedUntil(((SourceIndexLength)yyVals[-8+yyTop]), ((SourceIndexLength)yyVals[0+yyTop])), ((ParseNode)yyVals[-7+yyTop]), support.symbolID(((Rope)yyVals[-4+yyTop])), (ArgsParseNode) yyVals[-2+yyTop], support.getCurrentScope(), body); + yyVal = new DefsParseNode(support.extendedUntil(((SourceIndexLength)yyVals[-8+yyTop]), ((SourceIndexLength)yyVals[0+yyTop])), ((ParseNode)yyVals[-7+yyTop]), 
support.symbolID(((TruffleString)yyVals[-4+yyTop])), (ArgsParseNode) yyVals[-2+yyTop], support.getCurrentScope(), body); support.popCurrentScope(); support.setInDef(((Boolean)yyVals[-5+yyTop]).booleanValue()); - lexer.setCurrentArg(((Rope)yyVals[-3+yyTop])); + lexer.setCurrentArg(((TruffleString)yyVals[-3+yyTop])); return yyVal; }; states[364] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2741,7 +2739,7 @@ public Object yyparse (RubyLexer yyLex) { }; states[382] = (support, lexer, yyVal, yyVals, yyTop) -> yyVal; states[383] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.assignableInCurr(((Rope)yyVals[0+yyTop]), NilImplicitParseNode.NIL); + yyVal = support.assignableInCurr(((TruffleString)yyVals[0+yyTop]), NilImplicitParseNode.NIL); return yyVal; }; states[384] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2761,11 +2759,11 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[388] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new MultipleAsgnParseNode(((ListParseNode)yyVals[-3+yyTop]).getPosition(), ((ListParseNode)yyVals[-3+yyTop]), support.assignableInCurr(((Rope)yyVals[0+yyTop]), null), null); + yyVal = new MultipleAsgnParseNode(((ListParseNode)yyVals[-3+yyTop]).getPosition(), ((ListParseNode)yyVals[-3+yyTop]), support.assignableInCurr(((TruffleString)yyVals[0+yyTop]), null), null); return yyVal; }; states[389] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new MultipleAsgnParseNode(((ListParseNode)yyVals[-5+yyTop]).getPosition(), ((ListParseNode)yyVals[-5+yyTop]), support.assignableInCurr(((Rope)yyVals[-2+yyTop]), null), ((ListParseNode)yyVals[0+yyTop])); + yyVal = new MultipleAsgnParseNode(((ListParseNode)yyVals[-5+yyTop]).getPosition(), ((ListParseNode)yyVals[-5+yyTop]), support.assignableInCurr(((TruffleString)yyVals[-2+yyTop]), null), ((ListParseNode)yyVals[0+yyTop])); return yyVal; }; states[390] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2777,11 +2775,11 @@ public Object yyparse (RubyLexer 
yyLex) { return yyVal; }; states[392] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new MultipleAsgnParseNode(lexer.getPosition(), null, support.assignableInCurr(((Rope)yyVals[0+yyTop]), null), null); + yyVal = new MultipleAsgnParseNode(lexer.getPosition(), null, support.assignableInCurr(((TruffleString)yyVals[0+yyTop]), null), null); return yyVal; }; states[393] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new MultipleAsgnParseNode(lexer.getPosition(), null, support.assignableInCurr(((Rope)yyVals[-2+yyTop]), null), ((ListParseNode)yyVals[0+yyTop])); + yyVal = new MultipleAsgnParseNode(lexer.getPosition(), null, support.assignableInCurr(((TruffleString)yyVals[-2+yyTop]), null), ((ListParseNode)yyVals[0+yyTop])); return yyVal; }; states[394] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2793,15 +2791,15 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[396] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(((ListParseNode)yyVals[-3+yyTop]).getPosition(), ((ListParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(((ListParseNode)yyVals[-3+yyTop]).getPosition(), ((ListParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[397] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(((ListParseNode)yyVals[-1+yyTop]).getPosition(), ((ListParseNode)yyVals[-1+yyTop]), (Rope) null, ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(((ListParseNode)yyVals[-1+yyTop]).getPosition(), ((ListParseNode)yyVals[-1+yyTop]), (TruffleString) null, ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[398] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(lexer.getPosition(), null, ((Rope)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = 
support.new_args_tail(lexer.getPosition(), null, ((TruffleString)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[399] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2809,7 +2807,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[400] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(((BlockArgParseNode)yyVals[0+yyTop]).getPosition(), null, (Rope) null, ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(((BlockArgParseNode)yyVals[0+yyTop]).getPosition(), null, (TruffleString) null, ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[401] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2817,7 +2815,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[402] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(lexer.getPosition(), null, (Rope) null, null); + yyVal = support.new_args_tail(lexer.getPosition(), null, (TruffleString) null, null); return yyVal; }; states[403] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -2925,7 +2923,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[428] = (support, lexer, yyVal, yyVals, yyTop) -> { - support.new_bv(((Rope)yyVals[0+yyTop])); + support.new_bv(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[429] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3001,15 +2999,15 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[442] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[443] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), 
((Rope)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); + yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); return yyVal; }; states[444] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((Rope)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); + yyVal = support.new_call(((ParseNode)yyVals[-4+yyTop]), ((TruffleString)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((ParseNode)yyVals[-1+yyTop]), ((IterParseNode)yyVals[0+yyTop])); return yyVal; }; states[445] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3018,23 +3016,23 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[446] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[447] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[448] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[0+yyTop]), null, null); + yyVal = support.new_call(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[0+yyTop]), null, null); return yyVal; }; states[449] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = 
support.new_call(((ParseNode)yyVals[-2+yyTop]), ((Rope)yyVals[-1+yyTop]), RopeConstants.CALL, ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-2+yyTop]), ((TruffleString)yyVals[-1+yyTop]), TStringConstants.CALL, ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[450] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_call(((ParseNode)yyVals[-2+yyTop]), RopeConstants.CALL, ((ParseNode)yyVals[0+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-2+yyTop]), TStringConstants.CALL, ((ParseNode)yyVals[0+yyTop]), null); return yyVal; }; states[451] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3047,10 +3045,10 @@ public Object yyparse (RubyLexer yyLex) { }; states[453] = (support, lexer, yyVal, yyVals, yyTop) -> { if (((ParseNode)yyVals[-3+yyTop]) instanceof SelfParseNode) { - yyVal = support.new_fcall(RopeConstants.LBRACKET_RBRACKET); + yyVal = support.new_fcall(TStringConstants.LBRACKET_RBRACKET); support.frobnicate_fcall_args(((FCallParseNode)yyVal), ((ParseNode)yyVals[-1+yyTop]), null); } else { - yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), RopeConstants.LBRACKET_RBRACKET, ((ParseNode)yyVals[-1+yyTop]), null); + yyVal = support.new_call(((ParseNode)yyVals[-3+yyTop]), TStringConstants.LBRACKET_RBRACKET, ((ParseNode)yyVals[-1+yyTop]), null); } return yyVal; }; @@ -3105,7 +3103,7 @@ public Object yyparse (RubyLexer yyLex) { states[468] = (support, lexer, yyVal, yyVals, yyTop) -> { ParseNode node; if (((ParseNode)yyVals[-3+yyTop]) != null) { - node = support.appendToBlock(support.node_assign(((ParseNode)yyVals[-3+yyTop]), new GlobalVarParseNode(((SourceIndexLength)yyVals[-5+yyTop]), support.symbolID(RopeConstants.DOLLAR_BANG))), ((ParseNode)yyVals[-1+yyTop])); + node = support.appendToBlock(support.node_assign(((ParseNode)yyVals[-3+yyTop]), new GlobalVarParseNode(((SourceIndexLength)yyVals[-5+yyTop]), support.symbolID(TStringConstants.DOLLAR_BANG))), ((ParseNode)yyVals[-1+yyTop])); if 
(((ParseNode)yyVals[-1+yyTop]) != null) { node.setPosition(((SourceIndexLength)yyVals[-5+yyTop])); } @@ -3142,7 +3140,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[478] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.asSymbol(lexer.getPosition(), ((Rope)yyVals[0+yyTop])); + yyVal = support.asSymbol(lexer.getPosition(), ((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[480] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3183,9 +3181,9 @@ public Object yyparse (RubyLexer yyLex) { lexer.setHeredocIndent(0); if (((ParseNode)yyVals[-1+yyTop]) == null) { - yyVal = new XStrParseNode(position, null, CodeRange.CR_7BIT); + yyVal = new XStrParseNode(position, null); } else if (((ParseNode)yyVals[-1+yyTop]) instanceof StrParseNode) { - yyVal = new XStrParseNode(position, (Rope) ((StrParseNode)yyVals[-1+yyTop]).getValue(), ((StrParseNode)yyVals[-1+yyTop]).getCodeRange()); + yyVal = new XStrParseNode(position, ((StrParseNode)yyVals[-1+yyTop])); } else if (((ParseNode)yyVals[-1+yyTop]) instanceof DStrParseNode) { yyVal = new DXStrParseNode(position, ((DStrParseNode)yyVals[-1+yyTop])); @@ -3256,7 +3254,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[501] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = lexer.createStr(RopeOperations.emptyRope(lexer.getEncoding()), 0); + yyVal = lexer.createStr(lexer.encoding.tencoding.getEmpty(), lexer.encoding, 0); return yyVal; }; states[502] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3334,32 +3332,32 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[516] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new GlobalVarParseNode(lexer.getPosition(), support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new GlobalVarParseNode(lexer.getPosition(), support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[517] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new InstVarParseNode(lexer.getPosition(), 
support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new InstVarParseNode(lexer.getPosition(), support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[518] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new ClassVarParseNode(lexer.getPosition(), support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new ClassVarParseNode(lexer.getPosition(), support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[520] = (support, lexer, yyVal, yyVals, yyTop) -> { lexer.setState(EXPR_END|EXPR_ENDARG); - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[522] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[523] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[524] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[525] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3370,7 +3368,7 @@ public Object yyparse (RubyLexer yyLex) { /* EvStrNode :"#{some expression}"*/ /* Ruby 1.9 allows empty strings as symbols*/ if (((ParseNode)yyVals[-1+yyTop]) == null) { - yyVal = support.asSymbol(lexer.getPosition(), RopeConstants.EMPTY_US_ASCII_ROPE); + yyVal = support.asSymbol(lexer.getPosition(), TStringConstants.EMPTY_US_ASCII); } else if (((ParseNode)yyVals[-1+yyTop]) instanceof DStrParseNode) { yyVal = new DSymbolParseNode(((ParseNode)yyVals[-1+yyTop]).getPosition(), ((DStrParseNode)yyVals[-1+yyTop])); } else if (((ParseNode)yyVals[-1+yyTop]) instanceof StrParseNode) { @@ -3406,23 +3404,23 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[532] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.declareIdentifier(((Rope)yyVals[0+yyTop])); + yyVal = 
support.declareIdentifier(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[533] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new InstVarParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new InstVarParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[534] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new GlobalVarParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new GlobalVarParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[535] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new ConstParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new ConstParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[536] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new ClassVarParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop]))); + yyVal = new ClassVarParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop]))); return yyVal; }; states[537] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3442,8 +3440,8 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[541] = (support, lexer, yyVal, yyVals, yyTop) -> { - Encoding encoding = support.getConfiguration().getContext() == null ? UTF8Encoding.INSTANCE : support.getConfiguration().getContext().getEncodingManager().getLocaleEncoding().jcoding; - yyVal = new FileParseNode(lexer.tokline, StringOperations.encodeRope(lexer.getFile(), encoding, CR_UNKNOWN)); + RubyEncoding encoding = support.getConfiguration().getContext() == null ? 
Encodings.UTF_8 : support.getConfiguration().getContext().getEncodingManager().getLocaleEncoding(); + yyVal = new FileParseNode(lexer.tokline, TStringUtils.fromJavaString(lexer.getFile(), encoding), encoding); return yyVal; }; states[542] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3455,25 +3453,25 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[544] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.assignableLabelOrIdentifier(((Rope)yyVals[0+yyTop]), null); + yyVal = support.assignableLabelOrIdentifier(((TruffleString)yyVals[0+yyTop]), null); return yyVal; }; states[545] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new InstAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new InstAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[546] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new GlobalAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new GlobalAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[547] = (support, lexer, yyVal, yyVals, yyTop) -> { if (support.isInDef()) support.compile_error("dynamic constant assignment"); - yyVal = new ConstDeclParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), null, NilImplicitParseNode.NIL); + yyVal = new ConstDeclParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), null, NilImplicitParseNode.NIL); return yyVal; }; states[548] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = new ClassVarAsgnParseNode(lexer.tokline, support.symbolID(((Rope)yyVals[0+yyTop])), NilImplicitParseNode.NIL); + yyVal = new ClassVarAsgnParseNode(lexer.tokline, support.symbolID(((TruffleString)yyVals[0+yyTop])), NilImplicitParseNode.NIL); return yyVal; }; states[549] = 
(support, lexer, yyVal, yyVals, yyTop) -> { @@ -3552,15 +3550,15 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[564] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(((ListParseNode)yyVals[-3+yyTop]).getPosition(), ((ListParseNode)yyVals[-3+yyTop]), ((Rope)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(((ListParseNode)yyVals[-3+yyTop]).getPosition(), ((ListParseNode)yyVals[-3+yyTop]), ((TruffleString)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[565] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(((ListParseNode)yyVals[-1+yyTop]).getPosition(), ((ListParseNode)yyVals[-1+yyTop]), (Rope) null, ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(((ListParseNode)yyVals[-1+yyTop]).getPosition(), ((ListParseNode)yyVals[-1+yyTop]), (TruffleString) null, ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[566] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(lexer.getPosition(), null, ((Rope)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(lexer.getPosition(), null, ((TruffleString)yyVals[-1+yyTop]), ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[567] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3568,7 +3566,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[568] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(((BlockArgParseNode)yyVals[0+yyTop]).getPosition(), null, (Rope) null, ((BlockArgParseNode)yyVals[0+yyTop])); + yyVal = support.new_args_tail(((BlockArgParseNode)yyVals[0+yyTop]).getPosition(), null, (TruffleString) null, ((BlockArgParseNode)yyVals[0+yyTop])); return yyVal; }; states[569] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3576,7 +3574,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[570] = (support, 
lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.new_args_tail(lexer.getPosition(), null, (Rope) null, null); + yyVal = support.new_args_tail(lexer.getPosition(), null, (TruffleString) null, null); return yyVal; }; states[571] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3647,7 +3645,7 @@ public Object yyparse (RubyLexer yyLex) { SourceIndexLength position = support.getPosition(null); RestArgParseNode splat = new RestArgParseNode(position, ParserSupport.FORWARD_ARGS_REST_VAR, 0); BlockArgParseNode block = new BlockArgParseNode(position, 1, ParserSupport.FORWARD_ARGS_BLOCK_VAR); - ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_ROPE, block); + ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_TSTRING, block); yyVal = support.new_args(position, ((ListParseNode)yyVals[-2+yyTop]), null, splat, null, argsTail); return yyVal; }; @@ -3655,7 +3653,7 @@ public Object yyparse (RubyLexer yyLex) { SourceIndexLength position = support.getPosition(null); RestArgParseNode splat = new RestArgParseNode(position, ParserSupport.FORWARD_ARGS_REST_VAR, 0); BlockArgParseNode block = new BlockArgParseNode(position, 1, ParserSupport.FORWARD_ARGS_BLOCK_VAR); - ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_ROPE, block); + ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_TSTRING, block); yyVal = support.new_args(position, null, null, splat, null, argsTail); return yyVal; }; @@ -3676,16 +3674,16 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[594] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); /* Not really reached*/ + yyVal = ((TruffleString)yyVals[0+yyTop]); /* Not really reached*/ return yyVal; }; states[595] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.formal_argument(((Rope)yyVals[0+yyTop])); + 
yyVal = support.formal_argument(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[596] = (support, lexer, yyVal, yyVals, yyTop) -> { - lexer.setCurrentArg(((Rope)yyVals[0+yyTop])); - yyVal = support.arg_var(((Rope)yyVals[0+yyTop])); + lexer.setCurrentArg(((TruffleString)yyVals[0+yyTop])); + yyVal = support.arg_var(((TruffleString)yyVals[0+yyTop])); return yyVal; }; states[597] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3718,27 +3716,27 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[601] = (support, lexer, yyVal, yyVals, yyTop) -> { - support.arg_var(support.formal_argument(((Rope)yyVals[0+yyTop]))); - lexer.setCurrentArg(((Rope)yyVals[0+yyTop])); - yyVal = ((Rope)yyVals[0+yyTop]); + support.arg_var(support.formal_argument(((TruffleString)yyVals[0+yyTop]))); + lexer.setCurrentArg(((TruffleString)yyVals[0+yyTop])); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[602] = (support, lexer, yyVal, yyVals, yyTop) -> { lexer.setCurrentArg(null); - yyVal = support.keyword_arg(((ParseNode)yyVals[0+yyTop]).getPosition(), support.assignableKeyword(((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]))); + yyVal = support.keyword_arg(((ParseNode)yyVals[0+yyTop]).getPosition(), support.assignableKeyword(((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]))); return yyVal; }; states[603] = (support, lexer, yyVal, yyVals, yyTop) -> { lexer.setCurrentArg(null); - yyVal = support.keyword_arg(lexer.getPosition(), support.assignableKeyword(((Rope)yyVals[0+yyTop]), RequiredKeywordArgumentValueParseNode.INSTANCE)); + yyVal = support.keyword_arg(lexer.getPosition(), support.assignableKeyword(((TruffleString)yyVals[0+yyTop]), RequiredKeywordArgumentValueParseNode.INSTANCE)); return yyVal; }; states[604] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.keyword_arg(support.getPosition(((ParseNode)yyVals[0+yyTop])), support.assignableKeyword(((Rope)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]))); + yyVal = 
support.keyword_arg(support.getPosition(((ParseNode)yyVals[0+yyTop])), support.assignableKeyword(((TruffleString)yyVals[-1+yyTop]), ((ParseNode)yyVals[0+yyTop]))); return yyVal; }; states[605] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = support.keyword_arg(lexer.getPosition(), support.assignableKeyword(((Rope)yyVals[0+yyTop]), RequiredKeywordArgumentValueParseNode.INSTANCE)); + yyVal = support.keyword_arg(lexer.getPosition(), support.assignableKeyword(((TruffleString)yyVals[0+yyTop]), RequiredKeywordArgumentValueParseNode.INSTANCE)); return yyVal; }; states[606] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3758,16 +3756,16 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[610] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[611] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[613] = (support, lexer, yyVal, yyVals, yyTop) -> { - support.shadowing_lvar(((Rope)yyVals[0+yyTop])); - yyVal = ((Rope)yyVals[0+yyTop]); + support.shadowing_lvar(((TruffleString)yyVals[0+yyTop])); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[614] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3801,19 +3799,19 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[621] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[622] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[623] = (support, lexer, yyVal, yyVals, yyTop) -> { - if (!support.is_local_id(((Rope)yyVals[0+yyTop]))) { + if (!support.is_local_id(((TruffleString)yyVals[0+yyTop]))) { support.yyerror("rest argument must be local variable"); } - yyVal = new 
RestArgParseNode(support.arg_var(support.shadowing_lvar(((Rope)yyVals[0+yyTop])))); + yyVal = new RestArgParseNode(support.arg_var(support.shadowing_lvar(((TruffleString)yyVals[0+yyTop])))); return yyVal; }; states[624] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3822,19 +3820,19 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[625] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[626] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[627] = (support, lexer, yyVal, yyVals, yyTop) -> { - if (!support.is_local_id(((Rope)yyVals[0+yyTop]))) { + if (!support.is_local_id(((TruffleString)yyVals[0+yyTop]))) { support.yyerror("block argument must be local variable"); } - yyVal = new BlockArgParseNode(support.arg_var(support.shadowing_lvar(((Rope)yyVals[0+yyTop])))); + yyVal = new BlockArgParseNode(support.arg_var(support.shadowing_lvar(((TruffleString)yyVals[0+yyTop])))); return yyVal; }; states[628] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3885,7 +3883,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[638] = (support, lexer, yyVal, yyVals, yyTop) -> { - ParseNode label = support.asSymbol(support.getPosition(((ParseNode)yyVals[0+yyTop])), ((Rope)yyVals[-1+yyTop])); + ParseNode label = support.asSymbol(support.getPosition(((ParseNode)yyVals[0+yyTop])), ((TruffleString)yyVals[-1+yyTop])); yyVal = support.createKeyValue(label, ((ParseNode)yyVals[0+yyTop])); return yyVal; }; @@ -3907,71 +3905,71 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; states[641] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[642] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = 
((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[643] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[644] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[645] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[646] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[647] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[648] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[649] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[650] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[651] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[652] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[653] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[654] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[656] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal 
= ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[661] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[662] = (support, lexer, yyVal, yyVals, yyTop) -> { - yyVal = ((Rope)yyVals[0+yyTop]); + yyVal = ((TruffleString)yyVals[0+yyTop]); return yyVal; }; states[670] = (support, lexer, yyVal, yyVals, yyTop) -> { @@ -3983,7 +3981,7 @@ public Object yyparse (RubyLexer yyLex) { return yyVal; }; } -// line 2832 "RubyParser.y" +// line 2830 "RubyParser.y" /** The parse method use an lexer stream and parse it to an AST node * structure @@ -4000,4 +3998,4 @@ public RubyParserResult parse(ParserConfiguration configuration) { } // CheckStyle: stop generated // @formatter:on -// line 10885 "-" +// line 10883 "-" diff --git a/src/main/java/org/truffleruby/parser/parser/RubyParser.y b/src/main/java/org/truffleruby/parser/parser/RubyParser.y index 749dcfdb7935..9a63631c705c 100644 --- a/src/main/java/org/truffleruby/parser/parser/RubyParser.y +++ b/src/main/java/org/truffleruby/parser/parser/RubyParser.y @@ -1,15 +1,14 @@ %{ package org.truffleruby.parser.parser; -import org.jcodings.Encoding; -import org.jcodings.specific.UTF8Encoding; +import com.oracle.truffle.api.strings.TruffleString; + import org.truffleruby.Layouts; import org.truffleruby.SuppressFBWarnings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; -import org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; -import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.RubyEncoding; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.TStringConstants; import org.truffleruby.language.SourceIndexLength; import org.truffleruby.parser.RubyDeferredWarnings; import org.truffleruby.parser.ast.ArgsParseNode; @@ -97,7 +96,6 @@ import 
org.truffleruby.parser.lexer.RubyLexer; import org.truffleruby.parser.lexer.StrTerm; import org.truffleruby.parser.lexer.SyntaxException.PID; -import static org.truffleruby.core.rope.CodeRange.CR_UNKNOWN; import static org.truffleruby.parser.lexer.RubyLexer.EXPR_BEG; import static org.truffleruby.parser.lexer.RubyLexer.EXPR_END; import static org.truffleruby.parser.lexer.RubyLexer.EXPR_ENDARG; @@ -133,61 +131,61 @@ public class RubyParser { modifier_rescue keyword_alias keyword_defined keyword_BEGIN keyword_END keyword__LINE__ keyword__FILE__ keyword__ENCODING__ keyword_do_lambda -%token tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL +%token tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL %token tCHAR -%type sym symbol operation operation2 operation3 op fname cname -%type f_norm_arg restarg_mark -%type dot_or_colon blkarg_mark -%token tUPLUS /* unary+ */ -%token tUMINUS /* unary- */ -%token tUMINUS_NUM /* unary- */ -%token tPOW /* ** */ -%token tCMP /* <=> */ -%token tEQ /* == */ -%token tEQQ /* === */ -%token tNEQ /* != */ -%token tGEQ /* >= */ -%token tLEQ /* <= */ -%token tANDOP tOROP /* && and || */ -%token tMATCH tNMATCH /* =~ and !~ */ -%token tDOT /* Is just '.' in ruby and not a token */ -%token tDOT2 tDOT3 /* .. and ... */ -%token tBDOT2 tBDOT3 /* (.. and (... */ -%token tAREF tASET /* [] and []= */ -%token tLSHFT tRSHFT /* << and >> */ -%token tANDDOT /* &. */ -%token tCOLON2 /* :: */ -%token tCOLON3 /* :: at EXPR_BEG */ -%token tOP_ASGN /* +=, -= etc. */ -%token tASSOC /* => */ +%type sym symbol operation operation2 operation3 op fname cname +%type f_norm_arg restarg_mark +%type dot_or_colon blkarg_mark +%token tUPLUS /* unary+ */ +%token tUMINUS /* unary- */ +%token tUMINUS_NUM /* unary- */ +%token tPOW /* ** */ +%token tCMP /* <=> */ +%token tEQ /* == */ +%token tEQQ /* === */ +%token tNEQ /* != */ +%token tGEQ /* >= */ +%token tLEQ /* <= */ +%token tANDOP tOROP /* && and || */ +%token tMATCH tNMATCH /* =~ and !~ */ +%token tDOT /* Is just '.' 
in ruby and not a token */ +%token tDOT2 tDOT3 /* .. and ... */ +%token tBDOT2 tBDOT3 /* (.. and (... */ +%token tAREF tASET /* [] and []= */ +%token tLSHFT tRSHFT /* << and >> */ +%token tANDDOT /* &. */ +%token tCOLON2 /* :: */ +%token tCOLON3 /* :: at EXPR_BEG */ +%token tOP_ASGN /* +=, -= etc. */ +%token tASSOC /* => */ %token tLPAREN /* ( */ %token tLPAREN2 /* ( Is just '(' in ruby and not a token */ -%token tRPAREN /* ) */ +%token tRPAREN /* ) */ %token tLPAREN_ARG /* ( */ -%token tLBRACK /* [ */ -%token tRBRACK /* ] */ +%token tLBRACK /* [ */ +%token tRBRACK /* ] */ %token tLBRACE /* { */ %token tLBRACE_ARG /* { */ -%token tSTAR /* * */ -%token tSTAR2 /* * Is just '*' in ruby and not a token */ -%token tAMPER /* & */ -%token tAMPER2 /* & Is just '&' in ruby and not a token */ -%token tTILDE /* ` is just '`' in ruby and not a token */ -%token tPERCENT /* % is just '%' in ruby and not a token */ -%token tDIVIDE /* / is just '/' in ruby and not a token */ -%token tPLUS /* + is just '+' in ruby and not a token */ -%token tMINUS /* - is just '-' in ruby and not a token */ -%token tLT /* < is just '<' in ruby and not a token */ -%token tGT /* > is just '>' in ruby and not a token */ -%token tPIPE /* | is just '|' in ruby and not a token */ -%token tBANG /* ! is just '!' 
in ruby and not a token */ -%token tCARET /* ^ is just '^' in ruby and not a token */ +%token tSTAR /* * */ +%token tSTAR2 /* * Is just '*' in ruby and not a token */ +%token tAMPER /* & */ +%token tAMPER2 /* & Is just '&' in ruby and not a token */ +%token tTILDE /* ` is just '`' in ruby and not a token */ +%token tPERCENT /* % is just '%' in ruby and not a token */ +%token tDIVIDE /* / is just '/' in ruby and not a token */ +%token tPLUS /* + is just '+' in ruby and not a token */ +%token tMINUS /* - is just '-' in ruby and not a token */ +%token tLT /* < is just '<' in ruby and not a token */ +%token tGT /* > is just '>' in ruby and not a token */ +%token tPIPE /* | is just '|' in ruby and not a token */ +%token tBANG /* ! is just '!' in ruby and not a token */ +%token tCARET /* ^ is just '^' in ruby and not a token */ %token tLCURLY /* { is just '{' in ruby and not a token */ -%token tRCURLY /* } is just '}' in ruby and not a token */ -%token tBACK_REF2 /* { is just '`' in ruby and not a token */ -%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG -%token tSTRING_DBEG tSTRING_DVAR tSTRING_END -%token tLAMBDA tLAMBEG +%token tRCURLY /* } is just '}' in ruby and not a token */ +%token tBACK_REF2 /* { is just '`' in ruby and not a token */ +%token tSYMBEG tSTRING_BEG tXSTRING_BEG tREGEXP_BEG tWORDS_BEG tQWORDS_BEG +%token tSTRING_DBEG tSTRING_DVAR tSTRING_END +%token tLAMBDA tLAMBEG %token tNTH_REF tBACK_REF tSTRING_CONTENT tINTEGER tIMAGINARY %token tFLOAT %token tRATIONAL @@ -240,20 +238,20 @@ public class RubyParser { %type lambda %type mlhs_inner f_block_opt for_var %type opt_call_args f_marg f_margs -%type bvar -%type reswords f_bad_arg relop -%type rparen rbracket +%type bvar +%type reswords f_bad_arg relop +%type rparen rbracket %type top_compstmt top_stmts top_stmt -%token tSYMBOLS_BEG -%token tQSYMBOLS_BEG -%token tDSTAR -%token tSTRING_DEND -%type kwrest_mark f_kwrest f_label -%type args_forward -%type call_op call_op2 +%token 
tSYMBOLS_BEG +%token tQSYMBOLS_BEG +%token tDSTAR +%token tSTRING_DEND +%type kwrest_mark f_kwrest f_label +%type args_forward +%type call_op call_op2 %type f_arg_asgn %type fcall -%token tLABEL_END +%token tLABEL_END %type k_return k_class k_module /* @@ -453,11 +451,11 @@ command_asgn : lhs '=' command_rhs { value_expr(lexer, $3); SourceIndexLength pos = $1.getPosition(); - Rope asgnOp = $2; - if (asgnOp == RopeConstants.OR_OR) { + TruffleString asgnOp = $2; + if (asgnOp == TStringConstants.OR_OR) { $1.setValueNode($3); $$ = new OpAsgnOrParseNode(pos, support.gettable2($1), $1); - } else if (asgnOp == RopeConstants.AMPERSAND_AMPERSAND) { + } else if (asgnOp == TStringConstants.AMPERSAND_AMPERSAND) { $1.setValueNode($3); $$ = new OpAsgnAndParseNode(pos, support.gettable2($1), $1); } else { @@ -511,7 +509,7 @@ expr : command_call $$ = support.newOrNode(support.getPosition($1), $1, $3); } | keyword_not opt_nl expr { - $$ = support.getOperatorCallNode(support.getConditionNode($3), RopeConstants.BANG); + $$ = support.getOperatorCallNode(support.getConditionNode($3), TStringConstants.BANG); } | tBANG command_call { $$ = support.getOperatorCallNode(support.getConditionNode($2), $1); @@ -706,7 +704,7 @@ mlhs_node : /*mri:user_variable*/ tIDENTIFIER { SourceIndexLength position = support.getPosition($1); - $$ = new ConstDeclParseNode(position, (Rope) null, support.new_colon2(position, $1, $3), NilImplicitParseNode.NIL); + $$ = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon2(position, $1, $3), NilImplicitParseNode.NIL); } | tCOLON3 tCONSTANT { if (support.isInDef()) { @@ -715,7 +713,7 @@ mlhs_node : /*mri:user_variable*/ tIDENTIFIER { SourceIndexLength position = lexer.tokline; - $$ = new ConstDeclParseNode(position, (Rope) null, support.new_colon3(position, $2), NilImplicitParseNode.NIL); + $$ = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon3(position, $2), NilImplicitParseNode.NIL); } | backref { 
support.backrefAssignError($1); @@ -786,7 +784,7 @@ lhs : /*mri:user_variable*/ tIDENTIFIER { SourceIndexLength position = support.getPosition($1); - $$ = new ConstDeclParseNode(position, (Rope) null, support.new_colon2(position, $1, $3), NilImplicitParseNode.NIL); + $$ = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon2(position, $1, $3), NilImplicitParseNode.NIL); } | tCOLON3 tCONSTANT { if (support.isInDef()) { @@ -795,7 +793,7 @@ lhs : /*mri:user_variable*/ tIDENTIFIER { SourceIndexLength position = lexer.tokline; - $$ = new ConstDeclParseNode(position, (Rope) null, support.new_colon3(position, $2), NilImplicitParseNode.NIL); + $$ = new ConstDeclParseNode(position, (TruffleString) null, support.new_colon3(position, $2), NilImplicitParseNode.NIL); } | backref { support.backrefAssignError($1); @@ -1091,11 +1089,11 @@ arg : lhs '=' arg_rhs { value_expr(lexer, $3); SourceIndexLength pos = $1.getPosition(); - Rope asgnOp = $2; - if (asgnOp == RopeConstants.OR_OR) { + TruffleString asgnOp = $2; + if (asgnOp == TStringConstants.OR_OR) { $1.setValueNode($3); $$ = new OpAsgnOrParseNode(pos, support.gettable2($1), $1); - } else if (asgnOp == RopeConstants.AMPERSAND_AMPERSAND) { + } else if (asgnOp == TStringConstants.AMPERSAND_AMPERSAND) { $1.setValueNode($3); $$ = new OpAsgnAndParseNode(pos, support.gettable2($1), $1); } else { @@ -1278,7 +1276,7 @@ rel_expr : arg relop arg %prec tGT { $$ = support.getOperatorCallNode($1, $2, $3, lexer.getPosition()); } | rel_expr relop arg %prec tGT { - support.warning(lexer.getPosition(), "comparison '" + $2.getJavaString() + "' after comparison"); + support.warning(lexer.getPosition(), "comparison '" + $2.toJavaStringUncached() + "' after comparison"); $$ = support.getOperatorCallNode($1, $2, $3, lexer.getPosition()); } @@ -1531,10 +1529,10 @@ primary : literal $$ = support.new_defined($1, $4); } | keyword_not tLPAREN2 expr rparen { - $$ = support.getOperatorCallNode(support.getConditionNode($3), 
RopeConstants.BANG); + $$ = support.getOperatorCallNode(support.getConditionNode($3), TStringConstants.BANG); } | keyword_not tLPAREN2 rparen { - $$ = support.getOperatorCallNode(NilImplicitParseNode.NIL, RopeConstants.BANG); + $$ = support.getOperatorCallNode(NilImplicitParseNode.NIL, TStringConstants.BANG); } | fcall brace_block { support.frobnicate_fcall_args($1, null, $2); @@ -1646,7 +1644,7 @@ primary : literal $$ = new DefnParseNode(support.extendedUntil($1, $7), support.symbolID($2), (ArgsParseNode) $5, support.getCurrentScope(), body); support.popCurrentScope(); support.setInDef($4.booleanValue()); - lexer.setCurrentArg($3); + lexer.setCurrentArg($3); } | keyword_def singleton dot_or_colon { lexer.setState(EXPR_FNAME); @@ -1665,7 +1663,7 @@ primary : literal $$ = new DefsParseNode(support.extendedUntil($1, $9), $2, support.symbolID($5), (ArgsParseNode) $7, support.getCurrentScope(), body); support.popCurrentScope(); support.setInDef($4.booleanValue()); - lexer.setCurrentArg($6); + lexer.setCurrentArg($6); } | keyword_break { $$ = new BreakParseNode($1, NilImplicitParseNode.NIL); @@ -1770,7 +1768,7 @@ block_args_tail : f_block_kwarg ',' f_kwrest opt_f_block_arg { $$ = support.new_args_tail($1.getPosition(), $1, $3, $4); } | f_block_kwarg opt_f_block_arg { - $$ = support.new_args_tail($1.getPosition(), $1, (Rope) null, $2); + $$ = support.new_args_tail($1.getPosition(), $1, (TruffleString) null, $2); } | f_kwrest opt_f_block_arg { $$ = support.new_args_tail(lexer.getPosition(), null, $1, $2); @@ -1779,14 +1777,14 @@ block_args_tail : f_block_kwarg ',' f_kwrest opt_f_block_arg { $$ = support.new_args_tail(lexer.getPosition(), null, RubyLexer.Keyword.NIL.bytes, $2); } | f_block_arg { - $$ = support.new_args_tail($1.getPosition(), null, (Rope) null, $1); + $$ = support.new_args_tail($1.getPosition(), null, (TruffleString) null, $1); } opt_block_args_tail : ',' block_args_tail { $$ = $2; } | /* none */ { - $$ = support.new_args_tail(lexer.getPosition(), null, 
(Rope) null, null); + $$ = support.new_args_tail(lexer.getPosition(), null, (TruffleString) null, null); } // [!null] @@ -1967,10 +1965,10 @@ method_call : fcall paren_args { $$ = support.new_call($1, $3, null, null); } | primary_value call_op paren_args { - $$ = support.new_call($1, $2, RopeConstants.CALL, $3, null); + $$ = support.new_call($1, $2, TStringConstants.CALL, $3, null); } | primary_value tCOLON2 paren_args { - $$ = support.new_call($1, RopeConstants.CALL, $3, null); + $$ = support.new_call($1, TStringConstants.CALL, $3, null); } | keyword_super paren_args { $$ = support.new_super($1, $2); @@ -1980,10 +1978,10 @@ method_call : fcall paren_args { } | primary_value '[' opt_call_args rbracket { if ($1 instanceof SelfParseNode) { - $$ = support.new_fcall(RopeConstants.LBRACKET_RBRACKET); + $$ = support.new_fcall(TStringConstants.LBRACKET_RBRACKET); support.frobnicate_fcall_args($$, $3, null); } else { - $$ = support.new_call($1, RopeConstants.LBRACKET_RBRACKET, $3, null); + $$ = support.new_call($1, TStringConstants.LBRACKET_RBRACKET, $3, null); } } @@ -2033,7 +2031,7 @@ p_cases : opt_else | p_case_body opt_rescue : keyword_rescue exc_list exc_var then compstmt opt_rescue { ParseNode node; if ($3 != null) { - node = support.appendToBlock(support.node_assign($3, new GlobalVarParseNode($1, support.symbolID(RopeConstants.DOLLAR_BANG))), $5); + node = support.appendToBlock(support.node_assign($3, new GlobalVarParseNode($1, support.symbolID(TStringConstants.DOLLAR_BANG))), $5); if ($5 != null) { node.setPosition($1); } @@ -2111,9 +2109,9 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END { lexer.setHeredocIndent(0); if ($2 == null) { - $$ = new XStrParseNode(position, null, CodeRange.CR_7BIT); + $$ = new XStrParseNode(position, null); } else if ($2 instanceof StrParseNode) { - $$ = new XStrParseNode(position, (Rope) $2.getValue(), $2.getCodeRange()); + $$ = new XStrParseNode(position, $2); } else if ($2 instanceof DStrParseNode) { $$ = new 
DXStrParseNode(position, $2); @@ -2180,7 +2178,7 @@ qsym_list : /* none */ { } string_contents : /* none */ { - $$ = lexer.createStr(RopeOperations.emptyRope(lexer.getEncoding()), 0); + $$ = lexer.createStr(lexer.encoding.tencoding.getEmpty(), lexer.encoding, 0); } | string_contents string_content { $$ = support.literal_concat($1, $2); @@ -2277,7 +2275,7 @@ dsym : tSYMBEG xstring_contents tSTRING_END { // EvStrNode :"#{some expression}" // Ruby 1.9 allows empty strings as symbols if ($2 == null) { - $$ = support.asSymbol(lexer.getPosition(), RopeConstants.EMPTY_US_ASCII_ROPE); + $$ = support.asSymbol(lexer.getPosition(), TStringConstants.EMPTY_US_ASCII); } else if ($2 instanceof DStrParseNode) { $$ = new DSymbolParseNode($2.getPosition(), $2); } else if ($2 instanceof StrParseNode) { @@ -2337,8 +2335,8 @@ var_ref : /*mri:user_variable*/ tIDENTIFIER { $$ = new FalseParseNode((SourceIndexLength) $$); } | keyword__FILE__ { - Encoding encoding = support.getConfiguration().getContext() == null ? UTF8Encoding.INSTANCE : support.getConfiguration().getContext().getEncodingManager().getLocaleEncoding().jcoding; - $$ = new FileParseNode(lexer.tokline, StringOperations.encodeRope(lexer.getFile(), encoding, CR_UNKNOWN)); + RubyEncoding encoding = support.getConfiguration().getContext() == null ? 
Encodings.UTF_8 : support.getConfiguration().getContext().getEncodingManager().getLocaleEncoding(); + $$ = new FileParseNode(lexer.tokline, TStringUtils.fromJavaString(lexer.getFile(), encoding), encoding); } | keyword__LINE__ { $$ = new FixnumParseNode(lexer.tokline, lexer.tokline.toSourceSection(lexer.getSource()).getStartLine() + lexer.getLineOffset()); @@ -2434,7 +2432,7 @@ args_tail : f_kwarg ',' f_kwrest opt_f_block_arg { $$ = support.new_args_tail($1.getPosition(), $1, $3, $4); } | f_kwarg opt_f_block_arg { - $$ = support.new_args_tail($1.getPosition(), $1, (Rope) null, $2); + $$ = support.new_args_tail($1.getPosition(), $1, (TruffleString) null, $2); } | f_kwrest opt_f_block_arg { $$ = support.new_args_tail(lexer.getPosition(), null, $1, $2); @@ -2443,14 +2441,14 @@ args_tail : f_kwarg ',' f_kwrest opt_f_block_arg { $$ = support.new_args_tail(lexer.getPosition(), null, RubyLexer.Keyword.NIL.bytes, $2); } | f_block_arg { - $$ = support.new_args_tail($1.getPosition(), null, (Rope) null, $1); + $$ = support.new_args_tail($1.getPosition(), null, (TruffleString) null, $1); } opt_args_tail : ',' args_tail { $$ = $2; } | /* none */ { - $$ = support.new_args_tail(lexer.getPosition(), null, (Rope) null, null); + $$ = support.new_args_tail(lexer.getPosition(), null, (TruffleString) null, null); } f_args : f_args_any { @@ -2507,14 +2505,14 @@ f_args_any : f_arg ',' f_optarg ',' f_rest_arg opt_args_tail { SourceIndexLength position = support.getPosition(null); RestArgParseNode splat = new RestArgParseNode(position, ParserSupport.FORWARD_ARGS_REST_VAR, 0); BlockArgParseNode block = new BlockArgParseNode(position, 1, ParserSupport.FORWARD_ARGS_BLOCK_VAR); - ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_ROPE, block); + ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_TSTRING, block); $$ = support.new_args(position, $1, null, splat, null, argsTail); } | args_forward 
{ SourceIndexLength position = support.getPosition(null); RestArgParseNode splat = new RestArgParseNode(position, ParserSupport.FORWARD_ARGS_REST_VAR, 0); BlockArgParseNode block = new BlockArgParseNode(position, 1, ParserSupport.FORWARD_ARGS_BLOCK_VAR); - ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_ROPE, block); + ArgsTailHolder argsTail = support.new_args_tail(position, null, ParserSupport.FORWARD_ARGS_KWREST_VAR_TSTRING, block); $$ = support.new_args(position, null, null, splat, null, argsTail); } diff --git a/src/main/java/org/truffleruby/platform/DefaultNativeConfiguration.java b/src/main/java/org/truffleruby/platform/DefaultNativeConfiguration.java index d053d70fb568..461f199cdead 100644 --- a/src/main/java/org/truffleruby/platform/DefaultNativeConfiguration.java +++ b/src/main/java/org/truffleruby/platform/DefaultNativeConfiguration.java @@ -39,11 +39,11 @@ import java.math.BigInteger; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; +import org.truffleruby.core.encoding.Encodings; +import org.truffleruby.core.encoding.TStringUtils; import org.truffleruby.core.numeric.BignumOperations; import org.truffleruby.core.numeric.RubyBignum; -import org.truffleruby.core.rope.RopeOperations; import org.truffleruby.core.string.ImmutableRubyString; @@ -56,7 +56,7 @@ protected static RubyBignum newBignum(String value) { protected static ImmutableRubyString string(RubyContext context, String value) { return context .getLanguageSlow() - .getFrozenStringLiteral(RopeOperations.encodeAscii(value, UTF8Encoding.INSTANCE)); + .getFrozenStringLiteral(TStringUtils.utf8TString(value), Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/platform/TruffleNFIPlatform.java b/src/main/java/org/truffleruby/platform/TruffleNFIPlatform.java index a03b16dd023a..d4215d0e5eed 100644 --- a/src/main/java/org/truffleruby/platform/TruffleNFIPlatform.java +++ 
b/src/main/java/org/truffleruby/platform/TruffleNFIPlatform.java @@ -11,7 +11,7 @@ import org.truffleruby.RubyContext; import org.truffleruby.interop.TranslateInteropExceptionNode; -import org.truffleruby.language.library.RubyStringLibrary; +import org.truffleruby.language.RubyGuards; import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.interop.InteropLibrary; @@ -77,7 +77,7 @@ public Object resolveTypeRaw(NativeConfiguration nativeConfiguration, String typ public String resolveType(NativeConfiguration nativeConfiguration, String type) { final Object typedef = resolveTypeRaw(nativeConfiguration, type); - return toNFIType(RubyStringLibrary.getUncached().getJavaString(typedef)); + return toNFIType(RubyGuards.getJavaString(typedef)); } private String toNFIType(String type) { diff --git a/src/main/java/org/truffleruby/stdlib/CoverageNodes.java b/src/main/java/org/truffleruby/stdlib/CoverageNodes.java index 5d1593165224..4e1e1c1f3bed 100644 --- a/src/main/java/org/truffleruby/stdlib/CoverageNodes.java +++ b/src/main/java/org/truffleruby/stdlib/CoverageNodes.java @@ -12,13 +12,12 @@ import java.util.HashMap; import java.util.Map; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; import org.truffleruby.core.array.RubyArray; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.language.control.RaiseException; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; @@ -53,7 +52,7 @@ protected Object disable() { @CoreMethod(names = "result_array", onSingleton = true) public abstract static class CoverageResultNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private 
TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -82,10 +81,9 @@ protected RubyArray resultArray() { final String path = getLanguage().getSourcePath(source.getKey()); assert !results.containsKey(path) : "path already exists in coverage results"; results.put(path, createArray(new Object[]{ - makeStringNode.executeMake( + createString(fromJavaStringNode, path, - Encodings.UTF_8, - CodeRange.CR_UNKNOWN), + Encodings.UTF_8), createArray(countsStore) })); } diff --git a/src/main/java/org/truffleruby/stdlib/ObjSpaceNodes.java b/src/main/java/org/truffleruby/stdlib/ObjSpaceNodes.java index c9cd45c95bfa..faffe79e6ef3 100644 --- a/src/main/java/org/truffleruby/stdlib/ObjSpaceNodes.java +++ b/src/main/java/org/truffleruby/stdlib/ObjSpaceNodes.java @@ -12,6 +12,7 @@ import java.util.Set; import com.oracle.truffle.api.object.DynamicObjectLibrary; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.Layouts; import org.truffleruby.RubyContext; import org.truffleruby.builtins.CoreMethod; @@ -24,11 +25,10 @@ import org.truffleruby.core.hash.RubyHash; import org.truffleruby.core.regexp.MatchDataNodes.ValuesNode; import org.truffleruby.core.regexp.RubyMatchData; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; import org.truffleruby.core.string.ImmutableRubyString; -import org.truffleruby.core.string.StringNodes.MakeStringNode; import org.truffleruby.language.RubyDynamicObject; +import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.language.methods.SharedMethodInfo; import org.truffleruby.language.objects.AllocationTracing.AllocationTrace; import org.truffleruby.language.objects.ObjectGraph; @@ -54,13 +54,15 @@ protected int memsizeOfHash(RubyHash object) { } @Specialization - protected int memsizeOfString(RubyString object) { - return memsizeOfObject(object) + object.rope.byteLength(); + protected int 
memsizeOfString(RubyString object, + @Cached RubyStringLibrary libString) { + return memsizeOfObject(object) + libString.byteLength(object); } @Specialization - protected int memsizeOfString(ImmutableRubyString object) { - return 1 + object.rope.byteLength(); + protected int memsizeOfString(ImmutableRubyString object, + @Cached RubyStringLibrary libString) { + return 1 + libString.byteLength(object); } @Specialization @@ -146,7 +148,7 @@ public abstract static class AllocationClassPathNode extends PrimitiveArrayArgum @TruffleBoundary @Specialization protected Object allocationInfo(RubyDynamicObject object, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { AllocationTrace trace = getAllocationTrace(getContext(), object); if (trace == null) { return nil; @@ -155,7 +157,7 @@ protected Object allocationInfo(RubyDynamicObject object, if (className.isEmpty()) { return nil; } else { - return makeStringNode.executeMake(className, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, className, Encodings.UTF_8); } } } @@ -216,13 +218,13 @@ public abstract static class AllocationSourceFileNode extends PrimitiveArrayArgu @TruffleBoundary @Specialization protected Object allocationInfo(RubyDynamicObject object, - @Cached MakeStringNode makeStringNode) { + @Cached TruffleString.FromJavaStringNode fromJavaStringNode) { AllocationTrace trace = getAllocationTrace(getContext(), object); if (trace == null) { return nil; } else { final String sourcePath = getLanguage().getSourcePath(trace.allocatingSourceSection.getSource()); - return makeStringNode.executeMake(sourcePath, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, sourcePath, Encodings.UTF_8); } } diff --git a/src/main/java/org/truffleruby/stdlib/digest/DigestNodes.java b/src/main/java/org/truffleruby/stdlib/digest/DigestNodes.java index 812a4323b28d..cda359eaa78b 100644 --- 
a/src/main/java/org/truffleruby/stdlib/digest/DigestNodes.java +++ b/src/main/java/org/truffleruby/stdlib/digest/DigestNodes.java @@ -11,17 +11,15 @@ import com.oracle.truffle.api.CompilerDirectives; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; +import com.oracle.truffle.api.dsl.Cached; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.library.CachedLibrary; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; import org.truffleruby.builtins.CoreModule; import org.truffleruby.collections.ByteArrayBuilder; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.CodeRange; -import org.truffleruby.core.rope.Rope; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.language.RubyBaseNode; import org.truffleruby.language.library.RubyStringLibrary; import org.truffleruby.language.objects.AllocationTracing; @@ -104,17 +102,22 @@ protected RubyDigest sha512() { @CoreMethod(names = "update", onSingleton = true, required = 2) public abstract static class UpdateNode extends CoreMethodArrayArgumentsNode { - @TruffleBoundary - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = "strings.isRubyString(message)", limit = "1") protected RubyDigest update(RubyDigest digestObject, Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { + @Cached RubyStringLibrary strings, + @Cached TruffleString.GetInternalByteArrayNode getInternalByteArrayNode) { final MessageDigest digest = digestObject.digest; - final Rope rope = strings.getRope(message); + var tstring = strings.getTString(message); + var byteArray = getInternalByteArrayNode.execute(tstring, strings.getTEncoding(message)); - digest.update(rope.getBytes()); + update(digest, byteArray.getArray(), byteArray.getOffset(), 
byteArray.getLength()); return digestObject; } + @TruffleBoundary + private void update(MessageDigest digest, byte[] input, int offset, int len) { + digest.update(input, offset, len); + } } @CoreMethod(names = "reset", onSingleton = true, required = 1) @@ -132,13 +135,13 @@ protected RubyDigest reset(RubyDigest digestObject) { @CoreMethod(names = "digest", onSingleton = true, required = 1) public abstract static class DigestNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @Specialization protected RubyString digest(RubyDigest digestObject) { final MessageDigest digest = digestObject.digest; - return makeStringNode.executeMake(cloneAndDigest(digest), Encodings.BINARY, CodeRange.CR_VALID); + return createString(fromByteArrayNode, cloneAndDigest(digest), Encodings.BINARY); } // TODO CS 10-Apr-17 the Ruby code for digest also clones in some cases! Are we cloning redundantly? 
@@ -181,16 +184,18 @@ protected int digestLength(RubyDigest digestObject) { @CoreMethod(names = "bubblebabble", onSingleton = true, required = 1) public abstract static class BubbleBabbleNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromByteArrayNode fromByteArrayNode = TruffleString.FromByteArrayNode.create(); @TruffleBoundary - @Specialization(guards = "strings.isRubyString(message)") + @Specialization(guards = "strings.isRubyString(message)", limit = "1") protected RubyString bubblebabble(Object message, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final Rope rope = strings.getRope(message); - final byte[] bubblebabbleBytes = bubblebabble(rope.getBytes(), 0, rope.byteLength()).getBytes(); + @Cached RubyStringLibrary strings) { + var rope = strings.getTString(message); + var byteArray = rope.getInternalByteArrayUncached(strings.getTEncoding(message)); + final byte[] bubblebabbleBytes = bubblebabble(byteArray.getArray(), byteArray.getOffset(), + byteArray.getLength()).getBytes(); // CR_7BIT - return makeStringNode.executeMake(bubblebabbleBytes, Encodings.UTF_8, CodeRange.CR_7BIT); + return createString(fromByteArrayNode, bubblebabbleBytes, Encodings.UTF_8); } /** Ported from OpenSSH diff --git a/src/main/java/org/truffleruby/stdlib/readline/ReadlineHistoryNodes.java b/src/main/java/org/truffleruby/stdlib/readline/ReadlineHistoryNodes.java index 6c7d64bb0716..90c65d3a9b52 100644 --- a/src/main/java/org/truffleruby/stdlib/readline/ReadlineHistoryNodes.java +++ b/src/main/java/org/truffleruby/stdlib/readline/ReadlineHistoryNodes.java @@ -40,6 +40,7 @@ */ package org.truffleruby.stdlib.readline; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.shadowed.org.jline.reader.History; import org.truffleruby.builtins.CoreMethod; import org.truffleruby.builtins.CoreMethodArrayArgumentsNode; @@ -50,9 
+51,7 @@ import org.truffleruby.core.basicobject.RubyBasicObject; import org.truffleruby.core.cast.ToIntNode; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.language.RubyBaseNodeWithExecute; import org.truffleruby.language.RubyNode; @@ -93,7 +92,7 @@ private void addToHistory(String item) { @CoreMethod(names = "pop", needsSelf = false) public abstract static class PopNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -105,10 +104,10 @@ protected Object pop() { } final String lastLine = consoleHolder.getHistory().removeLast().line(); - return makeStringNode.executeMake( + return createString( + fromJavaStringNode, lastLine, - getLocaleEncoding(), - CodeRange.CR_UNKNOWN); + getLocaleEncoding()); } } @@ -116,7 +115,7 @@ protected Object pop() { @CoreMethod(names = "shift", needsSelf = false) public abstract static class ShiftNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -128,10 +127,10 @@ protected Object shift() { } final String lastLine = consoleHolder.getHistory().removeFirst().line(); - return makeStringNode.executeMake( + return createString( + fromJavaStringNode, lastLine, - getLocaleEncoding(), - CodeRange.CR_UNKNOWN); + getLocaleEncoding()); } } @@ -166,18 +165,17 @@ protected Object clear() { @CoreMethod(names = "each", needsBlock = true) public abstract static class EachNode 
extends YieldingCoreMethodNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @Specialization protected RubyBasicObject each(RubyBasicObject history, RubyProc block) { final ConsoleHolder consoleHolder = getContext().getConsoleHolder(); for (final History.Entry e : BoundaryIterable.wrap(consoleHolder.getHistory())) { - final RubyString line = makeStringNode - .executeMake( - historyEntryToString(e), - getLocaleEncoding(), - CodeRange.CR_UNKNOWN); + final RubyString line = createString( + fromJavaStringNode, + historyEntryToString(e), + getLocaleEncoding()); callBlock(block, line); } @@ -194,7 +192,7 @@ private String historyEntryToString(History.Entry entry) { @CoreMethod(names = "[]", needsSelf = false, required = 1, lowerFixnum = 1) public abstract static class GetIndexNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -205,10 +203,10 @@ protected Object getIndex(int index) { try { final String line = consoleHolder.getHistory().get(normalizedIndex); - return makeStringNode.executeMake( + return createString( + fromJavaStringNode, line, - getLocaleEncoding(), - CodeRange.CR_UNKNOWN); + getLocaleEncoding()); } catch (IndexOutOfBoundsException e) { throw new RaiseException(getContext(), coreExceptions().indexErrorInvalidIndex(this)); } @@ -251,7 +249,7 @@ protected Object setIndex(int index, String line) { @CoreMethod(names = "delete_at", needsSelf = false, required = 1, lowerFixnum = 1) public abstract static class DeleteAtNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + 
@Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization @@ -260,10 +258,10 @@ protected Object deleteAt(int index) { final int normalizedIndex = index < 0 ? index + consoleHolder.getHistory().size() : index; try { final String line = consoleHolder.getHistory().remove(normalizedIndex).line(); - return makeStringNode.executeMake( + return createString( + fromJavaStringNode, line, - getLocaleEncoding(), - CodeRange.CR_UNKNOWN); + getLocaleEncoding()); } catch (IndexOutOfBoundsException e) { throw new RaiseException(getContext(), coreExceptions().indexErrorInvalidIndex(this)); } diff --git a/src/main/java/org/truffleruby/stdlib/readline/ReadlineNodes.java b/src/main/java/org/truffleruby/stdlib/readline/ReadlineNodes.java index e141a868fe52..649502d5e2d1 100644 --- a/src/main/java/org/truffleruby/stdlib/readline/ReadlineNodes.java +++ b/src/main/java/org/truffleruby/stdlib/readline/ReadlineNodes.java @@ -11,6 +11,8 @@ import java.util.List; +import com.oracle.truffle.api.dsl.Cached; +import com.oracle.truffle.api.strings.TruffleString; import org.graalvm.shadowed.org.jline.reader.Buffer; import org.graalvm.shadowed.org.jline.reader.Candidate; import org.graalvm.shadowed.org.jline.reader.Completer; @@ -18,7 +20,6 @@ import org.graalvm.shadowed.org.jline.reader.LineReader; import org.graalvm.shadowed.org.jline.reader.ParsedLine; import org.graalvm.shadowed.org.jline.reader.UserInterruptException; -import org.jcodings.specific.UTF8Encoding; import org.truffleruby.RubyContext; import org.truffleruby.RubyLanguage; import org.truffleruby.builtins.CoreMethod; @@ -37,22 +38,20 @@ import org.truffleruby.core.cast.ToStrNodeGen; import org.truffleruby.core.encoding.Encodings; import org.truffleruby.core.proc.RubyProc; -import org.truffleruby.core.rope.CodeRange; import org.truffleruby.core.string.RubyString; -import org.truffleruby.core.string.StringNodes; import 
org.truffleruby.core.string.StringOperations; import org.truffleruby.core.support.RubyIO; import org.truffleruby.core.thread.ThreadManager.BlockingAction; import org.truffleruby.interop.ToJavaStringNode; import org.truffleruby.interop.ToJavaStringWithDefaultNodeGen; import org.truffleruby.language.RubyBaseNodeWithExecute; +import org.truffleruby.language.RubyGuards; import org.truffleruby.language.RubyNode; import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary; import com.oracle.truffle.api.dsl.CreateCast; import com.oracle.truffle.api.dsl.NodeChild; import com.oracle.truffle.api.dsl.Specialization; -import com.oracle.truffle.api.library.CachedLibrary; import org.truffleruby.language.dispatch.DispatchNode; import org.truffleruby.language.library.RubyStringLibrary; @@ -62,13 +61,13 @@ public abstract class ReadlineNodes { @CoreMethod(names = "basic_word_break_characters", onSingleton = true) public abstract static class BasicWordBreakCharactersNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected RubyString basicWordBreakCharacters() { final String delimiters = getContext().getConsoleHolder().getParser().getDelimiters(); - return makeStringNode.executeMake(delimiters, Encodings.UTF_8, CodeRange.CR_UNKNOWN); + return createString(fromJavaStringNode, delimiters, Encodings.UTF_8); } } @@ -83,10 +82,10 @@ protected ToStrNode coerceCharactersToString(RubyBaseNodeWithExecute characters) } @TruffleBoundary - @Specialization(guards = "strings.isRubyString(characters)") + @Specialization(guards = "strings.isRubyString(characters)", limit = "1") protected Object setBasicWordBreakCharacters(Object characters, - @CachedLibrary(limit = "LIBSTRING_CACHE") RubyStringLibrary strings) { - final String delimiters = 
strings.getJavaString(characters); + @Cached RubyStringLibrary strings) { + final String delimiters = RubyGuards.getJavaString(characters); getContext().getConsoleHolder().getParser().setDelimiters(delimiters); return characters; } @@ -128,7 +127,7 @@ protected RubyArray getScreenSize() { @NodeChild(value = "addToHistory", type = RubyBaseNodeWithExecute.class) public abstract static class ReadlineNode extends CoreMethodNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @CreateCast("prompt") protected RubyNode coercePromptToJavaString(RubyNode prompt) { @@ -169,10 +168,10 @@ protected Object readline(String prompt, boolean addToHistory) { readline.getHistory().add(value); } - return makeStringNode.executeMake( + return createString( + fromJavaStringNode, value, - getContext().getEncodingManager().getDefaultExternalEncoding(), - CodeRange.CR_UNKNOWN); + getContext().getEncodingManager().getDefaultExternalEncoding()); } } @@ -223,18 +222,17 @@ protected RubyBasicObject deleteText(RubyBasicObject readline) { @CoreMethod(names = "line_buffer", onSingleton = true) public abstract static class LineBufferNode extends CoreMethodArrayArgumentsNode { - @Child private StringNodes.MakeStringNode makeStringNode = StringNodes.MakeStringNode.create(); + @Child private TruffleString.FromJavaStringNode fromJavaStringNode = TruffleString.FromJavaStringNode.create(); @TruffleBoundary @Specialization protected Object lineBuffer() { final Buffer buffer = getContext().getConsoleHolder().getReadline().getBuffer(); - return makeStringNode - .executeMake( - buffer.toString(), - getLocaleEncoding(), - CodeRange.CR_UNKNOWN); + return createString( + fromJavaStringNode, + buffer.toString(), + getLocaleEncoding()); } } @@ -294,11 +292,10 @@ public void complete(LineReader lineReader, ParsedLine commandLine, List= 7 and codepoint 
<= 92) escaped = nil - byte = getbyte(index) - if byte >= 7 and byte <= 92 - case byte - when 7 # \a - escaped = '\a' - when 8 # \b - escaped = '\b' - when 9 # \t - escaped = '\t' - when 10 # \n - escaped = '\n' - when 11 # \v - escaped = '\v' - when 12 # \f - escaped = '\f' - when 13 # \r - escaped = '\r' - when 27 # \e - escaped = '\e' - when 34 # \" - escaped = '\"' - when 35 # # - case getbyte(index + 1) - when 36 # $ - escaped = '\#$' - consumed += 1 - when 64 # @ - escaped = '\#@' - consumed += 1 - when 123 # { - escaped = '\#{' - consumed += 1 - end - when 92 # \\ - escaped = '\\\\' + case codepoint + when 7 # \a + escaped = '\a' + when 8 # \b + escaped = '\b' + when 9 # \t + escaped = '\t' + when 10 # \n + escaped = '\n' + when 11 # \v + escaped = '\v' + when 12 # \f + escaped = '\f' + when 13 # \r + escaped = '\r' + when 27 # \e + escaped = '\e' + when 34 # \" + escaped = '\"' + when 35 # # + case getbyte(index + 1) + when 36 # $ + escaped = '\#$' + consumed += 1 + when 64 # @ + escaped = '\#@' + consumed += 1 + when 123 # { + escaped = '\#{' + consumed += 1 end + when 92 # \\ + escaped = '\\\\' + end - if escaped - result << escaped - return consumed - end + if escaped + result << escaped + return consumed end end - if Primitive.character_printable_p(char) && unicode && char.ord < MAX_PRINTABLE_UNICODE_CHAR - result << char.encode(result_encoding) - elsif Primitive.character_printable_p(char) && (enc == result_encoding || (ascii && char.ascii_only?)) + printable = Primitive.character_printable?(codepoint, enc) + if printable && (enc == result_encoding || (ascii && char.ascii_only?)) result << char + # < 0x7F from https://github.com/ruby/ruby/blob/12f7ba5ed4a07855d6a9429aa627211db3655ca7/string.c#L6049-L6050 + # Exclude UTF-8 (unicode && ascii) because it was already checked just above + elsif printable && unicode && !ascii && codepoint < 0x7F + result << codepoint else - code = char.ord - escaped = code.to_s(16).upcase + escaped = 
codepoint.to_s(16).upcase if unicode - if code < 0x10000 + if codepoint < 0x10000 pad = '0' * (4 - escaped.bytesize) result << "\\u#{pad}#{escaped}" else result << "\\u{#{escaped}}" end else - if code < 0x100 + if codepoint < 0x100 pad = '0' * (2 - escaped.bytesize) result << "\\x#{pad}#{escaped}" else @@ -704,7 +678,7 @@ def chop! end end - Truffle::StringOperations.truncate(self, bytes) + Primitive.string_truncate(self, bytes) self end @@ -766,7 +740,7 @@ def chomp!(sep=undefined) bytes = bytesize - sep_bytesize end - Truffle::StringOperations.truncate(self, bytes) + Primitive.string_truncate(self, bytes) self end @@ -1119,7 +1093,8 @@ def rindex(sub, finish=undefined) def start_with?(*prefixes) if prefixes.size == 1 and prefix = prefixes[0] and String === prefix - return self[0, prefix.length] == prefix + enc = Primitive.encoding_ensure_compatible_str self, prefix + return Primitive.string_start_with?(self, prefix, enc) end # This is the workaround because `Primitive.caller_special_variables` doesn't work inside blocks yet. 
@@ -1132,11 +1107,9 @@ def start_with?(*prefixes) Primitive.regexp_last_match_set(storage, match_data) return true if match_data else - prefix = Truffle::Type.rb_check_convert_type original_prefix, String, :to_str - unless prefix - raise TypeError, "no implicit conversion of #{original_prefix.class} into String" - end - return true if self[0, prefix.length] == prefix + prefix = Truffle::Type.rb_convert_type original_prefix, String, :to_str + enc = Primitive.encoding_ensure_compatible_str self, prefix + return true if Primitive.string_start_with?(self, prefix, enc) end end false @@ -1260,7 +1233,7 @@ def encoding def <=>(other) if String === other - return Primitive.string_cmp self, other + return Primitive.string_cmp(self, other, Primitive.strings_compatible?(self, other)) end Truffle::ThreadOperations.detect_pair_recursion self, other do diff --git a/src/main/ruby/truffleruby/core/transcoding.rb b/src/main/ruby/truffleruby/core/transcoding.rb index 96383d1187fd..857f8c04d1a3 100644 --- a/src/main/ruby/truffleruby/core/transcoding.rb +++ b/src/main/ruby/truffleruby/core/transcoding.rb @@ -182,7 +182,7 @@ def convert(str) end def primitive_convert(source, target, offset=nil, size=nil, options=0) - source = StringValue(source) if source + source = source ? StringValue(source) : +'' target = StringValue(target) if Primitive.nil? 
offset @@ -221,8 +221,7 @@ def primitive_convert(source, target, offset=nil, size=nil, options=0) source.prepend putback end - Primitive.encoding_converter_primitive_convert( - self, source, target, offset, size, options) + Primitive.encoding_converter_primitive_convert(self, source, target, offset, size, options) end def finish diff --git a/src/main/ruby/truffleruby/core/truffle/regexp_operations.rb b/src/main/ruby/truffleruby/core/truffle/regexp_operations.rb index 00ca356e8c79..76d487c6e202 100644 --- a/src/main/ruby/truffleruby/core/truffle/regexp_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/regexp_operations.rb @@ -36,6 +36,7 @@ def self.match(re, str, pos=0) str = Primitive.object_kind_of?(str, Symbol) ? str.to_s : StringValue(str) pos = pos < 0 ? pos + str.size : pos + return nil if pos < 0 or pos > str.size pos = Primitive.string_byte_index_from_char_index(str, pos) search_region(re, str, pos, str.bytesize, true, true) @@ -48,6 +49,7 @@ def self.match?(re, str, pos=0) str = Primitive.object_kind_of?(str, Symbol) ? str.to_s : StringValue(str) pos = pos < 0 ? pos + str.size : pos + return false if pos < 0 or pos > str.size pos = Primitive.string_byte_index_from_char_index(str, pos) search_region(re, str, pos, str.bytesize, true, false) diff --git a/src/main/ruby/truffleruby/core/truffle/string_operations.rb b/src/main/ruby/truffleruby/core/truffle/string_operations.rb index 375c320c87ae..6e6f95aa56af 100644 --- a/src/main/ruby/truffleruby/core/truffle/string_operations.rb +++ b/src/main/ruby/truffleruby/core/truffle/string_operations.rb @@ -189,7 +189,7 @@ def self.case_mapping_option_to_int(option, downcasing=false) def self.shorten!(string, size) return if string.empty? 
- Truffle::StringOperations.truncate(string, string.bytesize - size) + Primitive.string_truncate(string, string.bytesize - size) end def self.to_sub_replacement(string, result, match) @@ -284,6 +284,7 @@ def self.validate_case_mapping_options(options, downcasing) end end + # MRI: rb_str_byteindex_m def self.byte_index(src, str, start=0) start += src.bytesize if start < 0 if start < 0 or start > src.bytesize diff --git a/src/main/ruby/truffleruby/post-boot/post-boot.rb b/src/main/ruby/truffleruby/post-boot/post-boot.rb index 01fd44dd94c5..ce23ac7dbeae 100644 --- a/src/main/ruby/truffleruby/post-boot/post-boot.rb +++ b/src/main/ruby/truffleruby/post-boot/post-boot.rb @@ -44,7 +44,7 @@ [$LOAD_PATH, $LOADED_FEATURES].each do |array| array.each do |path| if path.start_with?(old_home) - path.replace Truffle::Ropes.flatten_rope(path[old_home.size..-1]) + path.replace Truffle::Debug.flatten_string(path[old_home.size..-1]) paths_starting_with_home << path elsif !path.include?('/') # relative path for always provided features like 'ruby2_keywords.rb' diff --git a/src/options.yml b/src/options.yml index d599ea63f690..84d3b00546b3 100644 --- a/src/options.yml +++ b/src/options.yml @@ -170,7 +170,7 @@ INTERNAL: # Options for debugging the TruffleRuby implementation LAZY_TRANSLATION_LOG: [lazy-translation-log, boolean, false, Log lazy translations from the parser AST to the Truffle AST] LOG_DYNAMIC_CONSTANT_LOOKUP: [constant-dynamic-lookup-log, boolean, false, Log source code positions where dynamic constant lookup is performed] LOG_PENDING_INTERRUPTS: [log-pending-interrupts, boolean, false, Log when executing pending interrupts] - ROPE_PRINT_INTERN_STATS: [rope-print-intern-stats, boolean, false, Print interned rope stats at application exit] + PRINT_INTERNED_TSTRING_STATS: [print-interned-tstring-stats, boolean, false, Print interned tstring stats at application exit] CEXTS_TO_NATIVE_STATS: [cexts-to-native-stats, boolean, false, Track the number of conversions of VALUEs to 
native and print the stats at application exit] CEXTS_TO_NATIVE_COUNT: [cexts-to-native-count, boolean, CEXTS_TO_NATIVE_STATS, Track the number of conversions of VALUEs to native] diff --git a/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java b/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java index 90d9000bcd03..429c8e46016c 100644 --- a/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java +++ b/src/shared/java/org/truffleruby/shared/options/OptionsCatalog.java @@ -100,7 +100,7 @@ public class OptionsCatalog { public static final OptionKey LAZY_TRANSLATION_LOG_KEY = new OptionKey<>(false); public static final OptionKey LOG_DYNAMIC_CONSTANT_LOOKUP_KEY = new OptionKey<>(false); public static final OptionKey LOG_PENDING_INTERRUPTS_KEY = new OptionKey<>(false); - public static final OptionKey ROPE_PRINT_INTERN_STATS_KEY = new OptionKey<>(false); + public static final OptionKey PRINT_INTERNED_TSTRING_STATS_KEY = new OptionKey<>(false); public static final OptionKey CEXTS_TO_NATIVE_STATS_KEY = new OptionKey<>(false); public static final OptionKey CEXTS_TO_NATIVE_COUNT_KEY = new OptionKey<>(CEXTS_TO_NATIVE_STATS_KEY.getDefaultValue()); public static final OptionKey LAZY_BUILTINS_KEY = new OptionKey<>(LAZY_CALLTARGETS_KEY.getDefaultValue()); @@ -804,9 +804,9 @@ public class OptionsCatalog { .usageSyntax("") .build(); - public static final OptionDescriptor ROPE_PRINT_INTERN_STATS = OptionDescriptor - .newBuilder(ROPE_PRINT_INTERN_STATS_KEY, "ruby.rope-print-intern-stats") - .help("Print interned rope stats at application exit") + public static final OptionDescriptor PRINT_INTERNED_TSTRING_STATS = OptionDescriptor + .newBuilder(PRINT_INTERNED_TSTRING_STATS_KEY, "ruby.print-interned-tstring-stats") + .help("Print interned tstring stats at application exit") .category(OptionCategory.INTERNAL) .stability(OptionStability.EXPERIMENTAL) .usageSyntax("") @@ -1470,8 +1470,8 @@ public static OptionDescriptor fromName(String name) { 
return LOG_DYNAMIC_CONSTANT_LOOKUP; case "ruby.log-pending-interrupts": return LOG_PENDING_INTERRUPTS; - case "ruby.rope-print-intern-stats": - return ROPE_PRINT_INTERN_STATS; + case "ruby.print-interned-tstring-stats": + return PRINT_INTERNED_TSTRING_STATS; case "ruby.cexts-to-native-stats": return CEXTS_TO_NATIVE_STATS; case "ruby.cexts-to-native-count": @@ -1683,7 +1683,7 @@ public static OptionDescriptor[] allDescriptors() { LAZY_TRANSLATION_LOG, LOG_DYNAMIC_CONSTANT_LOOKUP, LOG_PENDING_INTERRUPTS, - ROPE_PRINT_INTERN_STATS, + PRINT_INTERNED_TSTRING_STATS, CEXTS_TO_NATIVE_STATS, CEXTS_TO_NATIVE_COUNT, LAZY_BUILTINS, diff --git a/src/test/java/org/truffleruby/ContextPermissionsTest.java b/src/test/java/org/truffleruby/ContextPermissionsTest.java index fa0ec8d9b702..287025457f87 100644 --- a/src/test/java/org/truffleruby/ContextPermissionsTest.java +++ b/src/test/java/org/truffleruby/ContextPermissionsTest.java @@ -10,7 +10,6 @@ package org.truffleruby; import org.graalvm.polyglot.Context; -import org.graalvm.polyglot.HostAccess; import org.graalvm.polyglot.Value; import org.junit.Assert; import org.junit.Test; @@ -43,9 +42,7 @@ public void testNativeNoThreads() throws Throwable { @Test public void testRequireGem() { - HostAccess arrayAccess = HostAccess.newBuilder().allowArrayAccess(true).build(); - try (Context context = Context.newBuilder("ruby").allowIO(true).allowNativeAccess(true) - .allowHostAccess(arrayAccess).build()) { + try (Context context = Context.newBuilder("ruby").allowIO(true).allowNativeAccess(true).build()) { // NOTE: rake is a bundled gem, so it needs RubyGems to be required Assert.assertEquals("Rake", context.eval("ruby", "require 'rake'; Rake.to_s").asString()); } diff --git a/src/test/java/org/truffleruby/language/loader/EmbeddedScriptTest.java b/src/test/java/org/truffleruby/language/loader/EmbeddedScriptTest.java index 265139db0c1d..8c83c9d106bb 100644 --- a/src/test/java/org/truffleruby/language/loader/EmbeddedScriptTest.java +++ 
b/src/test/java/org/truffleruby/language/loader/EmbeddedScriptTest.java @@ -13,7 +13,7 @@ import static junit.framework.TestCase.assertTrue; import org.junit.Test; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.string.StringOperations; public class EmbeddedScriptTest { @@ -66,7 +66,7 @@ public void testLineContainsRubyOffset() { } private byte[] bytes(String string) { - return RopeOperations.encodeAsciiBytes(string); + return StringOperations.encodeAsciiBytes(string); } } diff --git a/test/mri/excludes/TestEncodingConverter.rb b/test/mri/excludes/TestEncodingConverter.rb index 4a51667473ae..489fbef179c1 100644 --- a/test/mri/excludes/TestEncodingConverter.rb +++ b/test/mri/excludes/TestEncodingConverter.rb @@ -33,3 +33,4 @@ exclude :test_xml_escape_attr_quote, "needs investigation" exclude :test_xml_escape_text, "needs investigation" exclude :test_xml_hasharg, "needs investigation" +exclude :test_putback2, "UTF-16 string byte length is not a multiple of 2" diff --git a/test/mri/tests/ruby/enc/test_utf16.rb b/test/mri/tests/ruby/enc/test_utf16.rb index e08f2ea14e6d..50ece1fc7a8a 100644 --- a/test/mri/tests/ruby/enc/test_utf16.rb +++ b/test/mri/tests/ruby/enc/test_utf16.rb @@ -105,15 +105,15 @@ def test_utf16le_valid_encoding } } [ - "\x00", - "\xd7", + # "\x00", # TruffleString: UTF-16 string byte length is not a multiple of 2 + # "\xd7", "\x00\xd8", "\x00\xd8\x00\xd8", "\x00\xdc", "\x00\xdc\x00\xd8", "\x00\xdc\x00\xdc", - "\xe0", - "\xff", + # "\xe0", + # "\xff", ].each {|s| s.force_encoding("utf-16le") a.for(s) { diff --git a/test/mri/tests/ruby/enc/test_utf32.rb b/test/mri/tests/ruby/enc/test_utf32.rb index 76379abca097..338271aedb42 100644 --- a/test/mri/tests/ruby/enc/test_utf32.rb +++ b/test/mri/tests/ruby/enc/test_utf32.rb @@ -142,9 +142,9 @@ def test_utf32le_valid_encoding } } [ - "a", - "a\x00", - "a\x00\x00", + # "a", # TruffleString: UTF-32 string byte length is not a multiple of 4 + # "a\x00", + # "a\x00\x00", 
"\x00\xd8\x00\x00", "\xff\xdb\x00\x00", "\x00\xdc\x00\x00", diff --git a/test/mri/tests/ruby/test_m17n.rb b/test/mri/tests/ruby/test_m17n.rb index 3f28d55ac1a0..c0c2222e40b4 100644 --- a/test/mri/tests/ruby/test_m17n.rb +++ b/test/mri/tests/ruby/test_m17n.rb @@ -1458,9 +1458,10 @@ def test_gbk assert_equal("", "\x81\x40".force_encoding("GBK").chop) end - def test_euc_tw - assert_equal("a", "a\x8e\xa2\xa1\xa1".force_encoding("euc-tw").chop) - end + # GR-39354 + # def test_euc_tw + # assert_equal("a", "a\x8e\xa2\xa1\xa1".force_encoding("euc-tw").chop) + # end def test_valid_encoding s = "\xa1".force_encoding("euc-jp") diff --git a/test/truffle/compiler/pe/core/array_pe.rb b/test/truffle/compiler/pe/core/array_pe.rb index cbf102aa18f8..88d063538b43 100644 --- a/test/truffle/compiler/pe/core/array_pe.rb +++ b/test/truffle/compiler/pe/core/array_pe.rb @@ -10,7 +10,8 @@ example "[3, 1, 2].sort[1]", 2 -example "[14].pack('C').getbyte(0)", 14 +# [GR-39718] Relies on PEA of the byte[], but it is allocated by AMD64CalcStringAttributesMacro currently +tagged example "[14].pack('C').getbyte(0)", 14 example "sum = 0; [1,2,3].each { |x| sum += x }; sum", 6 diff --git a/test/truffle/compiler/pe/core/string_pe.rb b/test/truffle/compiler/pe/core/string_pe.rb index d5af493eba69..bed86a5efa0d 100644 --- a/test/truffle/compiler/pe/core/string_pe.rb +++ b/test/truffle/compiler/pe/core/string_pe.rb @@ -7,15 +7,10 @@ # GNU Lesser General Public License version 2.1. 
ABC_ROPE_1 = 'abc' -ABC_ROPE_2 = 'ab' + 'c' ABC_ROPE_USASCII = 'abc'.force_encoding('us-ascii') ABC_ROPE_UTF8 = 'abc'.force_encoding('utf-8') -simple_string = 'test' - -example "Truffle::Ropes.create_simple_string.length", simple_string.length -example "Truffle::Ropes.create_simple_string.getbyte(0)", simple_string.getbyte(0) -example "Truffle::Ropes.create_simple_string.ord", simple_string.ord +example "'abc'.encoding", Encoding::UTF_8 example "'abc'.length", 3 example "'こにちわ'.length", 4 @@ -30,10 +25,6 @@ example "x = 'abc'; 'abc' == x.dup", true example "ABC_ROPE_1 == ABC_ROPE_1", true -# Comparison against a stable but different string instance, with a stable but -# different rope node instance with the same encoding -example "ABC_ROPE_1 == ABC_ROPE_2", true - # Comparison against an unstable string instance, with a stable but different # rope node instance with the same encoding example "ABC_ROPE_1 == 'abc'", true @@ -46,9 +37,8 @@ # different rope node instance with a different but compatible encoding example "ABC_ROPE_USASCII == 'abc'", true -example "'A' == String.from_codepoint(65, Encoding::US_ASCII)", true example "'A' == 65.chr", true -tagged example "'A'.ord == 65", true +example "'A'.ord == 65", true example "'aba'[0] == 'aca'[-1]", true @@ -66,8 +56,8 @@ example "x = 'abc'; y = 'xyz'; x.replace(y) == y", true -tagged example "'abc'.getbyte(0) == 97", true -tagged example "'abc'.getbyte(-1) == 99", true +example "'abc'.getbyte(0) == 97", true +example "'abc'.getbyte(-1) == 99", true example "'abc'.getbyte(10_000) == nil", true example "14.to_s.length", 2 diff --git a/test/truffle/compiler/pe/language/defined_pe.rb b/test/truffle/compiler/pe/language/defined_pe.rb index 64b2c9e42a47..ffd609dd9e6f 100644 --- a/test/truffle/compiler/pe/language/defined_pe.rb +++ b/test/truffle/compiler/pe/language/defined_pe.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + # Copyright (c) 2015, 2019 Oracle and/or its affiliates. All rights reserved. 
This # code is released under a tri EPL/GPL/LGPL license. You can use it, # redistribute it and/or modify it under the terms of the: @@ -6,8 +8,8 @@ # GNU General Public License version 2, or # GNU Lesser General Public License version 2.1. -example "defined?(true) == 'true'", true -example "defined?(false) == 'false'", true -example "defined?(self) == 'self'", true -example "defined?(14) == 'expression'", true -example "defined?(14 + 2) == 'method'", true +example "defined?(true)", 'true' +example "defined?(false)", 'false' +example "defined?(self)", 'self' +example "defined?(14)", 'expression' +example "defined?(14 + 2)", 'method' diff --git a/test/truffle/integration/strict-encoding-checks.sh b/test/truffle/integration/strict-encoding-checks.sh new file mode 100755 index 000000000000..a81a98bbcc95 --- /dev/null +++ b/test/truffle/integration/strict-encoding-checks.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +source test/truffle/common.sh.inc + +ruby_version=$(jt ruby -v) + +if [[ $ruby_version =~ "Native" ]]; then + echo "The strict encoding checks test can only be run on JVM (the system property value is fixed in Native)" + exit 0 +fi + +jt test fast :all -- --vm.Dtruffle.strings.debug-strict-encoding-checks=true --vm.Dtruffle.strings.debug-non-zero-offset-arrays=true +jt test mri test/mri/tests/ruby/test_string* test/mri/tests/ruby/test_m17n* test/mri/tests/ruby/enc --vm.Dtruffle.strings.debug-strict-encoding-checks=true --vm.Dtruffle.strings.debug-non-zero-offset-arrays=true diff --git a/tool/generate-core-symbols.rb b/tool/generate-core-symbols.rb index 64df54b6ca6d..3c10b2edc6fd 100755 --- a/tool/generate-core-symbols.rb +++ b/tool/generate-core-symbols.rb @@ -29,11 +29,11 @@ import java.util.ArrayList; import java.util.List; -import org.jcodings.specific.USASCIIEncoding; +import com.oracle.truffle.api.strings.TruffleString; import org.truffleruby.core.encoding.Encodings; -import org.truffleruby.core.rope.LeafRope; -import 
org.truffleruby.core.rope.RopeConstants; -import org.truffleruby.core.rope.RopeOperations; +import org.truffleruby.core.encoding.TStringUtils; +import org.truffleruby.core.string.StringOperations; +import org.truffleruby.core.string.TStringConstants; // GENERATED BY #{__FILE__} // This file is automatically generated from tool/id.def with 'jt build core-symbols' @@ -167,12 +167,13 @@ footer = < gem_home, 'GEM_PATH' => "#{gem_home}:" } - sh env, RbConfig.ruby, "#{gem_home}/bin/#{name}", *args + sh env, RbConfig.ruby, "#{gem_home}/bin/#{name}", "_#{version}_", *args else env = ruby_running_jt_env if Gem::Specification.find_all_by_name(name, version).empty?