From 927b5dc1dcb260cdd0df54798780788da60c369f Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 28 Oct 2022 08:48:37 -0600 Subject: [PATCH] utf8::upgrade: Don't coerce undef arg This fixes GH #20419 --- lib/utf8.pm | 4 +++- lib/utf8.t | 3 +++ pod/perldelta.pod | 6 ++++++ universal.c | 16 +++++++++++++--- 4 files changed, 25 insertions(+), 4 deletions(-) diff --git a/lib/utf8.pm b/lib/utf8.pm index 823193b8c13a..c1f1680336db 100644 --- a/lib/utf8.pm +++ b/lib/utf8.pm @@ -5,7 +5,7 @@ use warnings; our $hint_bits = 0x00800000; -our $VERSION = '1.24'; +our $VERSION = '1.25'; our $AUTOLOAD; sub import { @@ -114,6 +114,8 @@ sequence in the native encoding (Latin-1 or EBCDIC) to UTF-8. The logical character sequence itself is unchanged. If I<$string> is already upgraded, then this is a no-op. Returns the number of octets necessary to represent the string as UTF-8. +Since Perl v5.38, if C<$string> is C<undef> no action is taken; prior to that, +it would be converted to be defined and zero-length. If your code needs to be compatible with versions of perl without C<use feature 'unicode_strings';>, you can force Unicode semantics on diff --git a/lib/utf8.t b/lib/utf8.t index d35110baee07..fa69fcbb8862 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -669,6 +669,9 @@ for(__PACKAGE__) { eval { utf8::upgrade($_) }; is $@, "", 'no error with utf8::upgrade on read-only COW'; } + +is(utf8::upgrade(undef), undef, "Returns undef for undef input"); # GH #20419 + # This one croaks, but not because the scalar is read-only eval "package \x{100};\n" . <<'END' for(__PACKAGE__) { diff --git a/pod/perldelta.pod b/pod/perldelta.pod index ce999fbdb428..f3fcd855257c 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -45,6 +45,12 @@ XXX For a release on a stable branch, this section aspires to be: [ List each incompatible change as a =head2 entry ] +=head2 L<C<utf8::upgrade()>|utf8/Utility functions> + +Starting in this release, if the input string is C<undef>, it remains +C<undef>. Previously it would be changed into a defined, zero-length +string. 
+ =head1 Deprecations XXX Any deprecated features, syntax, modules etc. should be listed here. diff --git a/universal.c b/universal.c index 119117e818a4..20a36fae8517 100644 --- a/universal.c +++ b/universal.c @@ -593,11 +593,21 @@ XS(XS_utf8_upgrade) croak_xs_usage(cv, "sv"); else { SV * const sv = ST(0); - STRLEN RETVAL; + STRLEN RETVAL = 0; dXSTARG; - RETVAL = sv_utf8_upgrade(sv); - XSprePUSH; PUSHi((IV)RETVAL); + XSprePUSH; + if (UNLIKELY(! sv)) { + XSRETURN_UNDEF; + } + + SvGETMAGIC(sv); + if (UNLIKELY(! SvOK(sv))) { + XSRETURN_UNDEF; + } + + RETVAL = sv_utf8_upgrade_nomg(sv); + PUSHi( (IV) RETVAL); } XSRETURN(1); }