diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml new file mode 100644 index 0000000000..36e8cfb78c --- /dev/null +++ b/.github/workflows/windows.yml @@ -0,0 +1,50 @@ +# this is a work in progress! +name: windows +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + windows: + name: "windows, sys: ${{ matrix.sys }}, ${{ matrix.ruby }}" + + env: + MAKEFLAGS: -j2 + + runs-on: windows-latest + + strategy: + fail-fast: false + matrix: + sys: [ enable, disable ] + ruby: [ "2.5", "2.6", "2.7", "3.0", "mingw" ] + + steps: + - name: configure git crlf on windows + run: | + git config --system core.autocrlf false + git config --system core.eol lf + - name: checkout + uses: actions/checkout@v2 + - name: load Ruby and bundle install + uses: MSP-Greg/setup-ruby-pkgs@v1 + with: + ruby-version: ${{ matrix.ruby }} + mingw: libxml2 libxslt + bundler-cache: true + - uses: actions/cache@v2 + if: matrix.sys == 'disable' + with: + path: ports/archives + key: ${{ matrix.os }}-${{ matrix.ruby }}-tarballs-${{ hashFiles('**/dependencies.yml') }} + restore-keys: ${{ matrix.os }}-${{ matrix.ruby }}-tarballs- + - name: bundle exec rake compile + run: | + bundle exec rake compile -- --${{ matrix.sys }}-system-libraries + - name: bundle exec rake test + run: bundle exec rake test diff --git a/CHANGELOG.md b/CHANGELOG.md index 57a28c8516..79fb158dd1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,22 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA --- +## 1.11.5 / 2021-05-19 + +### Fixed + +[Windows CRuby] Work around segfault at process exit on Windows when using libxml2 system DLLs. + +libxml 2.9.12 introduced new behavior to avoid memory leaks when unloading libxml2 shared libraries (see [libxml/!66](https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/66)). Early testing caught this segfault on non-Windows platforms (see [#2059](https://github.com/sparklemotion/nokogiri/issues/2059) and [libxml@956534e](https://gitlab.gnome.org/GNOME/libxml2/-/commit/956534e02ef280795a187c16f6ac04e107f23c5d)) but it was incompletely fixed and is still an issue on Windows platforms that are using system DLLs. + +We work around this by configuring libxml2 in this situation to use its default memory management functions. Note that if Nokogiri is not on Windows, or is not using shared system libraries, it will will continue to configure libxml2 to use Ruby's memory management functions. `Nokogiri::VERSION_INFO["libxml"]["memory_management"]` will allow you to verify when the default memory management functions are being used. [[#2241](https://github.com/sparklemotion/nokogiri/issues/2241)] + + +### Changed + +`Nokogiri::VERSION_INFO["libxml"]` now contains the key `"memory_management"` to declare whether libxml2 is using its `default` memory management functions, or whether it uses the memory management functions from `ruby`. See above for more details. + + ## 1.11.4 / 2021-05-14 ### Security diff --git a/ext/nokogiri/nokogiri.c b/ext/nokogiri/nokogiri.c index fb255f03e3..6258f1d591 100644 --- a/ext/nokogiri/nokogiri.c +++ b/ext/nokogiri/nokogiri.c @@ -191,7 +191,26 @@ Init_nokogiri() rb_const_set(mNokogiri, rb_intern("OTHER_LIBRARY_VERSIONS"), NOKOGIRI_STR_NEW2(NOKOGIRI_OTHER_LIBRARY_VERSIONS)); #endif +#if defined(_WIN32) && !defined(NOKOGIRI_PACKAGED_LIBRARIES) + /* + * We choose *not* to do use Ruby's memory management functions with windows DLLs because of this + * issue in libxml 2.9.12: + * + * https://github.com/sparklemotion/nokogiri/issues/2241 + * + * If the atexit() issue gets fixed in a future version of libxml2, then we may be able to skip + * this config only for the specific libxml2 versions 2.9.12. + * + * Alternatively, now that Ruby has a generational GC, it might be OK to let libxml2 use its + * default memory management functions (recall that this config was introduced to reduce memory + * bloat and allow Ruby to GC more often); but we should *really* test with production workloads + * before making that kind of a potentially-invasive change. + */ + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("default")); +#else + rb_const_set(mNokogiri, rb_intern("LIBXML_MEMORY_MANAGEMENT"), NOKOGIRI_STR_NEW2("ruby")); xmlMemSetup((xmlFreeFunc)ruby_xfree, (xmlMallocFunc)ruby_xmalloc, (xmlReallocFunc)ruby_xrealloc, ruby_strdup); +#endif xmlInitParser(); diff --git a/lib/nokogiri/version/info.rb b/lib/nokogiri/version/info.rb index 1bfea42908..b1ac82275f 100644 --- a/lib/nokogiri/version/info.rb +++ b/lib/nokogiri/version/info.rb @@ -137,6 +137,7 @@ def to_hash else libxml["source"] = "system" end + libxml["memory_management"] = Nokogiri::LIBXML_MEMORY_MANAGEMENT libxml["iconv_enabled"] = libxml2_has_iconv? libxml["compiled"] = compiled_libxml_version.to_s libxml["loaded"] = loaded_libxml_version.to_s diff --git a/test/html/test_comments.rb b/test/html/test_comments.rb index 476fc4b473..7d207815f5 100644 --- a/test/html/test_comments.rb +++ b/test/html/test_comments.rb @@ -113,8 +113,7 @@ class TestComment < Nokogiri::TestCase let(:subject) { doc.at_css("div#under-test") } let(:inner_div) { doc.at_css("div#do-i-exist") } - if Nokogiri.uses_libxml? && Nokogiri::VersionInfo.instance.libxml2_using_packaged? - # see patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch + if Nokogiri::VersionInfo.instance.libxml2_using_packaged? || (Nokogiri::VersionInfo.instance.libxml2_using_system? && Nokogiri.uses_libxml?(">=2.9.11")) it "behaves as if the comment is normally closed" do # COMPLIANT assert_equal 3, subject.children.length assert subject.children[0].comment? @@ -128,9 +127,7 @@ class TestComment < Nokogiri::TestCase end end - if Nokogiri.jruby? || Nokogiri::VersionInfo.instance.libxml2_using_system? - # this behavior may change to the above in libxml v2.9.11 depending on whether - # https://gitlab.gnome.org/GNOME/libxml2/-/merge_requests/82 is merged + if Nokogiri.jruby? || (Nokogiri::VersionInfo.instance.libxml2_using_system? && Nokogiri.uses_libxml?("<2.9.11")) it "behaves as if the comment encompasses the inner div" do # NON-COMPLIANT assert_equal 1, subject.children.length assert subject.children.first.comment?