From 36177a9804069f207888bd16fb06bc4a920fd417 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Sun, 18 Aug 2024 22:25:04 -0700 Subject: [PATCH 01/14] Add a lint-misc job for language-agnostic checks, initially for testing line endings --- .github/workflows/code-quality.yaml | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index cd4d7fc7f..b0b6f110c 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -70,7 +70,6 @@ jobs: } cat("All translation quality checks completed successfully!\n") shell: Rscript {0} - lint-md: runs-on: ubuntu-latest steps: @@ -78,3 +77,20 @@ jobs: run: | for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f) shell: Rscript {0} + lint-misc: + runs-on: ubuntu-latest + steps: + - name: Lint line endings + run: | + for (f in list.files(recursive = TRUE)) { + # tryCatch() here are to skip binary/plaintext files (e.g. csv) + ch <- tryCatch(readChar(f, file.size(f)), condition=identity) + if (inherits(ch, "condition")) next + nch <- tryCatch(nchar(ch), condition=identity) + if (inherits(nch, "condition")) next + + if (grepl("\r", ch, fixed = TRUE)) { + stop("Line ending \\r found in ", f, ", please stick with \\n only.") + } + } + shell: Rscript {0} From f28328a3a75633f772d8834feec17923c2f5cfd5 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 17:53:20 +0000 Subject: [PATCH 02/14] try more tracing --- .github/workflows/code-quality.yaml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 6676d6734..0fb8ea3b2 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -87,9 +87,15 @@ jobs: for (f in list.files(recursive = TRUE)) { # tryCatch() here are to skip binary/plaintext files (e.g. csv) ch <- tryCatch(readChar(f, file.size(f)), condition=identity) - if (inherits(ch, "condition")) next + if (inherits(ch, "condition")) { + cat(sprintf("Skipping %s: readChar() produced %s\n", f, toString(class(ch))) + next + } nch <- tryCatch(nchar(ch), condition=identity) - if (inherits(nch, "condition")) next + if (inherits(nch, "condition")) { + cat(sprintf("Skipping %s: nchar() produced %s\n", f, toString(class(nch))) + next + } if (grepl("\r", ch, fixed = TRUE)) { stop("Line ending \\r found in ", f, ", please stick with \\n only.") From b5bf9e7dadef0ccd1be4a5324cbd5555532286c6 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 17:56:20 +0000 Subject: [PATCH 03/14] missing ( --- .github/workflows/code-quality.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 0fb8ea3b2..3dd31aa8a 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -88,12 +88,12 @@ jobs: # tryCatch() here are to skip binary/plaintext files (e.g. csv) ch <- tryCatch(readChar(f, file.size(f)), condition=identity) if (inherits(ch, "condition")) { - cat(sprintf("Skipping %s: readChar() produced %s\n", f, toString(class(ch))) + cat(sprintf("Skipping %s: readChar() produced %s\n", f, toString(class(ch)))) next } nch <- tryCatch(nchar(ch), condition=identity) if (inherits(nch, "condition")) { - cat(sprintf("Skipping %s: nchar() produced %s\n", f, toString(class(nch))) + cat(sprintf("Skipping %s: nchar() produced %s\n", f, toString(class(nch)))) next } From 2391a42736243ae3e736713588d36464e7b1e277 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 18:12:47 +0000 Subject: [PATCH 04/14] more tracing --- .github/workflows/code-quality.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 3dd31aa8a..375926f88 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -84,6 +84,8 @@ jobs: steps: - name: Lint line endings run: | + all_files = list.files(recursive = TRUE) + cat(sprintf("Checking line endings of %d files\n", length(all_files))) for (f in list.files(recursive = TRUE)) { # tryCatch() here are to skip binary/plaintext files (e.g. csv) ch <- tryCatch(readChar(f, file.size(f)), condition=identity) From 17aa3c09f7d3e4ec201bb4f4e7c7ba1fccaf1b2e Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 18:26:39 +0000 Subject: [PATCH 05/14] need checkout step --- .github/workflows/code-quality.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 375926f88..d1499c85a 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -75,6 +75,7 @@ jobs: lint-md: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 - name: Lint run: | for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f) @@ -82,6 +83,7 @@ jobs: lint-misc: runs-on: ubuntu-latest steps: + - uses: actions/checkout@v4 - name: Lint line endings run: | all_files = list.files(recursive = TRUE) From fb9f88f2feb7d8053bbe930c36670ed3fd789614 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 19:14:57 +0000 Subject: [PATCH 06/14] also fix NEWS linters, and fix NEWS* --- .ci/linters/md/news_linter.R | 6 ++-- NEWS.0.md | 22 ++++++------- NEWS.1.md | 62 ++++++++++++++++++------------------ NEWS.md | 10 +++--- 4 files changed, 50 insertions(+), 50 deletions(-) diff --git a/.ci/linters/md/news_linter.R b/.ci/linters/md/news_linter.R index 5eeb302a3..2ff72be68 100644 --- a/.ci/linters/md/news_linter.R +++ b/.ci/linters/md/news_linter.R @@ -2,8 +2,8 @@ any_mismatch = FALSE # ensure that numbered list in each section is in sequence check_section_numbering = function(news) { - # plain '#' catches some examples - sections = grep("^#+ [A-Z]", news) + # plain '#' catches some examples; 'd' for 'data.table' + sections = grep("^#+ [A-Zd]", news) entries = grep("^[0-9]+[.]", news) entry_value = as.integer(gsub("^([0-9]+)[.].*", "\\1", news[entries])) section_id = findInterval(entries, sections) @@ -54,7 +54,7 @@ check_gh_links = function(news) { any_error = FALSE for (news in list.files(pattern = "NEWS")) { cat(sprintf("Checking NEWS file %s...\n", news)) - news_lines = readLines("NEWS.md") + news_lines = readLines(news) any_error = any_error || check_section_numbering(news_lines) any_error = any_error || check_gh_links(news_lines) } diff --git a/NEWS.0.md b/NEWS.0.md index 9c7f53a52..4cca3ddaf 100644 --- a/NEWS.0.md +++ b/NEWS.0.md @@ -237,7 +237,7 @@ 66. `uniqueN` handles `na.rm=TRUE` argument on sorted inputs correctly, [#1771](https://github.com/Rdatatable/data.table/issues/1771). Thanks @ywhuofu. - 67. `get()` / `mget()` play nicely with `.SD` / `.SDcols`, [#1744](https://github.com/Rdatatable/data.table/issues/1753). Thanks @franknarf1. + 67. `get()` / `mget()` play nicely with `.SD` / `.SDcols`, [#1744](https://github.com/Rdatatable/data.table/issues/1744). Thanks @franknarf1. 68. Joins on integer64 columns assigns `NA` correctly for no matching rows, [#1385](https://github.com/Rdatatable/data.table/issues/1385) and partly [#1459](https://github.com/Rdatatable/data.table/issues/1459). Thanks @dlithio and @abielr. @@ -598,7 +598,7 @@ 1. Clearer explanation of what `duplicated()` does (borrowed from base). Thanks to @matthieugomez for pointing out. Closes [#872](https://github.com/Rdatatable/data.table/issues/872). - 2. `?setnames` has been updated now that `names<-` and `colnames<-` shallow (rather than deep) copy from R >= 3.1.0, [#853](https://github.com/Rdatatable/data.table/issues/872). + 2. `?setnames` has been updated now that `names<-` and `colnames<-` shallow (rather than deep) copy from R >= 3.1.0, [#853](https://github.com/Rdatatable/data.table/issues/853). 3. [FAQ 1.6](https://github.com/Rdatatable/data.table/wiki/vignettes/datatable-faq.pdf) has been embellished, [#517](https://github.com/Rdatatable/data.table/issues/517). Thanks to a discussion with Vivi and Josh O'Brien. @@ -638,7 +638,7 @@ ## NEW FEATURES -1. `by=.EACHI` runs `j` for each group in `DT` that each row of `i` joins to. + 1. `by=.EACHI` runs `j` for each group in `DT` that each row of `i` joins to. ``` setkey(DT, ID) DT[c("id1", "id2"), sum(val)] # single total across both id1 and id2 @@ -658,7 +658,7 @@ ``` where `top` is a non-join column in `Y`; i.e., join inherited column. Thanks to many, especially eddi, Sadao Milberg and Gabor Grothendieck for extended discussions. Closes [#538](https://github.com/Rdatatable/data.table/issues/538). -2. Accordingly, `X[Y, j]` now does what `X[Y][, j]` did. To return the old behaviour: `options(datatable.old.bywithoutby=TRUE)`. This is a temporary option to aid migration and will be removed in future. See [this](https://stackoverflow.com/questions/16093289/data-table-join-and-j-expression-unexpected-behavior) and [this](https://stackoverflow.com/a/16222108/403310) post for discussions and motivation. + 2. Accordingly, `X[Y, j]` now does what `X[Y][, j]` did. To return the old behaviour: `options(datatable.old.bywithoutby=TRUE)`. This is a temporary option to aid migration and will be removed in future. See [this](https://stackoverflow.com/questions/16093289/data-table-join-and-j-expression-unexpected-behavior) and [this](https://stackoverflow.com/a/16222108/403310) post for discussions and motivation. 3. `Overlap joins` ([#528](https://github.com/Rdatatable/data.table/issues/528)) is now here, finally!! Except for `type="equal"` and `maxgap` and `minoverlap` arguments, everything else is implemented. Check out `?foverlaps` and the examples there on its usage. This is a major feature addition to `data.table`. @@ -672,7 +672,7 @@ * Fixed segfault in sparse data files when bumping to character, [#796](https://github.com/Rdatatable/data.table/issues/796) and [#722](https://github.com/Rdatatable/data.table/issues/722). Thanks to Adam Kennedy and Richard Cotton for the detailed reproducible reports. * New argument `fread(...,data.table=FALSE)` returns a `data.frame` instead of a `data.table`. This can be set globally: `options(datatable.fread.datatable=FALSE)`. -6. `.()` can now be used in `j` and is identical to `list()`, for consistency with `i`. + 6. `.()` can now be used in `j` and is identical to `list()`, for consistency with `i`. ```R DT[,list(MySum=sum(B)),by=...] DT[,.(MySum=sum(B)),by=...] # same @@ -801,7 +801,7 @@ 6. `data.table()` converted POSIXlt to POSIXct, consistent with `base:::data.frame()`, but now also provides a helpful warning instead of coercing silently, [#59](https://github.com/Rdatatable/data.table/issues/59). Thanks to Brodie Gaslam, Patrick and Ragy Isaac for reporting [here](https://stackoverflow.com/questions/21487614/error-creating-r-data-table-with-date-time-posixlt) and [here](https://stackoverflow.com/questions/21320215/converting-from-data-frame-to-data-table-i-get-an-error-with-head). - 7. If another class inherits from data.table; e.g. `class(DT) == c("UserClass","data.table","data.frame")` then `DT[...]` now retains `UserClass` in the result. Thanks to Daniel Krizian for reporting, [#64](https://github.com/Rdatatable/data.table/issues/44). Test added. + 7. If another class inherits from data.table; e.g. `class(DT) == c("UserClass","data.table","data.frame")` then `DT[...]` now retains `UserClass` in the result. Thanks to Daniel Krizian for reporting, [#64](https://github.com/Rdatatable/data.table/issues/64). Test added. 8. An error `object '' not found` could occur in some circumstances, particularly after a previous error. [Reported on SO](https://stackoverflow.com/questions/22128047/how-to-avoid-weird-umlaute-error-when-using-data-table) with non-ASCII characters in a column name, a red herring we hope since non-ASCII characters are fully supported in data.table including in column names. Fix implemented and tests added. @@ -845,11 +845,11 @@ 27. `dcast.data.table` tries to preserve attributes wherever possible, except when `value.var` is a `factor` (or ordered factor). For `factor` types, the casted columns will be coerced to type `character` thereby losing the `levels` attribute. Closes [#688](https://github.com/Rdatatable/data.table/issues/688). Thanks to juancentro for reporting. - 28. `melt` now returns friendly error when `measure.vars` are not in data instead of segfault. Closes [#699](https://github.com/Rdatatable/data.table/issues/688). Thanks to vsalmendra for [this post on SO](https://stackoverflow.com/q/24326797/559784) and the subsequent bug report. + 28. `melt` now returns friendly error when `measure.vars` are not in data instead of segfault. Closes [#699](https://github.com/Rdatatable/data.table/issues/699). Thanks to vsalmendra for [this post on SO](https://stackoverflow.com/q/24326797/559784) and the subsequent bug report. 29. `DT[, list(m1 = eval(expr1), m2=eval(expr2)), by=val]` where `expr1` and `expr2` are constructed using `parse(text=.)` now works instead of resulting in error. Closes [#472](https://github.com/Rdatatable/data.table/issues/472). Thanks to Benjamin Barnes for reporting with a nice reproducible example. - 30. A join of the form `X[Y, roll=TRUE, nomatch=0L]` where some of Y's key columns occur more than once (duplicated keys) might at times return incorrect join. This was introduced only in 1.9.2 and is fixed now. Closes [#700](https://github.com/Rdatatable/data.table/issues/472). Thanks to Michael Smith for the very nice reproducible example and nice spotting of such a tricky case. + 30. A join of the form `X[Y, roll=TRUE, nomatch=0L]` where some of Y's key columns occur more than once (duplicated keys) might at times return incorrect join. This was introduced only in 1.9.2 and is fixed now. Closes [#700](https://github.com/Rdatatable/data.table/issues/700). Thanks to Michael Smith for the very nice reproducible example and nice spotting of such a tricky case. 31. Fixed an edge case in `DT[order(.)]` internal optimisation to be consistent with base. Closes [#696](https://github.com/Rdatatable/data.table/issues/696). Thanks to Michael Smith and Garrett See for reporting. @@ -859,13 +859,13 @@ 34. `dcast.data.table` now returns a friendly error when fun.aggregate value for missing combinations is 0-length, and 'fill' argument is not provided. Closes [#715](https://github.com/Rdatatable/data.table/issues/715) - 35. `rbind/rbindlist` binds in the same order of occurrence also when binding tables with duplicate names along with 'fill=TRUE' (previously, it grouped all duplicate columns together). This was the underlying reason for [#725](https://github.com/Rdatatable/data.table/issues/715). Thanks to Stefan Fritsch for the report with a nice reproducible example and discussion. + 35. `rbind/rbindlist` binds in the same order of occurrence also when binding tables with duplicate names along with 'fill=TRUE' (previously, it grouped all duplicate columns together). This was the underlying reason for [#725](https://github.com/Rdatatable/data.table/issues/725). Thanks to Stefan Fritsch for the report with a nice reproducible example and discussion. 36. `setDT` now provides a friendly error when attempted to change a variable to data.table by reference whose binding is locked (usually when the variable is within a package, ex: CO2). Closes [#475](https://github.com/Rdatatable/data.table/issues/475). Thanks to David Arenburg for filing the report [here](https://stackoverflow.com/questions/23361080/error-in-setdt-from-data-table-package) on SO. 37. `X[!Y]` where `X` and `Y` are both data.tables ignores 'allow.cartesian' argument, and rightly so because a not-join (or anti-join) cannot exceed nrow(x). Thanks to @fedyakov for spotting this. Closes [#698](https://github.com/Rdatatable/data.table/issues/698). - 38. `as.data.table.matrix` does not convert strings to factors by default. `data.table` likes and prefers using character vectors to factors. Closes [#745](https://github.com/Rdatatable/data.table/issues/698). Thanks to @fpinter for reporting the issue on the github issue tracker and to vijay for reporting [here](https://stackoverflow.com/questions/17691050/data-table-still-converts-strings-to-factors) on SO. + 38. `as.data.table.matrix` does not convert strings to factors by default. `data.table` likes and prefers using character vectors to factors. Closes [#745](https://github.com/Rdatatable/data.table/issues/745). Thanks to @fpinter for reporting the issue on the github issue tracker and to vijay for reporting [here](https://stackoverflow.com/questions/17691050/data-table-still-converts-strings-to-factors) on SO. 39. Joins of the form `x[y[z]]` resulted in duplicate names when all `x`, `y` and `z` had the same column names as non-key columns. This is now fixed. Closes [#471](https://github.com/Rdatatable/data.table/issues/471). Thanks to Christian Sigg for the nice reproducible example. @@ -910,7 +910,7 @@ `?setorder` (with alias `?order` and `?forder`). Closes [#478](https://github.com/Rdatatable/data.table/issues/478) and also [#704](https://github.com/Rdatatable/data.table/issues/704). Thanks to Christian Wolf for the report. - 8. Added tests (1351.1 and 1351.2) to catch any future regressions on particular case of binary search based subset reported [here](https://stackoverflow.com/q/24729001/559784) on SO. Thanks to Scott for the post. The regression was contained to v1.9.2 AFAICT. Closes [#734](https://github.com/Rdatatable/data.table/issues/704). + 8. Added tests (1351.1 and 1351.2) to catch any future regressions on particular case of binary search based subset reported [here](https://stackoverflow.com/q/24729001/559784) on SO. Thanks to Scott for the post. The regression was contained to v1.9.2 AFAICT. Closes [#734](https://github.com/Rdatatable/data.table/issues/734). 9. Added an `.onUnload` method to unload `data.table`'s shared object properly. Since the name of the shared object is 'datatable.so' and not 'data.table.so', 'detach' fails to unload correctly. This was the reason for the issue reported [here](https://stackoverflow.com/questions/23498804/load-detach-re-load-anomaly) on SO. Closes [#474](https://github.com/Rdatatable/data.table/issues/474). Thanks to Matthew Plourde for reporting. diff --git a/NEWS.1.md b/NEWS.1.md index a27275f0b..6a99226b8 100644 --- a/NEWS.1.md +++ b/NEWS.1.md @@ -11,7 +11,7 @@ 3. Fix multiple test concerning coercion to missing complex numbers, [#5695](https://github.com/Rdatatable/data.table/issues/5695) and [#5748](https://github.com/Rdatatable/data.table/issues/5748). Thanks to @MichaelChirico and @ben-schwen for the patches. -4. Fix multiple format warnings (e.g., -Wformat) [#5712](https://github.com/Rdatatable/data.table/pull/5712), [#5781](https://github.com/Rdatatable/data.table/pull/5781), [#5880](https://github.com/Rdatatable/data.table/pull/5800), [#5786](https://github.com/Rdatatable/data.table/pull/5786). Thanks to @MichaelChirico and @jangorecki for the patches. +4. Fix multiple format warnings (e.g., -Wformat) [#5712](https://github.com/Rdatatable/data.table/pull/5712), [#5781](https://github.com/Rdatatable/data.table/pull/5781), [#5800](https://github.com/Rdatatable/data.table/pull/5800), [#5786](https://github.com/Rdatatable/data.table/pull/5786). Thanks to @MichaelChirico and @jangorecki for the patches. # data.table [v1.14.8](https://github.com/Rdatatable/data.table/milestone/28?closed=1) (17 Feb 2023) @@ -169,7 +169,7 @@ We have requested that CRAN policy be modified to require that reverse dependency testing include packages which `Suggest` the package. Had this been the case, reverse dependency testing of `bit64` would have caught the impact on `data.table` before release. -2. `?.NGRP` now displays the help page as intended, [#4946](https://github.com/Rdatatable/data.table/issues/4649). Thanks to @KyleHaynes for posting the issue, and Cole Miller for the fix. `.NGRP` is a symbol new in v1.13.0; see below in this file. +2. `?.NGRP` now displays the help page as intended, [#4649](https://github.com/Rdatatable/data.table/issues/4649). Thanks to @KyleHaynes for posting the issue, and Cole Miller for the fix. `.NGRP` is a symbol new in v1.13.0; see below in this file. 3. `test.data.table()` failed in non-English locales such as `LC_TIME=fr_FR.UTF-8` due to `Jan` vs `janv.` in tests 168 and 2042, [#3450](https://github.com/Rdatatable/data.table/issues/3450). Thanks to @shrektan for reporting, and @tdhock for making the tests locale-aware. @@ -311,13 +311,13 @@ has a better chance of working on Mac. ## NOTES -0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. +1. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. -1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). +2. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). -2. `set2key`, `set2keyv`, and `key2` have been removed, as they have been warning since v1.9.8 (Nov 2016) and halting with helpful message since v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' and quickly superseded by `setindex` and `indices`. +3. `set2key`, `set2keyv`, and `key2` have been removed, as they have been warning since v1.9.8 (Nov 2016) and halting with helpful message since v1.11.0 (May 2018). When they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' and quickly superseded by `setindex` and `indices`. -3. `data.table` now supports messaging in simplified Chinese (locale `zh_CN`). This was the result of a monumental collaboration to translate `data.table`'s roughly 1400 warnings, errors, and verbose messages (about 16,000 words/100,000 characters) over the course of two months from volunteer translators in at least 4 time zones, most of whom are first-time `data.table` contributors and many of whom are first-time OSS contributors! +4. `data.table` now supports messaging in simplified Chinese (locale `zh_CN`). This was the result of a monumental collaboration to translate `data.table`'s roughly 1400 warnings, errors, and verbose messages (about 16,000 words/100,000 characters) over the course of two months from volunteer translators in at least 4 time zones, most of whom are first-time `data.table` contributors and many of whom are first-time OSS contributors! A big thanks goes out to @fengqifang, @hongyuanjia, @biobai, @zhiiiyang, @Leo-Lee15, @soappp9527, @amy17519, @Zachary-Wu, @caiquanyou, @dracodoc, @JulianYlli12, @renkun-ken, @Xueliang24, @koohoko, @KingdaShi, @gaospecial, @shrektan, @sunshine1126, @shawnchen1996, @yc0802, @HesperusArcher, and @Emberwhirl, all of whom took time from their busy schedules to translate and review others' translations. Especial thanks goes to @zhiiiyang and @hongyuanjia who went above and beyond in helping to push the project over the finish line, and to @GuangchuangYu who helped to organize the volunteer pool. @@ -327,11 +327,11 @@ has a better chance of working on Mac. We will evaluate the feasibility (in terms of maintenance difficulty and CRAN package size limits) of offering support for other languages in later releases. -4. `fifelse` and `fcase` now notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. +5. `fifelse` and `fcase` now notify users that S4 objects (except `nanotime`) are not supported [#4135](https://github.com/Rdatatable/data.table/issues/4135). Thanks to @torema-ed for bringing it to our attention and Morgan Jacob for the PR. -5. `frank(..., ties.method="random", na.last=NA)` now returns the same random ordering that `base::rank` does, [#4243](https://github.com/Rdatatable/data.table/pull/4243). +6. `frank(..., ties.method="random", na.last=NA)` now returns the same random ordering that `base::rank` does, [#4243](https://github.com/Rdatatable/data.table/pull/4243). -6. The error message when mistakenly using `:=` in `i` instead of `j` has been much improved, [#4227](https://github.com/Rdatatable/data.table/issues/4227). Thanks to Hugh Parsonage for the detailed suggestion. +7. The error message when mistakenly using `:=` in `i` instead of `j` has been much improved, [#4227](https://github.com/Rdatatable/data.table/issues/4227). Thanks to Hugh Parsonage for the detailed suggestion. ```R > DT = data.table(A=1:2) @@ -348,17 +348,17 @@ has a better chance of working on Mac. 2: 2 3 ``` -7. Added more explanation/examples to `?data.table` for how to use `.BY`, [#1363](https://github.com/Rdatatable/data.table/issues/1363). +8. Added more explanation/examples to `?data.table` for how to use `.BY`, [#1363](https://github.com/Rdatatable/data.table/issues/1363). -8. Changes upstream in R have been accommodated; e.g. `c.POSIXct` now raises `'origin' must be supplied` which impacted `foverlaps`, [#4428](https://github.com/Rdatatable/data.table/pull/4428). +9. Changes upstream in R have been accommodated; e.g. `c.POSIXct` now raises `'origin' must be supplied` which impacted `foverlaps`, [#4428](https://github.com/Rdatatable/data.table/pull/4428). -9. `data.table::update.dev.pkg()` now unloads the `data.table` namespace to alleviate a DLL lock issue on Windows, [#4403](https://github.com/Rdatatable/data.table/issues/4403). Thanks to @drag5 for reporting. +10. `data.table::update.dev.pkg()` now unloads the `data.table` namespace to alleviate a DLL lock issue on Windows, [#4403](https://github.com/Rdatatable/data.table/issues/4403). Thanks to @drag5 for reporting. -10. `data.table` packages binaries built by R version 3 (R3) should only be installed in R3, and similarly `data.table` package binaries built by R4 should only be installed in R4. Otherwise, `package ‘data.table’ was built under R version...` warning will occur which should not be ignored. This is due to a very welcome change to `rbind` and `cbind` in R 4.0.0 which enabled us to remove workarounds, see news item in v1.12.6 below in this file. To continue to support both R3 and R4, `data.table`'s NAMESPACE file contains a condition on the R major version (3 or 4) and this is what gives rise to the requirement that the major version used to build `data.table` must match the major version used to install it. Thanks to @vinhdizzo for reporting, [#4528](https://github.com/Rdatatable/data.table/issues/4528). +11. `data.table` packages binaries built by R version 3 (R3) should only be installed in R3, and similarly `data.table` package binaries built by R4 should only be installed in R4. Otherwise, `package ‘data.table’ was built under R version...` warning will occur which should not be ignored. This is due to a very welcome change to `rbind` and `cbind` in R 4.0.0 which enabled us to remove workarounds, see news item in v1.12.6 below in this file. To continue to support both R3 and R4, `data.table`'s NAMESPACE file contains a condition on the R major version (3 or 4) and this is what gives rise to the requirement that the major version used to build `data.table` must match the major version used to install it. Thanks to @vinhdizzo for reporting, [#4528](https://github.com/Rdatatable/data.table/issues/4528). -11. Internal function `shallow()` no longer makes a deep copy of secondary indices. This eliminates a relatively small time and memory overhead when indices are present that added up significantly when performing many operations, such as joins, in a loop or when joining in `j` by group, [#4311](https://github.com/Rdatatable/data.table/issues/4311). Many thanks to @renkun-ken for the report, and @tlapak for the investigation and PR. +12. Internal function `shallow()` no longer makes a deep copy of secondary indices. This eliminates a relatively small time and memory overhead when indices are present that added up significantly when performing many operations, such as joins, in a loop or when joining in `j` by group, [#4311](https://github.com/Rdatatable/data.table/issues/4311). Many thanks to @renkun-ken for the report, and @tlapak for the investigation and PR. -12. The `datatable.old.unique.by.key` option has been removed as per the 4 year schedule detailed in note 10 of v1.12.4 (Oct 2019), note 10 of v1.11.0 (May 2018), and note 1 of v1.9.8 (Nov 2016). It has been generating a helpful warning for 2 years, and helpful error for 1 year. +13. The `datatable.old.unique.by.key` option has been removed as per the 4 year schedule detailed in note 10 of v1.12.4 (Oct 2019), note 10 of v1.11.0 (May 2018), and note 1 of v1.9.8 (Nov 2016). It has been generating a helpful warning for 2 years, and helpful error for 1 year. # data.table [v1.12.8](https://github.com/Rdatatable/data.table/milestone/15?closed=1) (09 Dec 2019) @@ -715,7 +715,7 @@ has a better chance of working on Mac. 33. Using `get`/`mget` in `j` could cause `.SDcols` to be ignored or reordered, [#1744](https://github.com/Rdatatable/data.table/issues/1744), [#1965](https://github.com/Rdatatable/data.table/issues/1965), and [#2036](https://github.com/Rdatatable/data.table/issues/2036). Thanks @franknarf1, @MichaelChirico, and @TonyBonen, for the reports. -34. `DT[, i-1L, with=FALSE]` would misinterpret the minus sign and return an incorrect result, [#2019](https://github.com/Rdatatable/data.table/issues/2109). Thanks @cguill95 for the report. +34. `DT[, i-1L, with=FALSE]` would misinterpret the minus sign and return an incorrect result, [#2109](https://github.com/Rdatatable/data.table/issues/2109). Thanks @cguill95 for the report. 35. `DT[id==1, DT2[.SD, on="id"]]` (i.e. joining from `.SD` in `j`) could incorrectly fail in some cases due to `.SD` being locked, [#1926](https://github.com/Rdatatable/data.table/issues/1926), and when updating-on-join with factors [#3559](https://github.com/Rdatatable/data.table/issues/3559) [#2099](https://github.com/Rdatatable/data.table/issues/2099). Thanks @franknarf1 and @Henrik-P for the reports and for diligently tracking use cases for almost 3 years! @@ -785,7 +785,7 @@ has a better chance of working on Mac. 18. Added a note to `?setkey` clarifying that `setkey` always uses C-locale sorting (as has been noted in `?setorder`). Thanks @JBreidaks for the report in [#2114](https://github.com/Rdatatable/data.table/issues/2114). -19. `hour()`/`minute()`/`second()` are much faster for `ITime` input, [#3518](https://github.com/Rdatatable/data.table/issues/3158). +19. `hour()`/`minute()`/`second()` are much faster for `ITime` input, [#3158](https://github.com/Rdatatable/data.table/issues/3158). 20. New alias `setalloccol` for `alloc.col`, [#3475](https://github.com/Rdatatable/data.table/issues/3475). For consistency with `set*` prefixes for functions that operate in-place (like `setkey`, `setorder`, etc.). `alloc.col` is not going to be deprecated but we recommend using `setalloccol`. @@ -1397,38 +1397,38 @@ Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdat ## NOTES -0. The license has been changed from GPL to MPL (Mozilla Public License). All contributors were consulted and approved. [PR#2456](https://github.com/Rdatatable/data.table/pull/2456) details the reasons for the change. +1. The license has been changed from GPL to MPL (Mozilla Public License). All contributors were consulted and approved. [PR#2456](https://github.com/Rdatatable/data.table/pull/2456) details the reasons for the change. -1. `?data.table` makes explicit the option of using a `logical` vector in `j` to select columns, [#1978](https://github.com/Rdatatable/data.table/issues/1978). Thanks @Henrik-P for the note and @MichaelChirico for filing. +2. `?data.table` makes explicit the option of using a `logical` vector in `j` to select columns, [#1978](https://github.com/Rdatatable/data.table/issues/1978). Thanks @Henrik-P for the note and @MichaelChirico for filing. -2. Test 1675.1 updated to cope with a change in R-devel in June 2017 related to `factor()` and `NA` levels. +3. Test 1675.1 updated to cope with a change in R-devel in June 2017 related to `factor()` and `NA` levels. -3. Package `ezknitr` has been added to the whitelist of packages that run user code and should be consider data.table-aware, [#2266](https://github.com/Rdatatable/data.table/issues/2266). Thanks to Matt Mills for testing and reporting. +4. Package `ezknitr` has been added to the whitelist of packages that run user code and should be consider data.table-aware, [#2266](https://github.com/Rdatatable/data.table/issues/2266). Thanks to Matt Mills for testing and reporting. -4. Printing with `quote = TRUE` now quotes column names as well, [#1319](https://github.com/Rdatatable/data.table/issues/1319). Thanks @jan-glx for the suggestion and @MichaelChirico for the PR. +5. Printing with `quote = TRUE` now quotes column names as well, [#1319](https://github.com/Rdatatable/data.table/issues/1319). Thanks @jan-glx for the suggestion and @MichaelChirico for the PR. -5. Added a blurb to `?melt.data.table` explicating the subtle difference in behavior of the `id.vars` argument vis-a-vis its analog in `reshape2::melt`, [#1699](https://github.com/Rdatatable/data.table/issues/1699). Thanks @MichaelChirico for uncovering and filing. +6. Added a blurb to `?melt.data.table` explicating the subtle difference in behavior of the `id.vars` argument vis-a-vis its analog in `reshape2::melt`, [#1699](https://github.com/Rdatatable/data.table/issues/1699). Thanks @MichaelChirico for uncovering and filing. -6. Added some clarification about the usage of `on` to `?data.table`, [#2383](https://github.com/Rdatatable/data.table/issues/2383). Thanks to @peterlittlejohn for volunteering his confusion and @MichaelChirico for brushing things up. +7. Added some clarification about the usage of `on` to `?data.table`, [#2383](https://github.com/Rdatatable/data.table/issues/2383). Thanks to @peterlittlejohn for volunteering his confusion and @MichaelChirico for brushing things up. -7. Clarified that "data.table always sorts in `C-locale`" means that upper-case letters are sorted before lower-case letters by ordering in data.table (e.g. `setorder`, `setkey`, `DT[order(...)]`). Thanks to @hughparsonage for the pull request editing the documentation. Note this makes no difference in most cases of data; e.g. ids where only uppercase or lowercase letters are used (`"AB123"<"AC234"` is always true, regardless), or country names and words which are consistently capitalized. For example, `"America" < "Brazil"` is not affected (it's always true), and neither is `"america" < "brazil"` (always true too); since the first letter is consistently capitalized. But, whether `"america" < "Brazil"` (the words are not consistently capitalized) is true or false in base R depends on the locale of your R session. In America it is true by default and false if you i) type `Sys.setlocale(locale="C")`, ii) the R session has been started in a C locale for you which can happen on servers/services (the locale comes from the environment the R session is started in). However, `"america" < "Brazil"` is always, consistently false in data.table which can be a surprise because it differs to base R by default in most regions. It is false because `"B"<"a"` is true because all upper-case letters come first, followed by all lower case letters (the ascii number of each letter determines the order, which is what is meant by `C-locale`). +8. Clarified that "data.table always sorts in `C-locale`" means that upper-case letters are sorted before lower-case letters by ordering in data.table (e.g. `setorder`, `setkey`, `DT[order(...)]`). Thanks to @hughparsonage for the pull request editing the documentation. Note this makes no difference in most cases of data; e.g. ids where only uppercase or lowercase letters are used (`"AB123"<"AC234"` is always true, regardless), or country names and words which are consistently capitalized. For example, `"America" < "Brazil"` is not affected (it's always true), and neither is `"america" < "brazil"` (always true too); since the first letter is consistently capitalized. But, whether `"america" < "Brazil"` (the words are not consistently capitalized) is true or false in base R depends on the locale of your R session. In America it is true by default and false if you i) type `Sys.setlocale(locale="C")`, ii) the R session has been started in a C locale for you which can happen on servers/services (the locale comes from the environment the R session is started in). However, `"america" < "Brazil"` is always, consistently false in data.table which can be a surprise because it differs to base R by default in most regions. It is false because `"B"<"a"` is true because all upper-case letters come first, followed by all lower case letters (the ascii number of each letter determines the order, which is what is meant by `C-locale`). -8. `data.table`'s dependency has been moved forward from R 3.0.0 (Apr 2013) to R 3.1.0 (Apr 2014; i.e. 3.5 years old). We keep this dependency as old as possible for as long as possible as requested by users in managed environments. Thanks to Jan Gorecki, the test suite from latest dev now runs on R 3.1.0 continuously, as well as R-release (currently 3.4.2) and latest R-devel snapshot. The primary motivation for the bump to R 3.1.0 was allowing one new test which relies on better non-copying behaviour in that version, [#2484](https://github.com/Rdatatable/data.table/issues/2484). It also allows further internal simplifications. Thanks to @MichaelChirico for fixing another test that failed on R 3.1.0 due to slightly different behaviour of `base::read.csv` in R 3.1.0-only which the test was comparing to, [#2489](https://github.com/Rdatatable/data.table/pull/2489). +9. `data.table`'s dependency has been moved forward from R 3.0.0 (Apr 2013) to R 3.1.0 (Apr 2014; i.e. 3.5 years old). We keep this dependency as old as possible for as long as possible as requested by users in managed environments. Thanks to Jan Gorecki, the test suite from latest dev now runs on R 3.1.0 continuously, as well as R-release (currently 3.4.2) and latest R-devel snapshot. The primary motivation for the bump to R 3.1.0 was allowing one new test which relies on better non-copying behaviour in that version, [#2484](https://github.com/Rdatatable/data.table/issues/2484). It also allows further internal simplifications. Thanks to @MichaelChirico for fixing another test that failed on R 3.1.0 due to slightly different behaviour of `base::read.csv` in R 3.1.0-only which the test was comparing to, [#2489](https://github.com/Rdatatable/data.table/pull/2489). -9. New vignette added: _Importing data.table_ - focused on using data.table as a dependency in R packages. Answers most commonly asked questions and promote good practices. +10. New vignette added: _Importing data.table_ - focused on using data.table as a dependency in R packages. Answers most commonly asked questions and promote good practices. -10. As warned in v1.9.8 release notes below in this file (25 Nov 2016) it has been 1 year since then and so use of `options(datatable.old.unique.by.key=TRUE)` to restore the old default is now deprecated with warning. The new warning states that this option still works and repeats the request to pass `by=key(DT)` explicitly to `unique()`, `duplicated()`, `uniqueN()` and `anyDuplicated()` and to stop using this option. In another year, this warning will become error. Another year after that the option will be removed. +11. As warned in v1.9.8 release notes below in this file (25 Nov 2016) it has been 1 year since then and so use of `options(datatable.old.unique.by.key=TRUE)` to restore the old default is now deprecated with warning. The new warning states that this option still works and repeats the request to pass `by=key(DT)` explicitly to `unique()`, `duplicated()`, `uniqueN()` and `anyDuplicated()` and to stop using this option. In another year, this warning will become error. Another year after that the option will be removed. -11. As `set2key()` and `key2()` have been warning since v1.9.8 (Nov 2016), their warnings have now been upgraded to errors. Note that when they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' in NEWS item 4. They will be removed in one year. +12. As `set2key()` and `key2()` have been warning since v1.9.8 (Nov 2016), their warnings have now been upgraded to errors. Note that when they were introduced in version 1.9.4 (Oct 2014) they were marked as 'experimental' in NEWS item 4. They will be removed in one year. ``` Was warning: set2key() will be deprecated in the next release. Please use setindex() instead. Now error: set2key() is now deprecated. Please use setindex() instead. ``` -12. The option `datatable.showProgress` is no longer set to a default value when the package is loaded. Instead, the `default=` argument of `getOption` is used by both `fwrite` and `fread`. The default is the result of `interactive()` at the time of the call. Using `getOption` in this way is intended to be more helpful to users looking at `args(fread)` and `?fread`. +13. The option `datatable.showProgress` is no longer set to a default value when the package is loaded. Instead, the `default=` argument of `getOption` is used by both `fwrite` and `fread`. The default is the result of `interactive()` at the time of the call. Using `getOption` in this way is intended to be more helpful to users looking at `args(fread)` and `?fread`. -13. `print.data.table()` invisibly returns its first argument instead of `NULL`. This behavior is compatible with the standard `print.data.frame()` and tibble's `print.tbl_df()`. Thanks to @heavywatal for [PR#2807](https://github.com/Rdatatable/data.table/pull/2807) +14. `print.data.table()` invisibly returns its first argument instead of `NULL`. This behavior is compatible with the standard `print.data.frame()` and tibble's `print.tbl_df()`. Thanks to @heavywatal for [PR#2807](https://github.com/Rdatatable/data.table/pull/2807) # data.table v1.10.4-3 (20 Oct 2017) diff --git a/NEWS.md b/NEWS.md index 04e122935..6e70a0f4e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -101,15 +101,15 @@ The former might be a matter of preference requiring you to specify a different `tolerance=`, while the latter was clearly a bug. -12. `rbindlist()` and `shift()` could lead to a protection stack overflow when applied to a list containing many nested lists exceeding the pointer protection stack size, [#4536](https://github.com/Rdatatable/data.table/issues/4536). Thanks to @ProfFancyPants for reporting, and @ben-schwen (`rbindlist`) and @MichaelChirico (`shift`) for the fix. +10. `rbindlist()` and `shift()` could lead to a protection stack overflow when applied to a list containing many nested lists exceeding the pointer protection stack size, [#4536](https://github.com/Rdatatable/data.table/issues/4536). Thanks to @ProfFancyPants for reporting, and @ben-schwen (`rbindlist`) and @MichaelChirico (`shift`) for the fix. -13. `fread(x, colClasses="POSIXct")` now also works for columns containing only `NA` values, [#6208](https://github.com/Rdatatable/data.table/issues/6208). Thanks to @markus-schaffer for the report, and @ben-schwen for the fix. +11. `fread(x, colClasses="POSIXct")` now also works for columns containing only `NA` values, [#6208](https://github.com/Rdatatable/data.table/issues/6208). Thanks to @markus-schaffer for the report, and @ben-schwen for the fix. -14. `fread()` is more careful about detecting that a file is compressed in bzip2 format, [#6304](https://github.com/Rdatatable/data.table/issues/6304). In particular, we also check the 4th byte of the file is a digit; in rare cases, a legitimate uncompressed CSV file could match 'BZh' as the first 3 bytes. We think an uncompressed CSV file matching 'BZh[1-9]' is all the more rare and unlikely to be encountered in "real" examples. Other formats (zip, gzip) are friendly enough to use non-printable characters in their magic numbers. Thanks @grainnemcguire for the report and @MichaelChirico for the fix. +12. `fread()` is more careful about detecting that a file is compressed in bzip2 format, [#6304](https://github.com/Rdatatable/data.table/issues/6304). In particular, we also check the 4th byte of the file is a digit; in rare cases, a legitimate uncompressed CSV file could match 'BZh' as the first 3 bytes. We think an uncompressed CSV file matching 'BZh[1-9]' is all the more rare and unlikely to be encountered in "real" examples. Other formats (zip, gzip) are friendly enough to use non-printable characters in their magic numbers. Thanks @grainnemcguire for the report and @MichaelChirico for the fix. -15. Selecting the key column like `DT[, .(key1, key2)]` will retain the key without a performance penalty, [#4498](https://github.com/Rdatatable/data.table/issues/4498). Thanks to @user9439449 on StackOverflow for the report and @MichaelChirico for the fix. +13. Selecting the key column like `DT[, .(key1, key2)]` will retain the key without a performance penalty, [#4498](https://github.com/Rdatatable/data.table/issues/4498). Thanks to @user9439449 on StackOverflow for the report and @MichaelChirico for the fix. -16. Passing functions programmatically with `env=` doesn't produce an opaque error, e.g. `DT[, f(b), env = list(f=sum)]`, [#6026](https://github.com/Rdatatable/data.table/issues/6026). Note that it's much better to pass functions like `f="sum"` instead. Thanks to @MichaelChirico for the bug report and fix. +14. Passing functions programmatically with `env=` doesn't produce an opaque error, e.g. `DT[, f(b), env = list(f=sum)]`, [#6026](https://github.com/Rdatatable/data.table/issues/6026). Note that it's much better to pass functions like `f="sum"` instead. Thanks to @MichaelChirico for the bug report and fix. ## NOTES From 13333fd22d08c571383b42626eeca05e8e652ee1 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 19:20:38 +0000 Subject: [PATCH 07/14] missing 44 files... --- .github/workflows/code-quality.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index d1499c85a..10bfbdb0b 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -88,6 +88,7 @@ jobs: run: | all_files = list.files(recursive = TRUE) cat(sprintf("Checking line endings of %d files\n", length(all_files))) + cat(sprintf(" Top-level files: %s\n", toString(list.files()))) for (f in list.files(recursive = TRUE)) { # tryCatch() here are to skip binary/plaintext files (e.g. csv) ch <- tryCatch(readChar(f, file.size(f)), condition=identity) From f2b931e50eec0de91feff31dd423a4fc1b17ff47 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 19:24:52 +0000 Subject: [PATCH 08/14] ensure the 3 target files are being checked... --- .github/workflows/code-quality.yaml | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 10bfbdb0b..2b5bf4ab3 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -86,21 +86,13 @@ jobs: - uses: actions/checkout@v4 - name: Lint line endings run: | - all_files = list.files(recursive = TRUE) - cat(sprintf("Checking line endings of %d files\n", length(all_files))) - cat(sprintf(" Top-level files: %s\n", toString(list.files()))) for (f in list.files(recursive = TRUE)) { + if (basename(f) %in% c("CODEOWNERS", "R-es.po", "es.po")) cat(sprintf("Checking %s\n", f)) # tryCatch() here are to skip binary/plaintext files (e.g. csv) ch <- tryCatch(readChar(f, file.size(f)), condition=identity) - if (inherits(ch, "condition")) { - cat(sprintf("Skipping %s: readChar() produced %s\n", f, toString(class(ch)))) - next - } + if (inherits(ch, "condition")) next nch <- tryCatch(nchar(ch), condition=identity) - if (inherits(nch, "condition")) { - cat(sprintf("Skipping %s: nchar() produced %s\n", f, toString(class(nch)))) - next - } + if (inherits(nch, "condition")) next if (grepl("\r", ch, fixed = TRUE)) { stop("Line ending \\r found in ", f, ", please stick with \\n only.") From 955b4e26901ec9ba6a22cd1a038c8f4f8f57b1f2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 19:26:42 +0000 Subject: [PATCH 09/14] ensure check is run even with prior failures --- .ci/linters/md/news_linter.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/linters/md/news_linter.R b/.ci/linters/md/news_linter.R index 2ff72be68..698685b84 100644 --- a/.ci/linters/md/news_linter.R +++ b/.ci/linters/md/news_linter.R @@ -55,7 +55,7 @@ any_error = FALSE for (news in list.files(pattern = "NEWS")) { cat(sprintf("Checking NEWS file %s...\n", news)) news_lines = readLines(news) - any_error = any_error || check_section_numbering(news_lines) - any_error = any_error || check_gh_links(news_lines) + any_error = check_section_numbering(news_lines) || any_error + any_error = check_gh_links(news_lines) || any_error } if (any_error) stop("Please fix the NEWS issues above.") From d1c9c3dd08d61091a2d7cc336f19730c255a9755 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 19:30:46 +0000 Subject: [PATCH 10/14] is checkout removing the line endings? --- .github/workflows/code-quality.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 2b5bf4ab3..4e207213d 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -87,7 +87,11 @@ jobs: - name: Lint line endings run: | for (f in list.files(recursive = TRUE)) { - if (basename(f) %in% c("CODEOWNERS", "R-es.po", "es.po")) cat(sprintf("Checking %s\n", f)) + if (basename(f) %in% c("CODEOWNERS", "R-es.po", "es.po")) { + nchar = sum(nchar(readLines(f, n=3L))) + 6 # padding for line endings + str = readChar(f, nchar) + cat(sprintf("Here's the first %d characters of %s\n\n%s\n", nchar, f, encodeString(str))) + } # tryCatch() here are to skip binary/plaintext files (e.g. csv) ch <- tryCatch(readChar(f, file.size(f)), condition=identity) if (inherits(ch, "condition")) next From f498818f42cb314281d2bc5efd5cdd7cee46298b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 20:03:26 +0000 Subject: [PATCH 11/14] seems actions/checkout is converting the line endigns --- .github/workflows/code-quality.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 4e207213d..f8e1baa83 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -83,6 +83,9 @@ jobs: lint-misc: runs-on: ubuntu-latest steps: + - name: Don't auto-convert line endings during checkout step + run: | + git config --global core.autocrlf false - uses: actions/checkout@v4 - name: Lint line endings run: | From feb0114542a54dcef46acf28c0704352f42c8fd8 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 20:23:04 +0000 Subject: [PATCH 12/14] try to fix with gitattributes --- .gitattributes | 1 + .github/workflows/code-quality.yaml | 6 ++---- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitattributes b/.gitattributes index 9c72b27ae..da00dd701 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,4 @@ * -text +* text=auto *.Rraw linguist-language=R diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index f8e1baa83..6bbe417fe 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -77,15 +77,13 @@ jobs: steps: - uses: actions/checkout@v4 - name: Lint - run: | - for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f) + run: for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f) shell: Rscript {0} lint-misc: runs-on: ubuntu-latest steps: - name: Don't auto-convert line endings during checkout step - run: | - git config --global core.autocrlf false + run: git config --global core.autocrlf false - uses: actions/checkout@v4 - name: Lint line endings run: | From 9eba7e58a2164b48eab6ea0c77331d71149b79ac Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 20:52:29 +0000 Subject: [PATCH 13/14] use gitattributes to handle this instead of GHA --- .gitattributes | 7 +++---- .github/workflows/code-quality.yaml | 25 ------------------------- 2 files changed, 3 insertions(+), 29 deletions(-) diff --git a/.gitattributes b/.gitattributes index da00dd701..15a6ce90a 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,3 @@ -* -text -* text=auto -*.Rraw linguist-language=R - +* text eol=lf +inst/tests/** -text +inst/tests/*.Rraw text eol=lf linguist-language=R diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml index 6bbe417fe..69bdd5732 100644 --- a/.github/workflows/code-quality.yaml +++ b/.github/workflows/code-quality.yaml @@ -79,28 +79,3 @@ jobs: - name: Lint run: for (f in list.files('.ci/linters/md', full.names=TRUE)) source(f) shell: Rscript {0} - lint-misc: - runs-on: ubuntu-latest - steps: - - name: Don't auto-convert line endings during checkout step - run: git config --global core.autocrlf false - - uses: actions/checkout@v4 - - name: Lint line endings - run: | - for (f in list.files(recursive = TRUE)) { - if (basename(f) %in% c("CODEOWNERS", "R-es.po", "es.po")) { - nchar = sum(nchar(readLines(f, n=3L))) + 6 # padding for line endings - str = readChar(f, nchar) - cat(sprintf("Here's the first %d characters of %s\n\n%s\n", nchar, f, encodeString(str))) - } - # tryCatch() here are to skip binary/plaintext files (e.g. csv) - ch <- tryCatch(readChar(f, file.size(f)), condition=identity) - if (inherits(ch, "condition")) next - nch <- tryCatch(nchar(ch), condition=identity) - if (inherits(nch, "condition")) next - - if (grepl("\r", ch, fixed = TRUE)) { - stop("Line ending \\r found in ", f, ", please stick with \\n only.") - } - } - shell: Rscript {0} From 5e9ab1f100738de380a6572218be663edf251d8a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 20 Aug 2024 20:55:02 +0000 Subject: [PATCH 14/14] revert gitattributes change here --- .gitattributes | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitattributes b/.gitattributes index 15a6ce90a..9c72b27ae 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ -* text eol=lf -inst/tests/** -text -inst/tests/*.Rraw text eol=lf linguist-language=R +* -text +*.Rraw linguist-language=R +