diff --git a/.zenodo.json b/.zenodo.json index dae93275..25cf6378 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -1,6 +1,6 @@ { "title": "frictionless: Read and Write Frictionless Data Packages", - "version": "1.0.0", + "version": "1.0.2", "description": "Read and write Frictionless Data Packages. A 'Data Package' () is a simple container format and standard to describe and package a collection of (tabular) data. It is typically used to publish FAIR () and open datasets.", "creators": [ { @@ -14,7 +14,7 @@ ], "upload_type": "software", "access_right": "open", - "license": "MIT", + "license": "MIT + file LICENSE", "communities": [ { "identifier": "inbo" diff --git a/DESCRIPTION b/DESCRIPTION index 4da864ed..ad145a40 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: frictionless Title: Read and Write Frictionless Data Packages -Version: 1.0.1.9000 +Version: 1.0.2 Authors@R: c( person("Peter", "Desmet", email = "peter.desmet.work@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8442-8025")), diff --git a/NEWS.md b/NEWS.md index 709421a9..1e8b4dce 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,10 @@ # frictionless (development version) +# frictionless 1.0.2 + +- Add `testthat::skip_if_offline()` to selected tests and verbosely include + output in vignette examples, to avoid CRAN errors caused by timeouts (#116). + # frictionless 1.0.1 - Rebuild documentation for compatibility with HTML5 on request of CRAN. diff --git a/codemeta.json b/codemeta.json index e53c29a7..daad538a 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,7 +8,7 @@ "codeRepository": "https://github.com/frictionlessdata/frictionless-r", "issueTracker": "https://github.com/frictionlessdata/frictionless-r/issues", "license": "https://spdx.org/licenses/MIT", - "version": "1.0.1", + "version": "1.0.2", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -226,7 +226,7 @@ }, "SystemRequirements": null }, - "fileSize": "178.01KB", + "fileSize": "183.204KB", "citation": [ { "@type": "SoftwareSourceCode", @@ -250,7 +250,7 @@ "name": "Read and Write Frictionless Data Packages", "identifier": "10.5281/zenodo.5815355", "url": "https://cran.r-project.org/package=frictionless", - "description": "R package version 1.0.1", + "description": "R package version 1.0.2", "@id": "https://doi.org/10.5281/zenodo.5815355", "sameAs": "https://doi.org/10.5281/zenodo.5815355" } diff --git a/tests/testthat/test-add_resource.R b/tests/testthat/test-add_resource.R index 2730d67f..c1085eb6 100644 --- a/tests/testthat/test-add_resource.R +++ b/tests/testthat/test-add_resource.R @@ -85,6 +85,7 @@ test_that("add_resource() returns error on invalid or empty data frame", { }) test_that("add_resource() returns error if CSV file cannot be found", { + testthat::skip_if_offline() p <- example_package df_csv <- test_path("data/df.csv") schema <- create_schema(data.frame("col_1" = c(1, 2), "col_2" = c("a", "b"))) @@ -98,11 +99,6 @@ test_that("add_resource() returns error if CSV file cannot be found", { "Can't find file at `no_such_file.csv`.", fixed = TRUE ) - expect_error( - add_resource(p, "new", "http://example.com/no_such_file.csv"), - "Can't find file at `http://example.com/no_such_file.csv`.", - fixed = TRUE - ) expect_error( add_resource(p, "new", c(df_csv, "no_such_file.csv")), "Can't find file at `no_such_file.csv`.", @@ -118,6 +114,11 @@ test_that("add_resource() returns error if CSV file cannot be found", { "Can't find file at `no_such_file_1.csv`.", fixed = TRUE ) + expect_error( + add_resource(p, 
"new", "http://example.com/no_such_file.csv"), + "Can't find file at `http://example.com/no_such_file.csv`.", + fixed = TRUE + ) }) test_that("add_resource() returns error on mismatching schema and data", { @@ -226,6 +227,7 @@ test_that("add_resource() can add resource from data frame, readable by test_that("add_resource() can add resource from local, relative, absolute, remote or compressed CSV file, readable by read_resource()", { + testthat::skip_if_offline() p <- example_package schema <- get_schema(p, "deployments") diff --git a/tests/testthat/test-check_schema.R b/tests/testthat/test-check_schema.R index 2d5933b1..99cf7f5a 100644 --- a/tests/testthat/test-check_schema.R +++ b/tests/testthat/test-check_schema.R @@ -1,4 +1,5 @@ test_that("check_schema() returns TRUE on valid Table Schema", { + testthat::skip_if_offline() p <- example_package # Can't obtain df using read_resource(), because that function uses # check_schema() (in get_schema()) internally, which is what we want to test diff --git a/tests/testthat/test-read_package.R b/tests/testthat/test-read_package.R index b87b60fc..fac845cb 100644 --- a/tests/testthat/test-read_package.R +++ b/tests/testthat/test-read_package.R @@ -1,31 +1,42 @@ -test_that("read_package() returns a valid Data Package, whether reading path or - url", { - # Load example package (locally and remotely) and a valid minimal one +test_that("read_package() returns a valid Data Package reading from path", { + # Load example package locally and a valid minimal one p_path <- system.file("extdata", "datapackage.json", package = "frictionless") - p_url <- file.path("https://raw.githubusercontent.com/frictionlessdata/", - "frictionless-r/main/inst/extdata/datapackage.json") minimal_path <- test_path("data/valid_minimal.json") p_local <- suppressMessages(read_package(p_path)) - p_remote <- suppressMessages(read_package(p_url)) p_minimal <- suppressMessages(read_package(minimal_path)) # Returns a list with required properties expect_true(check_package(p_local)) - expect_true(check_package(p_remote)) expect_true(check_package(p_minimal)) # Package has correct resources resource_names <- c("deployments", "observations", "media") expect_identical(resources(p_local), resource_names) - expect_identical(resources(p_remote), resource_names) expect_identical(resources(p_minimal), resource_names) # Package has correct "directory", containing root dir of datapackage.json expect_identical(p_local$directory, gsub("/datapackage.json", "", p_path)) - expect_identical(p_remote$directory, gsub("/datapackage.json", "", p_url)) expect_identical(p_minimal$directory, "data") }) +test_that("read_package() returns a valid Data Package reading from url", { + testthat::skip_if_offline() + # Load example package remotely + p_url <- file.path("https://raw.githubusercontent.com/frictionlessdata/", + "frictionless-r/main/inst/extdata/datapackage.json") + p_remote <- suppressMessages(read_package(p_url)) + + # Returns a list with required properties + expect_true(check_package(p_remote)) + + # Package has correct resources + resource_names <- c("deployments", "observations", "media") + expect_identical(resources(p_remote), resource_names) + + # Package has correct "directory", containing root dir of datapackage.json + expect_identical(p_remote$directory, gsub("/datapackage.json", "", p_url)) +}) + test_that("read_package() shows message about usage norms", { # Load example package and a minimal valid one a URL in "id" p_path <- system.file("extdata", "datapackage.json", package = "frictionless") 
@@ -55,6 +66,7 @@ test_that("read_package() shows message about usage norms", { }) test_that("read_package() returns error on missing file and properties", { + testthat::skip_if_offline() # Incorrect type expect_error( read_package(list()), @@ -62,17 +74,12 @@ test_that("read_package() returns error on missing file and properties", { fixed = TRUE ) - # No file + # No file locally expect_error( read_package("nofile.json"), "Can't find file at `nofile.json`", fixed = TRUE ) - expect_error( - read_package("http://example.com/nofile.json"), - "Can't find file at `http://example.com/nofile.json`.", - fixed = TRUE - ) # Not a json file expect_error( @@ -111,6 +118,13 @@ test_that("read_package() returns error on missing file and properties", { ), fixed = TRUE ) + + # No file remotely + expect_error( + read_package("http://example.com/nofile.json"), + "Can't find file at `http://example.com/nofile.json`.", + fixed = TRUE + ) }) test_that("read_package() allows descriptor at absolute or relative parent diff --git a/tests/testthat/test-read_resource.R b/tests/testthat/test-read_resource.R index 9cb53697..dd8a94f4 100644 --- a/tests/testthat/test-read_resource.R +++ b/tests/testthat/test-read_resource.R @@ -1,4 +1,5 @@ test_that("read_resource() returns a tibble", { + testthat::skip_if_offline() p <- example_package df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b")) p <- add_resource(p, "new", df) @@ -20,6 +21,7 @@ test_that("read_resource() returns error on incorrect Data Package", { }) test_that("read_resource() returns error on incorrect resource", { + testthat::skip_if_offline() p <- example_package # No such resource @@ -178,6 +180,7 @@ test_that("read_resource() can read inline data (ignoring schema)", { }) test_that("read_resource() can read local files", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") # local resource, remote package @@ -188,6 +191,7 @@ test_that("read_resource() can read local files", { }) test_that("read_resource() can read remote files", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") # local resource, remote package @@ -201,6 +205,7 @@ test_that("read_resource() can read remote files", { test_that("read_resource() can read safe local and remote Table Schema, including YAML", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") p$directory <- "." @@ -255,6 +260,7 @@ test_that("read_resource() can read safe local and remote Table Schema, }) test_that("read_resource() can read safe local and remote CSV dialect", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") p$directory <- "." 
@@ -307,6 +313,7 @@ test_that("read_resource() can read safe local and remote CSV dialect", { }) test_that("read_resource() understands CSV dialect", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") @@ -336,6 +343,7 @@ test_that("read_resource() understands CSV dialect", { }) test_that("read_resource() understands missing values", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") @@ -350,6 +358,7 @@ test_that("read_resource() understands missing values", { }) test_that("read_resource() understands encoding", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") @@ -438,6 +447,7 @@ test_that("read_resource() handles LF and CRLF line terminator characters", { # # read_delim() however only handles 2 line terminator characters (LF and CRLF) # without explicitly indicating them, so dialect$lineTerminator is ignored + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") # This file has LF @@ -449,6 +459,7 @@ test_that("read_resource() handles LF and CRLF line terminator characters", { }) test_that("read_resource() can read compressed files", { + testthat::skip_if_offline() p <- example_package resource <- read_resource(p, "deployments") diff --git a/tests/testthat/test-remove_resource.R b/tests/testthat/test-remove_resource.R index 3c573d16..ef248fce 100644 --- a/tests/testthat/test-remove_resource.R +++ b/tests/testthat/test-remove_resource.R @@ -1,4 +1,5 @@ test_that("remove_resource() returns a valid Data Package", { + testthat::skip_if_offline() p <- example_package expect_true(check_package(remove_resource(p, "deployments"))) }) @@ -15,6 +16,7 @@ test_that("remove_resource() returns error on incorrect Data Package", { }) test_that("remove_resource() returns error when resource not found", { + testthat::skip_if_offline() p <- example_package expect_error( remove_resource(p, "no_such_resource"), @@ -27,6 +29,7 @@ test_that("remove_resource() returns error when resource not found", { }) test_that("remove_resource() removes resource", { + testthat::skip_if_offline() p <- example_package # Remove "deployments", keep "observations" and "media diff --git a/tests/testthat/test-resources.R b/tests/testthat/test-resources.R index 9dee387f..482752f6 100644 --- a/tests/testthat/test-resources.R +++ b/tests/testthat/test-resources.R @@ -1,4 +1,5 @@ test_that("resources() returns a character vector of resource names", { + testthat::skip_if_offline() p <- example_package expect_identical(resources(p), c("deployments", "observations", "media")) diff --git a/tests/testthat/test-write_package.R b/tests/testthat/test-write_package.R index f7bab285..bf723a99 100644 --- a/tests/testthat/test-write_package.R +++ b/tests/testthat/test-write_package.R @@ -1,4 +1,5 @@ test_that("write_package() returns output Data Package (invisibly)", { + testthat::skip_if_offline() p <- example_package # Note write_package() is expected to create directory without warning dir <- file.path(tempdir(), "package") @@ -40,6 +41,7 @@ test_that("write_package() returns error if Data Package has no resource(s)", { }) test_that("write_package() writes unaltered datapackage.json as is", { + testthat::skip_if_offline() p_file <- system.file("extdata", "datapackage.json", package = "frictionless") json_original <- readr::read_lines(p_file) # Will use line endings of system @@ -55,6 +57,7 @@ test_that("write_package() writes unaltered datapackage.json as is", { }) 
test_that("write_package() does not overwrite existing data files", { + testthat::skip_if_offline() p <- suppressMessages(read_package( system.file("extdata", "datapackage.json", package = "frictionless") )) @@ -80,6 +83,7 @@ test_that("write_package() does not overwrite existing data files", { }) test_that("write_package() copies file(s) for path = local in local package", { + testthat::skip_if_offline() p <- suppressMessages(read_package( system.file("extdata", "datapackage.json", package = "frictionless") )) @@ -108,6 +112,7 @@ test_that("write_package() copies file(s) for path = local in local package", { test_that("write_package() downloads file(s) for path = local in remote package", { + testthat::skip_if_offline() p <- example_package p$resources[[2]]$path[[2]] <- "observations_2.csv" # Make one URL a local path p <- add_resource(p, "new", test_path("data/df.csv")) @@ -133,6 +138,7 @@ test_that("write_package() downloads file(s) for path = local in remote }) test_that("write_package() leaves as is for path = URL in local package", { + testthat::skip_if_offline() p <- suppressMessages(read_package( system.file("extdata", "datapackage.json", package = "frictionless") )) @@ -155,6 +161,7 @@ test_that("write_package() leaves as is for path = URL in local package", { }) test_that("write_package() leaves as is for path = URL in remote package", { + testthat::skip_if_offline() p <- example_package p <- add_resource(p, "new", file.path( "https://raw.githubusercontent.com/frictionlessdata/frictionless-r", @@ -175,6 +182,7 @@ test_that("write_package() leaves as is for path = URL in remote package", { }) test_that("write_package() leaves as is for data = json in local package", { + testthat::skip_if_offline() p <- suppressMessages(read_package( system.file("extdata", "datapackage.json", package = "frictionless") )) @@ -190,6 +198,7 @@ test_that("write_package() leaves as is for data = json in local package", { }) test_that("write_package() leaves as is for data = json in remote package", { + testthat::skip_if_offline() p <- example_package dir <- file.path(tempdir(), "package") on.exit(unlink(dir, recursive = TRUE)) @@ -203,6 +212,7 @@ test_that("write_package() leaves as is for data = json in remote package", { }) test_that("write_package() creates file for data = df in local package", { + testthat::skip_if_offline() p <- suppressMessages(read_package( system.file("extdata", "datapackage.json", package = "frictionless") )) @@ -221,6 +231,7 @@ test_that("write_package() creates file for data = df in local package", { }) test_that("write_package() creates file for data = df in remote package", { + testthat::skip_if_offline() p <- example_package df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b")) p <- add_resource(p, "new", df) @@ -237,6 +248,7 @@ test_that("write_package() creates file for data = df in remote package", { }) test_that("write_package() shows message when downloading file", { + testthat::skip_if_offline() p <- example_package dir <- file.path(tempdir(), "package") on.exit(unlink(dir, recursive = TRUE)) @@ -251,6 +263,7 @@ test_that("write_package() shows message when downloading file", { }) test_that("write_package() sets correct properties for data frame resources", { + testthat::skip_if_offline() p <- example_package df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b")) schema <- create_schema(df) @@ -280,6 +293,7 @@ test_that("write_package() sets correct properties for data frame resources", { }) test_that("write_package() will gzip file for compress = TRUE", { + 
testthat::skip_if_offline() p <- example_package df <- data.frame("col_1" = c(1, 2), "col_2" = c("a", "b")) p <- add_resource(p, "new", df) diff --git a/vignettes/frictionless.Rmd b/vignettes/frictionless.Rmd index 34c0ef01..86e5bd65 100644 --- a/vignettes/frictionless.Rmd +++ b/vignettes/frictionless.Rmd @@ -32,38 +32,91 @@ library(frictionless) To read a Data Package, you need to know the path or URL to its descriptor file, named `datapackage.json`. That file describes the Data Package, provides access points to its Data Resources and can contain dataset-level metadata. Let's read a Data Package descriptor file published on [Zenodo](https://doi.org/10.5281/zenodo.5879096): -```{r} +``` r package <- read_package("https://zenodo.org/record/5879096/files/datapackage.json") +#> Please make sure you have the right to access data from this Data Package for your intended use. +#> Follow applicable norms or requirements to credit the dataset and its authors. +#> For more information, see https://doi.org/10.5281/zenodo.5879096 ``` `read_package()` returns the content of `datapackage.json` as a list, printed here with `str()` to improve readability: -```{r} +```r str(package, list.len = 3) +#> List of 4 +#> $ id : chr "https://doi.org/10.5281/zenodo.5879096" +#> $ profile : chr "tabular-data-package" +#> $ resources:List of 3 +#> ..$ :List of 7 +#> .. ..$ name : chr "reference-data" +#> .. ..$ path : chr "O_WESTERSCHELDE-reference-data.csv" +#> .. ..$ profile : chr "tabular-data-resource" +#> .. .. [list output truncated] +#> ..$ :List of 7 +#> .. ..$ name : chr "gps" +#> .. ..$ path : chr [1:3] "O_WESTERSCHELDE-gps-2018.csv.gz" "O_WESTERSCHELDE-gps-2019.csv.gz" "O_WESTERSCHELDE-gps-2020.csv.gz" +#> .. ..$ profile : chr "tabular-data-resource" +#> .. .. [list output truncated] +#> ..$ :List of 7 +#> .. ..$ name : chr "acceleration" +#> .. ..$ path : chr [1:3] "O_WESTERSCHELDE-acceleration-2018.csv.gz" "O_WESTERSCHELDE-acceleration-2019.csv.gz" "O_WESTERSCHELDE-acceleration-2020.csv.gz" +#> .. ..$ profile : chr "tabular-data-resource" +#> .. .. [list output truncated] +#> [list output truncated] ``` The most important aspect of a Data Package are its **Data Resources**, which describe and point to the data. You can list all included resources with `resources()`: -```{r} +```r resources(package) +#> [1] "reference-data" "gps" "acceleration" ``` This Data Package has 3 resources. 
Let's read the data from the `gps` resource into a data frame:
-```{r}
+```r
gps <- read_resource(package, "gps")
gps
+#> # A tibble: 73,047 × 21
+#> event-i…¹ visible timestamp locat…² locat…³ bar:b…⁴ exter…⁵ gps:d…⁶
+#>
+#> 1 1.43e10 TRUE 2018-05-25 16:11:37 4.25 51.3 NA 32.5 2
+#> 2 1.43e10 TRUE 2018-05-25 16:16:41 4.25 51.3 NA 32.8 2.1
+#> 3 1.43e10 TRUE 2018-05-25 16:21:29 4.25 51.3 NA 34.1 2.1
+#> 4 1.43e10 TRUE 2018-05-25 16:26:28 4.25 51.3 NA 34.5 2.2
+#> 5 1.43e10 TRUE 2018-05-25 16:31:21 4.25 51.3 NA 34.1 2.2
+#> 6 1.43e10 TRUE 2018-05-25 16:36:09 4.25 51.3 NA 32.5 2.2
+#> 7 1.43e10 TRUE 2018-05-25 16:40:57 4.25 51.3 NA 32.1 2.2
+#> 8 1.43e10 TRUE 2018-05-25 16:45:55 4.25 51.3 NA 33.3 2.1
+#> 9 1.43e10 TRUE 2018-05-25 16:50:49 4.25 51.3 NA 32.6 2.1
+#> 10 1.43e10 TRUE 2018-05-25 16:55:36 4.25 51.3 NA 31.7 2
+#> # … with 73,037 more rows, 13 more variables: `gps:satellite-count` ,
+#> # `gps-time-to-fix` , `ground-speed` , heading ,
+#> # `height-above-msl` , `location-error-numerical` ,
+#> # `manually-marked-outlier` , `vertical-error-numerical` ,
+#> # `sensor-type` , `individual-taxon-canonical-name` ,
+#> # `tag-local-identifier` , `individual-local-identifier` ,
+#> # `study-name` , and abbreviated variable names ¹​`event-id`, …
```
The data frame contains all GPS records, even though the actual data were split over [multiple CSV zipped files](https://zenodo.org/record/5879096#files). `read_resource()` assigned the column names and types based on the Table Schema that was defined for that resource, not the headers of the CSV file.
You can also read data from a local (e.g. downloaded) Data Package. In fact, there is one included in the frictionless package, let's read that one from disk:
-```{r}
+```r
local_package <- read_package(
system.file("extdata", "datapackage.json", package = "frictionless")
)
+#> Please make sure you have the right to access data from this Data Package for your intended use.
+#> Follow applicable norms or requirements to credit the dataset and its authors.
read_resource(local_package, "media")
+#> # A tibble: 3 × 5
+#> media_id deployment_id observati…¹ times…² file_…³
+#>
+#> 1 aed5fa71-3ed4-4284-a6ba-3550d1a4de8d 1 1-1 2020-0… https:…
+#> 2 da81a501-8236-4cbd-aa95-4bc4b10a05df 1 1-1 2020-0… https:…
+#> 3 0ba57608-3cf1-49d6-a5a2-fe680851024d 1 1-1 2020-0… https:…
+#> # … with abbreviated variable names ¹​observation_id, ²​timestamp, ³​file_path
```
Data from the `media` resource was not stored in a CSV file, but directly in the `data` property of that resource in `datapackage.json`. `read_resource()` will automatically detect where to read data from.
@@ -161,7 +214,7 @@ my_package <-
If you already have your data stored as CSV files and you want to include them _as is_ as a Data Resource, you can do so as well. As with data frames, you can opt to create a Table Schema automatically or provide your own.
-```{r}
+```r
# Two CSV files with the same structure
path_1 <- system.file("extdata", "observations_1.csv", package = "frictionless")
path_2 <- system.file("extdata", "observations_2.csv", package = "frictionless")
@@ -192,8 +245,10 @@ write_package(my_package, "my_directory")
The directory will contain four files: the descriptor `datapackage.json`, one CSV file containing the data for the resource `iris` and two CSV files containing the data for the resource `observations`.
-```{r}
+```r
list.files("my_directory")
+#> [1] "datapackage.json" "iris.csv" "observations_1.csv"
+#> [4] "observations_2.csv"
```
```{r, include = FALSE}
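The recurring addition across the test files above is `testthat::skip_if_offline()`, which skips a test (rather than letting it fail on a timeout) when the machine running the checks has no reliable internet access. A minimal sketch of the pattern, condensed from the new remote-URL test in `tests/testthat/test-read_package.R` above; the `paste0()` URL construction is a simplification of the `file.path()` call used in the actual test:

```r
test_that("read_package() returns a valid Data Package reading from url", {
  # Skip (rather than fail) when offline, e.g. on a CRAN check machine
  # with flaky connectivity.
  testthat::skip_if_offline()

  url <- paste0(
    "https://raw.githubusercontent.com/frictionlessdata/",
    "frictionless-r/main/inst/extdata/datapackage.json"
  )
  p_remote <- suppressMessages(read_package(url))

  # The remote package should expose the same resources as the local example
  expect_true(check_package(p_remote))
  expect_identical(
    resources(p_remote),
    c("deployments", "observations", "media")
  )
})
```

The vignette changes serve the same goal: chunks that used to download data while the vignette was built are replaced by static fenced code blocks with their output (the `#>` lines) included verbatim, so rebuilding the vignette on CRAN no longer requires network access.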