Skip to content

Commit

Permalink
create normalized DwCA (close #11)
Browse files Browse the repository at this point in the history
Normalized data also closes disambiguation of scientific
names, hierarchies and synonyms (close #5, close #6, close #7)
  • Loading branch information
dimus committed Feb 26, 2024
1 parent 85a0480 commit 9cf3a06
Show file tree
Hide file tree
Showing 32 changed files with 2,540 additions and 100 deletions.
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# DwCA is a Go library to deal with Darwin Core Archive files.


## Testing

To avoid conflicts in the filesystem, run the tests sequentially.

```bash
go test -p 1 -count=1 ./...
```

45 changes: 32 additions & 13 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,58 @@ import (
"path/filepath"
)

// Config is a configuration object for the Darwin Core Archive (DwCA)
// data processing.
type Config struct {
Path string
// RootPath is the root path for all temporary files.
RootPath string

// DownloadPath is used to store downloaded files.
DownloadPath string
ExtractPath string
WithCleanup bool

// ExtractPath is used to store extracted files of DwCA archive.
ExtractPath string

// OutputPath is used to store uncompressed files of a normalized
// DwCA archive. These files are created from the original DwCA archive
// data.
OutputPath string

// JobsNum is the number of concurrent jobs to run.
JobsNum int
}

// Option is a function type that standardizes how options for
// the configuration are organized.
type Option func(*Config)

// OptPath sets the root path for all temporary files.
func OptPath(s string) Option {
return func(c *Config) {
c.Path = s
}
}

func OptWithCleanup(b bool) Option {
return func(c *Config) {
c.WithCleanup = b
c.RootPath = s
}
}

// New creates a new Config object with default values and allows
// overriding them with options.
func New(opts ...Option) Config {
path, err := os.UserCacheDir()
if err != nil {
path = os.TempDir()
}

path = filepath.Join(path, "dwca_go")
c := Config{Path: path}
c := Config{
RootPath: path,
JobsNum: 5,
}

for _, opt := range opts {
opt(&c)
}
c.DownloadPath = filepath.Join(c.Path, "download")
c.ExtractPath = filepath.Join(c.Path, "extract")

c.DownloadPath = filepath.Join(c.RootPath, "download")
c.ExtractPath = filepath.Join(c.RootPath, "extract")
c.OutputPath = filepath.Join(c.RootPath, "output")
return c
}
6 changes: 2 additions & 4 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@ import (
func TestConfigDefault(t *testing.T) {
assert := assert.New(t)
conf := config.New()
assert.Contains(conf.Path, "dwca_go")
assert.Contains(conf.RootPath, "dwca_go")

opts := []config.Option{
config.OptPath("test"),
config.OptWithCleanup(true),
}
conf = config.New(opts...)
assert.Equal("test", conf.Path)
assert.True(conf.WithCleanup)
assert.Equal("test", conf.RootPath)
}
Loading

0 comments on commit 9cf3a06

Please sign in to comment.