diff --git a/cmd/metha-sync/main.go b/cmd/metha-sync/main.go index 2e80af0b..e1c7a84f 100644 --- a/cmd/metha-sync/main.go +++ b/cmd/metha-sync/main.go @@ -20,6 +20,7 @@ var ( baseDir = flag.String("base-dir", metha.GetBaseDir(), "base dir for harvested files") hourly = flag.Bool("hourly", false, "use hourly intervals for harvesting") daily = flag.Bool("daily", false, "use daily intervals for harvesting") + delay = flag.Int("delay", 0, "sleep between each OAI-PMH request") disableSelectiveHarvesting = flag.Bool("no-intervals", false, "harvest in one go, for funny endpoints") endpointList = flag.Bool("list", false, "list a selection of OAI endpoints (might be outdated)") format = flag.String("format", "oai_dc", "metadata format") @@ -118,6 +119,7 @@ func main() { harvest.HourlyInterval = *hourly harvest.DailyInterval = *daily harvest.ExtraHeaders = extra + harvest.Delay = *delay log.Printf("harvest: %+v", harvest) if *removeCached { log.Printf("removing already cached files from %s", harvest.Dir()) diff --git a/harvest.go b/harvest.go index cb04942a..2340ef29 100644 --- a/harvest.go +++ b/harvest.go @@ -67,6 +67,8 @@ type Harvest struct { DailyInterval bool ExtraHeaders http.Header + Delay int + // XXX: Lazy via sync.Once? Identify *Identify Started time.Time @@ -335,6 +337,10 @@ func (h *Harvest) runInterval(iv Interval) error { req.From = iv.Begin.Format(h.DateLayout()) req.Until = iv.End.Format(h.DateLayout()) } + + if h.Delay > 0 { + time.Sleep(time.Duration(h.Delay) * time.Second) + } // Do request, return any http error, except when we ignore HTTPErrors - in that case, break out early. resp, err := Do(&req) if err != nil {