-
Notifications
You must be signed in to change notification settings - Fork 4
/
config.go
66 lines (56 loc) · 1.41 KB
/
config.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package vozer
import (
"errors"
"fmt"
"net/url"
"os"
"path"
)
// VozerConfig holds the settings for one crawl. The struct tags name the JSON
// key each field is encoded to / decoded from. Call Validate before use: it
// rejects unusable configurations and fills in or clamps several fields.
type VozerConfig struct {
	// ThreadURL is the address of the thread to crawl. Validate requires it
	// to be non-empty, parseable, and to have the host "forums.voz.vn".
	ThreadURL string `json:"thread_url"`
	// NuWorkers is presumably the number of concurrent crawl workers (its
	// consumer is outside this file — confirm). Validate replaces 0 with 10
	// and caps the value at 100.
	NuWorkers uint `json:"workers"`
	// IsCrawlURLs selects URLs as a kind of data to crawl.
	IsCrawlURLs bool `json:"is_crawl_urls"`
	// IsCrawlImages selects images as a kind of data to crawl.
	IsCrawlImages bool `json:"is_crawl_images"`
	// DestPath is the destination path for crawled data. When empty, Validate
	// sets it to a "data" directory under the current working directory.
	DestPath string `json:"destination_path"`
	// Retries is presumably the retry count for failed requests (consumer is
	// outside this file — confirm). Validate caps the value at 50.
	Retries uint `json:"retries"`
	// CrawlPages is an explicit list of page numbers. Validate treats 0 as
	// not a valid page number and is meant to drop such entries.
	CrawlPages []uint `json:"crawl_pages"`
	// CrawlFromPage and CrawlToPage describe a page range (presumably
	// inclusive — confirm against the crawler). Validate rejects the
	// configuration when CrawlFromPage is greater than CrawlToPage.
	CrawlFromPage uint `json:"crawl_from_page"`
	CrawlToPage uint `json:"crawl_to_page"`
}
// Validate checks the configuration and normalizes it in place.
//
// It returns a non-nil error when:
//   - ThreadURL is empty, cannot be parsed, or its host is not "forums.voz.vn";
//   - neither IsCrawlURLs nor IsCrawlImages is set (nothing to crawl);
//   - CrawlFromPage is greater than CrawlToPage.
//
// Normalization applied to the receiver:
//   - NuWorkers: 0 becomes 10; values above 100 become 100.
//   - DestPath: empty becomes "data" under the current working directory.
//   - Retries: values above 50 become 50.
//   - CrawlPages: entries equal to 0 are removed, preserving order.
//
// Fields normalized before a failing check keep their normalized values even
// though an error is returned.
func (c *VozerConfig) Validate() error {
	if c.ThreadURL == "" {
		return errors.New("URL to VOZ thread must be specified")
	}
	u, err := url.Parse(c.ThreadURL)
	if err != nil {
		// %w keeps the message identical to %s while letting callers
		// inspect the underlying *url.Error with errors.As.
		return fmt.Errorf("Invalid URL: %w", err)
	}
	if u.Host != "forums.voz.vn" {
		return errors.New("Invalid URL, must point to a VOZ thread")
	}

	// Worker count: default when unset, clamp when excessive.
	if c.NuWorkers == 0 {
		c.NuWorkers = 10
	}
	if c.NuWorkers > 100 {
		c.NuWorkers = 100
	}

	// At least one kind of data must be requested; any single kind or both
	// together is a valid selection.
	if !c.IsCrawlURLs && !c.IsCrawlImages {
		return errors.New("Must specify which data you want to crawl? (images, URLs or both)")
	}

	if c.DestPath == "" {
		wd, err := os.Getwd()
		if err != nil {
			// Best effort: without a working directory the default
			// degrades to the relative path "data".
			wd = ""
		}
		// NOTE(review): path.Join always joins with "/"; filepath.Join
		// would be the OS-aware choice but needs an import change.
		c.DestPath = path.Join(wd, "data")
	}

	if c.Retries > 50 {
		c.Retries = 50
	}

	// Drop zero page numbers with an in-place filter. Deleting elements while
	// ranging over the original length would index past the shortened slice
	// and skip the element that follows each deletion.
	pages := c.CrawlPages[:0]
	for _, p := range c.CrawlPages {
		if p != 0 {
			pages = append(pages, p)
		}
	}
	c.CrawlPages = pages

	if c.CrawlFromPage > c.CrawlToPage {
		return fmt.Errorf("Invalid page range: %d-%d", c.CrawlFromPage, c.CrawlToPage)
	}
	return nil
}