Skip to content

Commit

Permalink
feat: first working prototype downloader
Browse files Browse the repository at this point in the history
  • Loading branch information
Garionion committed Feb 9, 2024
1 parent e1a9720 commit 58dc24d
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 11 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@

# Go workspace file
go.work

.hibp-data
.idea
13 changes: 13 additions & 0 deletions cmd/cli/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package main

import (
	"fmt"
	"os"

	hibpsync "github.com/exaring/go-hibp-sync"
)

// main runs a full sync with the default options. A failed sync is reported
// on stderr and turned into a non-zero exit status so that shells, cron jobs
// and CI pipelines can detect it.
func main() {
	if err := hibpsync.Sync(); err != nil {
		fmt.Fprintf(os.Stderr, "sync error: %q\n", err)
		os.Exit(1)
	}
}
6 changes: 6 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
module github.com/exaring/go-hibp-sync

go 1.21.6

require (
github.com/alitto/pond v1.8.3 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-retryablehttp v0.7.5 // indirect
)
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
github.com/alitto/pond v1.8.3 h1:ydIqygCLVPqIX/USe5EaV/aSRXTRXDEI9JwuDdu+/xs=
github.com/alitto/pond v1.8.3/go.mod h1:CmvIIGd5jKLasGI3D87qDkQxjzChdKMmnXMg3fG6M6Q=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ=
github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48=
github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ=
github.com/hashicorp/go-retryablehttp v0.7.5 h1:bJj+Pj19UZMIweq/iie+1u5YCdGrnxCT9yvm0e+Nd5M=
github.com/hashicorp/go-retryablehttp v0.7.5/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
79 changes: 76 additions & 3 deletions lib.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,24 @@
package hibpsync

import (
"fmt"

"github.com/alitto/pond"
"github.com/hashicorp/go-retryablehttp"
)

// Default values that Sync places into its configuration before any
// SyncOption is applied.
const (
// defaultDataDir is the default data directory given to the file system storage.
defaultDataDir = "./.hibp-data"
// defaultEndpoint is the default endpoint given to the HIBP client.
defaultEndpoint = "https://api.pwnedpasswords.com/range/"
// defaultCheckETag is the default for the checkETag setting.
defaultCheckETag = true
// defaultWorkers is the default size of the worker pool.
defaultWorkers = 100
)

// syncConfig holds the settings of a single Sync run. Sync fills it with the
// package defaults and then lets every SyncOption modify it.
type syncConfig struct {
// dataDir is passed to the file system storage as its data directory.
dataDir string
// endpoint is passed to the HIBP client as its endpoint.
endpoint string
// checkETag is presumably set via WithCheckETag; the Sync implementation
// shown alongside this struct does not read it yet.
checkETag bool
// worker is the worker count passed to the pool, both as its size and as
// its minimum number of workers.
worker int
}

// SyncOption modifies the configuration of a Sync run. Sync applies the
// options in the order they are passed, after the defaults have been set.
type SyncOption func(*syncConfig)
Expand All @@ -32,17 +41,81 @@ func WithCheckETag(checkETag bool) SyncOption {
}
}

func Sync(options ...SyncOption) {
// WithWorkers returns a SyncOption that stores workers as the worker count
// of the sync configuration.
func WithWorkers(workers int) SyncOption {
	apply := func(cfg *syncConfig) {
		cfg.worker = workers
	}

	return apply
}

// Sync requests every range from 0x00000 to 0xFFFFF from the configured
// endpoint and stores the responses below the configured data directory.
//
// The defaults can be overridden with SyncOption values. The ranges are
// processed by a pool of workers: each task loads the ETag stored for its
// range, passes it along with the request, and only rewrites the stored
// range when the response is not flagged as "not modified".
//
// Failures of individual ranges are printed to stdout and do not abort the
// run. The returned error only reports failures of the range generator.
// Sync does not return before all submitted tasks have finished.
func Sync(options ...SyncOption) error {
	config := &syncConfig{
		dataDir:   defaultDataDir,
		endpoint:  defaultEndpoint,
		checkETag: defaultCheckETag,
		worker:    defaultWorkers,
	}

	for _, option := range options {
		option(config)
	}

	rG, err := newRangeGenerator(0x00000, 0xFFFFF, "")
	if err != nil {
		return fmt.Errorf("creating range generator: %w", err)
	}

	retryClient := retryablehttp.NewClient() // TODO: add dnscache, timeout
	retryClient.RetryMax = 10
	retryClient.Logger = nil

	hc := hibpClient{
		endpoint:   config.endpoint,
		httpClient: retryClient.StandardClient(),
	}

	storage := fsStorage{
		dataDir: config.dataDir,
	}

	pool := pond.New(config.worker, 0, pond.MinWorkers(config.worker))
	// StopAndWait instead of Stop: Stop returns immediately, so Sync (and with
	// it the calling process) could finish while the last ranges are still
	// being downloaded or written.
	defer pool.StopAndWait()

	for {
		rangeIndex, ok, err := rG.Next()
		if err != nil {
			return fmt.Errorf("getting next range: %w", err)
		}

		if !ok {
			break
		}

		// Progress output: the first ten ranges and every hundredth one.
		if rangeIndex%100 == 0 || rangeIndex < 10 {
			fmt.Printf("processing range %d\n", rangeIndex)
		}

		// rangeIndex is declared inside the loop body, so every task captures
		// its own copy.
		pool.Submit(func() {
			rangePrefix := toRangeString(rangeIndex)

			etag, err := storage.LoadETag(rangePrefix)
			if err != nil {
				fmt.Printf("error loading etag for range %q: %v\n", rangePrefix, err)
				return
			}

			resp, err := hc.RequestRange(rangePrefix, etag)
			if err != nil {
				fmt.Printf("error requesting range %q: %v\n", rangePrefix, err)
				return
			}

			// The stored copy is still current; nothing to write.
			if resp.NotModified {
				return
			}

			if err := storage.Save(rangePrefix, resp.ETag, resp.Data); err != nil {
				fmt.Printf("error saving range %q: %v\n", rangePrefix, err)
			}
		})
	}

	return nil
}
102 changes: 95 additions & 7 deletions storage.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
package hibpsync

import "sync"
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path"
"sync"
)

// Permission bits for entries created below the data directory.
const (
// fileMode is not referenced by the storage code visible in this change.
fileMode = 0666 // TODO ???
// dirMode is passed to os.MkdirAll when Save creates the sub directory of a key.
// NOTE(review): 0744 gives group and others read but not search (execute)
// permission on the directory — confirm this is intended.
dirMode = 0744 // TODO ???
)

type fsStorage struct {
dataDir string
Expand All @@ -13,19 +26,94 @@ func (f *fsStorage) Save(key, etag string, data []byte) error {
f.writeLock.Lock()
defer f.writeLock.Unlock()

// TODO: Implement Save
if err := os.MkdirAll(f.subDir(key), dirMode); err != nil {
return fmt.Errorf("creating data directory: %w", err)
}

filePath := f.filePath(key)
file, err := os.Create(filePath)
if err != nil {
return fmt.Errorf("creating file %q: %w", filePath, err)
}
defer file.Close()

if _, err := file.WriteString(etag + "\n"); err != nil {
return fmt.Errorf("writing etag to file %q: %w", filePath, err)
}

if _, err := file.Write(data); err != nil {
return fmt.Errorf("writing data to file %q: %w", filePath, err)
}

return nil
}

// LoadETag returns the ETag stored on the first line of the file for key.
//
// It returns an empty string and a nil error when there is nothing usable to
// report: either no file exists for key, or the file does not contain a
// complete first line (it is empty or was cut off while being written). In
// the latter case the caller ends up requesting the range without an ETag,
// which replaces the damaged file instead of failing on it on every run.
//
// Any other failure to open or read the file is returned as an error.
func (f *fsStorage) LoadETag(key string) (string, error) {
	filePath := f.filePath(key)

	file, err := os.Open(filePath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return "", nil
		}
		return "", fmt.Errorf("opening file %q: %w", filePath, err)
	}
	defer file.Close()

	etag, err := bufio.NewReader(file).ReadString('\n')
	if err != nil {
		// ReadString reports io.EOF when the input ends before a newline.
		if errors.Is(err, io.EOF) {
			return "", nil
		}
		return "", fmt.Errorf("reading etag from file %q: %w", filePath, err)
	}

	// On success the string always ends with the delimiter; strip it.
	return etag[:len(etag)-1], nil
}

// LoadData opens the file stored for key and returns it positioned behind
// the leading ETag line (via skipLine), so that reading from it yields the
// range data. The caller is responsible for closing the returned reader.
//
// If no file exists for key, both return values are nil; callers have to
// check the reader for nil before using it.
func (f *fsStorage) LoadData(key string) (io.ReadCloser, error) {
	filePath := f.filePath(key)

	file, err := os.Open(filePath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			return nil, nil
		}
		return nil, fmt.Errorf("opening file %q: %w", filePath, err)
	}

	if err := skipLine(file); err != nil {
		// The file is not handed out on failure, so it is closed here.
		file.Close()
		return nil, fmt.Errorf("skipping etag line in file %q: %w", filePath, err)
	}

	return file, nil
}

func (f *fsStorage) LoadData(key string) ([]byte, error) {
// TODO: Implement LoadData
// skipLine advances reader to the first byte after the next newline, counted
// from the reader's current position. If the remaining input contains no
// newline, the reader ends up at the end of the input.
//
// The line is read through a bufio.Reader, which reads ahead of the line end.
// The underlying offset after that read is therefore not the end of the line;
// the target position is computed from the starting offset plus the length of
// the line and the reader is explicitly moved there.
func skipLine(reader io.ReadSeeker) error {
	// Remember where the line starts before the buffered read moves the offset.
	start, err := reader.Seek(0, io.SeekCurrent)
	if err != nil {
		return err
	}

	// io.EOF only means that the last line has no trailing newline.
	line, err := bufio.NewReader(reader).ReadString('\n')
	if err != nil && err != io.EOF {
		return err
	}

	// Undo the read-ahead: position the reader right behind the line.
	if _, err := reader.Seek(start+int64(len(line)), io.SeekStart); err != nil {
		return err
	}

	return nil
}

// subDir returns the directory that holds the file for key: the data
// directory joined with the first two bytes of key. key must be at least two
// bytes long, otherwise the slice expression panics.
func (f *fsStorage) subDir(key string) string {
	return path.Join(f.dataDir, key[:2])
}

return nil, nil
// filePath returns the path of the file for key: a file named after the
// full key, located in the key's sub directory.
func (f *fsStorage) filePath(key string) string {
	dir := f.subDir(key)

	return path.Join(dir, key)
}
2 changes: 1 addition & 1 deletion upstream.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (

// hibpClient requests ranges from an HIBP range endpoint.
type hibpClient struct {
	// endpoint is the URL the client is configured with.
	endpoint string
	// httpClient performs the HTTP requests. It is a pointer so that the
	// client returned by retryablehttp's StandardClient can be assigned
	// without copying it.
	httpClient *http.Client
}

type hibpResponse struct {
Expand Down

0 comments on commit 58dc24d

Please sign in to comment.