Skip to content

Commit

Permalink
git-annex: support downloading over HTTP (#6)
Browse files Browse the repository at this point in the history
This makes HTTP symmetric with SSH clone URLs.

This gives us the fancy feature of _anonymous_ downloads,
so people can access datasets without having to set up an
account or manage ssh keys.

Previously, to access "open access" data shared this way,
users would need to:

  1. Create an account on gitea.example.com
  2. Create ssh keys
  3. Upload ssh keys (and make sure to find and upload the correct file)
  4. `git clone git@gitea.example.com:user/dataset.git`
  5. `cd dataset`
  6. `git annex get`

This cuts that down to just the last three steps:

  1. `git clone https://gitea.example.com/user/dataset.git`
  2. `cd dataset`
  3. `git annex get`

This is significantly simpler for downstream users, especially for those
unfamiliar with the command line.

Unfortunately there's no uploading. While git-annex supports uploading
over HTTP to S3 and some other special remotes, it seems to fail on a
_plain_ HTTP remote. See neuropoly#7
and https://git-annex.branchable.com/forum/HTTP_uploads/#comment-ce28adc128fdefe4c4c49628174d9b92.

This is not a major loss since no one wants uploading to be anonymous anyway.

To support private repos, I had to hunt down and patch an easy-to-miss extra
security restriction that Gitea, for some reason, applies only to HTTP: Basic
authentication is only honoured on certain paths (services/auth/basic.go).

This was guided by https://git-annex.branchable.com/tips/setup_a_public_repository_on_a_web_site/

Fixes neuropoly#3

Co-authored-by: Mathieu Guay-Paquet <[email protected]>
  • Loading branch information
2 people authored and actions-user committed Nov 10, 2023
1 parent ffd658c commit 3e5e2e6
Show file tree
Hide file tree
Showing 7 changed files with 412 additions and 17 deletions.
3 changes: 2 additions & 1 deletion modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -439,12 +439,13 @@ func (c *Command) RunStdBytes(opts *RunOpts) (stdout, stderr []byte, runErr RunS
}

// AllowLFSFiltersArgs return globalCommandArgs with lfs filter, it should only be used for tests
// It also re-enables git-credential(1), which is used to test git-annex's HTTP support
func AllowLFSFiltersArgs() TrustedCmdArgs {
// Now here we should explicitly allow lfs filters to run
filteredLFSGlobalArgs := make(TrustedCmdArgs, len(globalCommandArgs))
j := 0
for _, arg := range globalCommandArgs {
if strings.Contains(string(arg), "lfs") {
if strings.Contains(string(arg), "lfs") || strings.Contains(string(arg), "credential") {
j--
} else {
filteredLFSGlobalArgs[j] = arg
Expand Down
31 changes: 31 additions & 0 deletions routers/web/repo/githttp.go
Original file line number Diff line number Diff line change
Expand Up @@ -611,3 +611,34 @@ func GetIdxFile(ctx *context.Context) {
h.sendFile("application/x-git-packed-objects-toc", "objects/pack/pack-"+ctx.Params("file")+".idx")
}
}

// GetAnnexObject serves a single git-annex object over "dumb" HTTP.
//
// The object is located from the hash1, hash2, keyDir and key route
// parameters and sent as application/octet-stream with the
// cache-forever headers set. Nothing is sent from here when httpBase
// returns nil.
func GetAnnexObject(ctx *context.Context) {
	h := httpBase(ctx)
	if h == nil {
		return
	}

	// git-annex keeps objects in .git/annex/objects/{hash1}/{hash2}/{key}/{key}.
	// The key encodes the file's size and its checksum (usually SHA256);
	// hash1 and hash2 are the first few bits of the md5sum of the key and
	// only exist to keep any one directory from growing too large.
	// ref: https://git-annex.branchable.com/internals/hashing/
	//
	// keyDir is expected to equal key, but that is not enforced here.
	segments := []string{
		ctx.Params("hash1"),
		ctx.Params("hash2"),
		ctx.Params("keyDir"),
		ctx.Params("key"),
	}
	relPath := path.Join(segments...)

	// Defend against directory traversal: once the path is rooted at "/",
	// path.Join (through path.Clean) discards every leading "..", because
	// "/.." is just "/". The leading slash is then dropped again, which is
	// safe since the input is never meant to be rooted.
	//
	// The router most likely rejects ".." already, so this is probably
	// redundant; it is kept as a defensive measure because the value is
	// user-controlled and ends up in a filesystem path.
	rooted := path.Join("/", relPath)
	relPath = rooted[1:]

	h.setHeaderCacheForever()
	h.sendFile("application/octet-stream", "annex/objects/"+relPath)
}
13 changes: 13 additions & 0 deletions routers/web/web.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,13 @@ func registerRoutes(m *web.Route) {
}
}

// annexEnabled is a route middleware that answers 404 Not Found
// whenever setting.Annex.Enabled is false, and does nothing otherwise.
annexEnabled := func(ctx *context.Context) {
	if setting.Annex.Enabled {
		return
	}
	ctx.Error(http.StatusNotFound)
}

federationEnabled := func(ctx *context.Context) {
if !setting.Federation.Enabled {
ctx.Error(http.StatusNotFound)
Expand Down Expand Up @@ -1514,6 +1521,12 @@ func registerRoutes(m *web.Route) {
})
}, ignSignInAndCsrf, lfsServerEnabled)

// Routes used by git-annex over plain HTTP. The group is registered with
// the ignSignInAndCsrf, annexEnabled and UserAssignmentWeb middlewares
// passed as trailing arguments below.
m.Group("", func() {
// for git-annex
m.GetOptions("/config", repo.GetTextFile("config")) // needed by clients reading annex.uuid during `git annex initremote`
// Annex object download; repo.GetAnnexObject reads the hash1, hash2,
// keyDir and key parameters named in this pattern.
m.GetOptions("/annex/objects/{hash1}/{hash2}/{keyDir}/{key}", repo.GetAnnexObject)
}, ignSignInAndCsrf, annexEnabled, context_service.UserAssignmentWeb())

gitHTTPRouters(m)
})
})
Expand Down
11 changes: 11 additions & 0 deletions services/auth/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,17 @@ func isGitRawOrAttachOrLFSPath(req *http.Request) bool {
return false
}

// annexPathRe matches URL paths that begin with two path segments made up of
// ASCII letters, digits, '_', '.' or '-', immediately followed by "/annex/".
// NOTE(review): the two segments are presumably the owner and repository
// names — confirm against the router.
var annexPathRe = regexp.MustCompile(`^/[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+/annex/`)

// isAnnexPath reports whether req targets a path used by git-annex.
//
// It always returns false while setting.Annex.Enabled is off. Otherwise it
// returns true when the URL path ends in "/config" or matches annexPathRe.
func isAnnexPath(req *http.Request) bool {
	if !setting.Annex.Enabled {
		return false
	}

	urlPath := req.URL.Path

	// "/config" serves git's own config rather than anything specific to
	// git-annex, but its only current consumer is git-annex fetching
	// annex.uuid during 'git annex init'.
	if strings.HasSuffix(urlPath, "/config") {
		return true
	}
	return annexPathRe.MatchString(urlPath)
}

// handleSignIn clears existing session variables and stores new ones for the specified user object
func handleSignIn(resp http.ResponseWriter, req *http.Request, sess SessionStore, user *user_model.User) {
// We need to regenerate the session...
Expand Down
4 changes: 2 additions & 2 deletions services/auth/basic.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ func (b *Basic) Name() string {
// name/token on successful validation.
// Returns nil if header is empty or validation fails.
func (b *Basic) Verify(req *http.Request, w http.ResponseWriter, store DataStore, sess SessionStore) (*user_model.User, error) {
// Basic authentication should only fire on API, Download or on Git or LFSPaths
if !middleware.IsAPIPath(req) && !isContainerPath(req) && !isAttachmentDownload(req) && !isGitRawOrAttachOrLFSPath(req) {
// Basic authentication should only fire on API, Download or on Git or LFSPaths or Git-Annex paths
if !middleware.IsAPIPath(req) && !isContainerPath(req) && !isAttachmentDownload(req) && !isGitRawOrAttachOrLFSPath(req) && !isAnnexPath(req) {
return nil, nil
}

Expand Down
Loading

0 comments on commit 3e5e2e6

Please sign in to comment.