Skip to content

Commit

Permalink
remove proxied images from sanity check
Browse files Browse the repository at this point in the history
Previously, proxied and local images were checked for presence in the
storage before previewing or posting the comment. That logic resulted in
 an inability to post with an image when a proxy for images is enabled,
 as proxied images are not downloaded to disk before the first time
 someone loads them, which could only happen after the user either
 previews or posts the message.

After this change, preview and post only checks the local images'
presence and ignore the proxied ones.
  • Loading branch information
paskal authored and umputun committed Jul 23, 2023
1 parent 235f0da commit c72f30e
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 47 deletions.
10 changes: 5 additions & 5 deletions backend/app/rest/api/rest_private.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ func (s *private) previewCommentCtrl(w http.ResponseWriter, r *http.Request) {
comment = s.commentFormatter.Format(comment)
comment.Sanitize()

// check if images are valid
for _, id := range s.imageService.ExtractPictures(comment.Text) {
// check if images are valid, omit proxied images as they are lazy-loaded
for _, id := range s.imageService.ExtractNonProxiedPictures(comment.Text) {
err := s.imageService.ResetCleanupTimer(id)
if err != nil {
rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "can't renew staged picture cleanup timer", rest.ErrImgNotFound)
rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "can't load picture from the comment", rest.ErrImgNotFound)
return
}
}
Expand Down Expand Up @@ -129,8 +129,8 @@ func (s *private) createCommentCtrl(w http.ResponseWriter, r *http.Request) {
}
comment = s.commentFormatter.Format(comment)

// check if images are valid
for _, id := range s.imageService.ExtractPictures(comment.Text) {
// check if images are valid, omit proxied images as they are lazy-loaded
for _, id := range s.imageService.ExtractNonProxiedPictures(comment.Text) {
_, err := s.imageService.Load(id)
if err != nil {
rest.SendErrorJSON(w, r, http.StatusBadRequest, err, "can't load picture from the comment", rest.ErrImgNotFound)
Expand Down
84 changes: 83 additions & 1 deletion backend/app/rest/api/rest_private_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"io"
"mime/multipart"
"net/http"
"net/http/httptest"
"os"
"strings"
"testing"
Expand All @@ -24,6 +25,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/umputun/remark42/backend/app/notify"
"github.com/umputun/remark42/backend/app/rest/proxy"
"github.com/umputun/remark42/backend/app/store"
"github.com/umputun/remark42/backend/app/store/image"
)
Expand Down Expand Up @@ -69,7 +71,7 @@ func TestRest_CreateFilteredCode(t *testing.T) {

c := R.JSON{}
err = json.Unmarshal(b, &c)
assert.NoError(t, err)
require.NoError(t, err, string(b))
loc := c["locator"].(map[string]interface{})
assert.Equal(t, "remark42", loc["site"])
assert.Equal(t, "https://radio-t.com/blah1", loc["url"])
Expand All @@ -79,6 +81,86 @@ func TestRest_CreateFilteredCode(t *testing.T) {
assert.True(t, len(c["id"].(string)) > 8)
}

// based on issue https://github.com/umputun/remark42/issues/1631
func TestRest_CreateAndPreviewWithImage(t *testing.T) {
ts, srv, teardown := startupT(t)
ts.Close()
defer teardown()

srv.ImageService.ProxyAPI = srv.RemarkURL + "/api/v1/img"
srv.ImageProxy = &proxy.Image{
HTTP2HTTPS: true,
CacheExternal: true,
RoutePath: "/api/v1/img",
RemarkURL: srv.RemarkURL,
ImageService: srv.ImageService,
}
srv.CommentFormatter = store.NewCommentFormatter(srv.ImageProxy)
// need to recreate the server with new ImageProxy, otherwise old one will be used
ts = httptest.NewServer(srv.routes())
defer ts.Close()

var pngRead bool
// server with the test PNG image
pngServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
_, e := io.Copy(w, gopherPNG())
assert.NoError(t, e)
pngRead = true
}))
defer pngServer.Close()

t.Run("create", func(t *testing.T) {
resp, err := post(t, ts.URL+"/api/v1/comment",
`{"text": "![](`+pngServer.URL+`/gopher.png)", "locator":{"url": "https://radio-t.com/blah1", "site": "remark42"}}`)
assert.NoError(t, err)
b, err := io.ReadAll(resp.Body)
assert.NoError(t, err)
require.Equal(t, http.StatusCreated, resp.StatusCode, string(b))
assert.NoError(t, resp.Body.Close())

c := R.JSON{}
err = json.Unmarshal(b, &c)
require.NoError(t, err, string(b))
assert.NotContains(t, c["text"], pngServer.URL)
assert.Contains(t, c["text"], srv.RemarkURL)
loc := c["locator"].(map[string]interface{})
assert.Equal(t, "remark42", loc["site"])
assert.Equal(t, "https://radio-t.com/blah1", loc["url"])
assert.True(t, len(c["id"].(string)) > 8)
assert.Equal(t, false, pngRead, "original image is not yet accessed by server")
})

t.Run("preview", func(t *testing.T) {
resp, err := post(t, ts.URL+"/api/v1/preview",
`{"text": "![](`+pngServer.URL+`/gopher.png)", "locator":{"url": "https://radio-t.com/blah1", "site": "remark42"}}`)
assert.NoError(t, err)
b, err := io.ReadAll(resp.Body)
assert.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode, string(b))
assert.NoError(t, resp.Body.Close())

assert.NotContains(t, string(b), pngServer.URL)
assert.Contains(t, string(b), srv.RemarkURL)

assert.Equal(t, false, pngRead, "original image is not yet accessed by server")
// retrieve the image from the cache
imgURL := strings.Split(strings.Split(string(b), "src=\"")[1], "\"")[0]
// replace srv.RemarkURL with ts.URL
imgURL = strings.ReplaceAll(imgURL, srv.RemarkURL, ts.URL)
resp, err = http.Get(imgURL)
assert.NoError(t, err)
b, err = io.ReadAll(resp.Body)
assert.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode, string(b))
assert.NoError(t, resp.Body.Close())
// compare image to original gopher png after decoding from base64
assert.Equal(t, gopher, base64.StdEncoding.EncodeToString(b))

assert.Equal(t, true, pngRead, "original image accessed to be shown to user")
})

}

func TestRest_CreateOldPost(t *testing.T) {
ts, srv, teardown := startupT(t)
defer teardown()
Expand Down
8 changes: 4 additions & 4 deletions backend/app/rest/api/rest_public_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ func TestRest_Preview(t *testing.T) {
assert.NoError(t, resp.Body.Close())
assert.Contains(t,
string(b),
"{\"code\":20,\"details\":\"can't renew staged picture cleanup timer\","+
"\"error\":\"can't get image stats for dev_user/bad_picture: stat",
`{"code":20,"details":"can't load picture from the comment",`+
`"error":"can't get image stats for dev_user/bad_picture: stat`,
)
assert.Contains(t,
string(b),
Expand All @@ -97,8 +97,8 @@ func TestRest_PreviewWithWrongImage(t *testing.T) {
assert.NoError(t, resp.Body.Close())
assert.Contains(t,
string(b),
"{\"code\":20,\"details\":\"can't renew staged picture cleanup timer\","+
"\"error\":\"can't get image stats for dev_user/bad_picture: stat ",
`{"code":20,"details":"can't load picture from the comment",`+
`"error":"can't get image stats for dev_user/bad_picture: stat `,
)
assert.Contains(t,
string(b),
Expand Down
82 changes: 47 additions & 35 deletions backend/app/store/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ func NewService(s Store, p ServiceParams) *Service {
return &Service{ServiceParams: p, store: s}
}

// SubmitAndCommit multiple ids immediately
func (s *Service) SubmitAndCommit(idsFn func() []string) error {
// Commit multiple ids immediately
func (s *Service) Commit(idsFn func() []string) error {
errs := new(multierror.Error)
for _, id := range idsFn() {
err := s.store.Commit(id)
Expand Down Expand Up @@ -117,7 +117,7 @@ func (s *Service) Submit(idsFn func() []string) {
for atomic.LoadInt32(&s.term) == 0 && time.Since(req.TS) <= s.EditDuration {
time.Sleep(time.Millisecond * 10) // small sleep to relive busy wait but keep reactive for term (close)
}
err := s.SubmitAndCommit(req.idsFn)
err := s.Commit(req.idsFn)
if err != nil {
log.Printf("[WARN] image commit error %v", err)
}
Expand All @@ -141,39 +141,14 @@ func (s *Service) Submit(idsFn func() []string) {

// ExtractPictures gets list of images from the doc html and convert from urls to ids, i.e. user/pic.png
func (s *Service) ExtractPictures(commentHTML string) (ids []string) {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(commentHTML))
if err != nil {
log.Printf("[ERROR] can't parse commentHTML to parse images: %q, error: %v", commentHTML, err)
return nil
}
doc.Find("img").Each(func(i int, sl *goquery.Selection) {
if im, ok := sl.Attr("src"); ok {
if strings.Contains(im, s.ImageAPI) {
elems := strings.Split(im, "/")
if len(elems) >= 2 {
id := elems[len(elems)-2] + "/" + elems[len(elems)-1]
ids = append(ids, id)
}
}
if strings.Contains(im, s.ProxyAPI) {
proxiedURL, err := url.Parse(im)
if err != nil {
return
}
imgURL, err := base64.URLEncoding.DecodeString(proxiedURL.Query().Get("src"))
if err != nil {
return
}
imgID, err := CachedImgID(string(imgURL))
if err != nil {
return
}
ids = append(ids, imgID)
}
}
})
return s.extractImageIDs(commentHTML, true)
}

return ids
// ExtractNonProxiedPictures gets list of non-proxied images from the doc html and convert from urls to ids, i.e. user/pic.png
// This method is used in image check on post preview and load, as proxied images have lazy loading
// and wouldn't be present on disk but still valid as they will be loaded the first time someone requests them.
func (s *Service) ExtractNonProxiedPictures(commentHTML string) (ids []string) {
return s.extractImageIDs(commentHTML, false)
}

// Cleanup runs periodic cleanup with 1.5*ServiceParams.EditDuration. Blocking loop, should be called inside of goroutine by consumer
Expand Down Expand Up @@ -262,6 +237,43 @@ func (s *Service) ImgContentType(img []byte) string {
return contentType
}

// returns list of image IDs from the comment html, including proxied images if includeProxied is true
func (s *Service) extractImageIDs(commentHTML string, includeProxied bool) (ids []string) {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(commentHTML))
if err != nil {
log.Printf("[ERROR] can't parse commentHTML to parse images: %q, error: %v", commentHTML, err)
return nil
}
doc.Find("img").Each(func(i int, sl *goquery.Selection) {
if im, ok := sl.Attr("src"); ok {
if strings.Contains(im, s.ImageAPI) {
elems := strings.Split(im, "/")
if len(elems) >= 2 {
id := elems[len(elems)-2] + "/" + elems[len(elems)-1]
ids = append(ids, id)
}
}
if includeProxied && strings.Contains(im, s.ProxyAPI) {
proxiedURL, err := url.Parse(im)
if err != nil {
return
}
imgURL, err := base64.URLEncoding.DecodeString(proxiedURL.Query().Get("src"))
if err != nil {
return
}
imgID, err := CachedImgID(string(imgURL))
if err != nil {
return
}
ids = append(ids, imgID)
}
}
})

return ids
}

// prepareImage calls readAndValidateImage and resize on provided image.
func (s *Service) prepareImage(r io.Reader) ([]byte, error) {
data, err := readAndValidateImage(r, s.MaxSize)
Expand Down
3 changes: 2 additions & 1 deletion backend/app/store/image/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func TestService_ExtractPictures(t *testing.T) {
ids = svc.ExtractPictures(html)
require.Equal(t, 1, len(ids), "one image in")
assert.Equal(t, "cached_images/12318fbd4c55e9d177b8b5ae197bc89c5afd8e07-a41fcb00643f28d700504256ec81cbf2e1aac53e", ids[0])
require.Empty(t, svc.ExtractNonProxiedPictures(html), "no non-proxied images expected to be found")

// bad url
html = `<img src=" https://remark42.radio-t.com/api/v1/img">`
Expand Down Expand Up @@ -150,7 +151,7 @@ func TestService_Submit(t *testing.T) {
svc := NewService(&store, ServiceParams{ImageAPI: "/blah/", EditDuration: time.Millisecond * 100})
svc.Submit(func() []string { return []string{"id1", "id2", "id3"} })
assert.Equal(t, 3, len(store.ResetCleanupTimerCalls()))
err := svc.SubmitAndCommit(func() []string { return []string{"id4", "id5"} })
err := svc.Commit(func() []string { return []string{"id4", "id5"} })
assert.NoError(t, err)
svc.Submit(func() []string { return []string{"id6", "id7"} })
assert.Equal(t, 5, len(store.ResetCleanupTimerCalls()))
Expand Down
2 changes: 1 addition & 1 deletion backend/app/store/service/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ func (s *DataStore) submitImages(comment store.Comment) {

var err error
if comment.Imported {
err = s.ImageService.SubmitAndCommit(idsFn)
err = s.ImageService.Commit(idsFn)
} else {
s.ImageService.Submit(idsFn)
}
Expand Down

0 comments on commit c72f30e

Please sign in to comment.