Skip to content

Commit

Permalink
License supports list format (go-gitea#4)
Browse files Browse the repository at this point in the history
* License supports list format

* test file type
  • Loading branch information
Hourunze1997 authored Jul 6, 2024
1 parent 4fbca8d commit b6959f1
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 2 deletions.
17 changes: 17 additions & 0 deletions modules/repository/configHooks/checkLicense.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,23 @@ while read oldrev newrev _; do
done
echo "License field is valid. Proceeding with the push."
log_operation "license check | success"
elif [[ "$license" =~ ^\[ ]]; then
license=$(echo "$readme_content" | grep -ozP -m 1 "license:\s*\K\[.*?\]")
license=$(echo "$license" | tr -d '[]')
license=$(echo "$license" | tr -d ',')
arr=($license)
for item in "${arr[@]}"; do
if [[ ! " ${valid_licenses[@]} " =~ " $item " ]]; then
echo "Sorry, your push was rejected during YAML metadata verification:"
echo " - Error: "license" must be one of (${valid_licenses[@]})"
log_error "Sorry, your push was rejected during YAML metadata verification:"
log_error " - Error: "license" must be one of (${valid_licenses[@]})"
log_operation "license check | failed"
exit 1
fi
done
echo "License field is valid. Proceeding with the push."
log_operation "license check | success"
elif [[ " ${valid_licenses[@]} " =~ " ${license} " ]]; then
echo "License field is valid. Proceeding with the push."
log_operation "license check | success"
Expand Down
1 change: 1 addition & 0 deletions modules/structs/px_repo_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ type CommitContentsResponse struct {
HTMLURL *string `json:"html_url"`
DownloadURL *string `json:"download_url"`
IsNonText *bool `json:"is_non_text"`
FileType *string `json:"file_type"`

Name string `json:"name"`
Path string `json:"path"`
Expand Down
108 changes: 107 additions & 1 deletion modules/typesniffer/typesniffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,62 @@ var (
svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)

// audioFormats maps an audio MIME type to the byte signatures that identify
// it. DetectContentType compares every signature against the start of the
// data with bytes.HasPrefix once the content type is still
// "application/octet-stream".
//
// NOTE(review): the "ftyp..." signatures normally follow a 4-byte box size in
// ISO base media files, so a pure prefix comparison presumably never matches
// them — confirm and consider offset-aware matching.
// NOTE(review): "ftypmp41"/"ftypmp42" are listed in videoFormats as well.
var audioFormats = map[string][][]byte{
	"audio/aac":        {[]byte("ADIF"), []byte("ADTS")},
	"audio/amr":        {[]byte("#!AMR\r\n")},
	"audio/3gpp":       {[]byte("ftyp3gp")}, // key was misspelled "audio/3pg"
	"audio/m4a":        {[]byte("ftypmp41"), []byte("ftypmp42")},
	"audio/x-ms-wma":   {[]byte("ftypWMAV")},
	"audio/x-ape":      {[]byte("APE"), []byte("MAC")},
	"audio/x-flac":     {[]byte("fLaC")},
	"audio/alac":       {[]byte("alac")},
	"audio/x-wavpack":  {[]byte("wvpk")},
	"audio/silk-v3":    {[]byte("#!SILK_V3")},
	"audio/opus":       {[]byte("OpusHead")},
	"audio/x-musepack": {[]byte("MPCK")},
	"audio/ac3":        {[]byte("AC3")},
	"audio/dts":        {[]byte("DTS")},
}

// videoFormats lists, per MIME type, the leading byte sequences that
// DetectContentType treats as proof of a video container (or of the
// "application/x-mpegURL" playlist type, which IsVideo also accepts).
var videoFormats = map[string][][]byte{
	"application/x-mpegURL": {[]byte("#EXTM3U\n#EXT-X")},
	"video/mp4":             {[]byte("ftypmp41"), []byte("ftypmp42")},
	"video/mpeg":            {[]byte("\x00\x00\x01\xBA"), []byte("\x00\x00\x01\xB3")},
	"video/quicktime":       {[]byte("moov")},
	"video/rmvb":            {[]byte(".RMF")},
	"video/x-flv":           {[]byte("FLV")},
	"video/x-ms-wmv":        {[]byte("WMV1"), []byte("WMV2")},
	"video/x-msvideo":       {[]byte("RIFFAVI ")},
}

// imageFormats lists, per MIME type, the leading byte sequences that
// DetectContentType accepts for image formats it could not otherwise identify.
var imageFormats = map[string][][]byte{
	"image/heif":   {[]byte("ftypheic"), []byte("ftypmif1")},
	"image/tiff":   {[]byte("II*\x00"), []byte("MM\x00*")}, // little- and big-endian byte-order marks
	"image/x-icon": {[]byte("\x00\x00\x01\x00")},
	"image/x-tga":  {[]byte("\x00\x00\x02\x00")},
}

// documentFormats maps an office/e-book MIME type to the byte signatures that
// identify it. IsDocument uses the keys as the set of document content types,
// and DetectContentType compares the signatures against the start of the data
// with bytes.HasPrefix.
//
// Several MIME types deliberately share one signature (the OLE2 compound-file
// header and the ZIP local-file header cannot tell Word, Excel and PowerPoint
// apart). Because DetectContentType ranges over this map and keeps the last
// match, which of the sharing MIME types is reported depends on Go's
// unspecified map iteration order; all of them still count as documents.
//
// NOTE(review): if the upstream sniffer already reports ZIP data as
// "application/zip", the PK-based entries are never consulted — confirm.
var documentFormats = map[string][][]byte{
	// ZIP local-file header "PK\x03\x04"; the third byte was mistyped as 0x04.
	"application/vnd.openxmlformats-officedocument.wordprocessingml.document": {{0x50, 0x4B, 0x03, 0x04}},
	"application/msword":            {{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}},
	"application/vnd.ms-excel":      {{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}},
	"application/vnd.ms-powerpoint": {{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}},
	"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": {{0x50, 0x4B, 0x03,
		0x04, 0x14, 0x00, 0x06, 0x00}},
	"application/vnd.openxmlformats-officedocument.presentationml.presentation": {{0x50, 0x4B, 0x03,
		0x04, 0x14, 0x00, 0x06, 0x00}},
	"application/vnd.openxmlformats-officedocument.presentationml.slideshow": {{0x50, 0x4B, 0x03,
		0x04, 0x14, 0x00, 0x06, 0x00}},
	"application/vnd.openxmlformats-officedocument.spreadsheetml.template": {{0x50, 0x4B, 0x03,
		0x04, 0x14, 0x00, 0x06, 0x00}},
	"application/vnd.ms-excel.template.macroEnabled.12": {{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1}},
	"application/vnd.ms-excel.sheet.binary.macroEnabled.12": {{0x09, 0x08, 0x10, 0x00,
		0x00, 0x06, 0x05, 0x00}},
	"application/vnd.ms-excel.sheet.macroEnabled.12": {{0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1,
		0x1A, 0xE1}},
	"application/epub+zip": {[]byte("PK\x03\x04")},
}

// SniffedType contains information about a blobs type.
type SniffedType struct {
contentType string
Expand All @@ -40,6 +96,17 @@ func (ct SniffedType) IsText() bool {
return strings.Contains(ct.contentType, "text/")
}

// IsDocument reports whether the sniffed content is a document: any text
// content type, or one of the MIME types registered in documentFormats.
func (ct SniffedType) IsDocument() bool {
	if ct.IsText() {
		return true
	}
	_, known := documentFormats[ct.contentType]
	return known
}

// IsImage detects if data is an image format
func (ct SniffedType) IsImage() bool {
return strings.Contains(ct.contentType, "image/")
Expand All @@ -57,7 +124,7 @@ func (ct SniffedType) IsPDF() bool {

// IsVideo detects if data is a video format. Besides every "video/" content
// type, the "application/x-mpegURL" playlist type registered in videoFormats
// is treated as video.
func (ct SniffedType) IsVideo() bool {
	// The pre-change `return strings.Contains(...)` line was left in front of
	// this one, which made the playlist check unreachable; only the combined
	// condition is kept.
	return strings.Contains(ct.contentType, "video/") || ct.contentType == "application/x-mpegURL"
}

// IsAudio detects if data is an audio format
Expand Down Expand Up @@ -127,6 +194,45 @@ func DetectContentType(data []byte) SniffedType {
ct = "audio/ogg" // for most cases, it is used as an audio container
}
}

if ct == "application/octet-stream" {
dataHead := data
if len(dataHead) > 256 {
dataHead = dataHead[:256] // only need to do a quick check for the file header
}
for ctName, headFormats := range documentFormats {
for _, prefix := range headFormats {
if bytes.HasPrefix(dataHead, prefix) {
ct = ctName
}
}
}

for ctName, headFormats := range audioFormats {
for _, prefix := range headFormats {
if bytes.HasPrefix(dataHead, prefix) {
ct = ctName
}
}
}

for ctName, headFormats := range videoFormats {
for _, prefix := range headFormats {
if bytes.HasPrefix(dataHead, prefix) {
ct = ctName
}
}
}

for ctName, headFormats := range imageFormats {
for _, prefix := range headFormats {
if bytes.HasPrefix(dataHead, prefix) {
ct = ctName
}
}
}
}

return SniffedType{ct}
}

Expand Down
78 changes: 77 additions & 1 deletion services/repository/files/px_content.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import (
"fmt"
"net/url"
"path"
"path/filepath"
"slices"
"strings"

"code.gitea.io/gitea/models"
Expand All @@ -18,6 +20,15 @@ import (

// checkOption is an optional per-entry check run while building a
// CommitContentsResponse: it receives the response being filled in and the
// tree entry it describes, and returns an error if the check itself fails.
// checkIsNonText and checkFileType are passed as such options to
// GetCommitContents.
type checkOption func(*api.CommitContentsResponse, *git.TreeEntry) error

// Extension tables used by GetLfsFileType to classify a file by name. Every
// entry is lower case and includes the leading dot, which is the form
// filepath.Ext returns; an entry without the dot can never match.

// audioExt lists extensions classified as "audio".
var audioExt = []string{".wav", ".mp3", ".aac", ".m4a", ".amr", ".3gp", ".wma", ".ogg", ".ape",
	".flac", ".wv", ".wvp", ".silk", ".opus", ".mpc", ".mp+", ".ac3", ".dts"}

// videoExt lists extensions classified as "video". ".wma" (an audio format)
// is intentionally not listed here: GetLfsFileType checks videoExt before
// audioExt, so listing it would label WMA files as video.
var videoExt = []string{".avi", ".flv", ".mp4", ".mpg", ".mpeg", ".wmv", ".mov", ".rmvb",
	".m3u8"}

// imageExt lists extensions classified as "image".
var imageExt = []string{".jpg", ".jpeg", ".png", ".webp", ".gif", ".tif", ".tiff", ".heif",
	".heic", ".ico", ".tga"}

// DocumentExt lists extensions classified as "document".
var DocumentExt = []string{".docx", ".pdf", ".doc", ".xls", ".xlsx", ".ppt", ".pptx", ".pps",
	".ppsx", ".xltx", ".xlsb", ".xltm", ".xlsm", ".txt", ".csv", ".epub", ".htm", ".html"}

// GetCommitContentsOrList gets the meta data of a file's contents (*ContentsResponse) if treePath not a tree
// directory, otherwise a listing of file contents ([]*ContentsResponse). Ref can be a branch, commit or tag
func GetCommitContentsOrList(ctx context.Context, repo *repo_model.Repository, treePath, ref string) (any, error) {
Expand Down Expand Up @@ -56,7 +67,7 @@ func GetCommitContentsOrList(ctx context.Context, repo *repo_model.Repository, t
}

if entry.Type() != "tree" {
return GetCommitContents(ctx, repo, treePath, origRef, false, checkIsNonText)
return GetCommitContents(ctx, repo, treePath, origRef, false, checkIsNonText, checkFileType)
}

// We are in a directory, so we return a list of FileContentResponse objects
Expand Down Expand Up @@ -248,6 +259,22 @@ func checkIsNonText(response *api.CommitContentsResponse, entry *git.TreeEntry)
return nil
}

// checkFileType classifies entry and stores the resulting label in
// response.FileType. Entries recognised by isLFS are classified by file name
// through GetLfsFileType; everything else is classified by content through
// GetFileType. On error the response is left untouched and the error is
// returned.
func checkFileType(response *api.CommitContentsResponse, entry *git.TreeEntry) (err error) {
	classify := func() (string, error) {
		if _, viaLFS := isLFS(entry); viaLFS {
			return GetLfsFileType(entry, response.Name)
		}
		return GetFileType(entry)
	}

	label, err := classify()
	if err != nil {
		return err
	}

	response.FileType = &label
	return nil
}

func isLFS(entry *git.TreeEntry) (lfs.Pointer, bool) {
if !entry.IsRegular() || entry.Size() > 512 {
return lfs.Pointer{}, false
Expand Down Expand Up @@ -284,3 +311,52 @@ func isNonText(entry *git.TreeEntry) (bool, error) {

return !st.IsText(), nil
}

// GetLfsFileType classifies a file by the extension of FileName instead of by
// its content; checkFileType uses it for entries that isLFS recognises.
//
// It returns "" for entries that are not regular files, otherwise one of
// "document", "image", "video", "audio" or "unknown". The extension is
// compared case-insensitively, so "PHOTO.JPG" is an image just like
// "photo.jpg". The returned error is always nil.
func GetLfsFileType(entry *git.TreeEntry, FileName string) (string, error) {
	if !entry.IsRegular() {
		return "", nil
	}

	// filepath.Ext preserves the case of the name, while the extension tables
	// hold lower-case entries only.
	ext := strings.ToLower(filepath.Ext(FileName))

	switch {
	case slices.Contains(DocumentExt, ext):
		return "document", nil
	case slices.Contains(imageExt, ext):
		return "image", nil
	case slices.Contains(videoExt, ext):
		return "video", nil
	case slices.Contains(audioExt, ext):
		return "audio", nil
	}
	return "unknown", nil
}

// GetFileType classifies a tree entry by sniffing the first bytes of its blob.
//
// It returns "" for entries that are not regular files, otherwise one of
// "video", "audio", "image", "document" or "unknown" (checked in that order).
// An error is returned only when the blob cannot be opened.
func GetFileType(entry *git.TreeEntry) (string, error) {
	if !entry.IsRegular() {
		return "", nil
	}

	reader, err := entry.Blob().DataAsync()
	if err != nil {
		return "", err
	}
	defer reader.Close()

	// Only the first 1024 bytes are handed to the sniffer.
	// NOTE(review): the read error is discarded and whatever was read is
	// sniffed — confirm this best-effort behaviour is intended.
	head := make([]byte, 1024)
	n, _ := util.ReadAtMost(reader, head)
	sniffed := typesniffer.DetectContentType(head[:n])

	switch {
	case sniffed.IsVideo():
		return "video", nil
	case sniffed.IsAudio():
		return "audio", nil
	case sniffed.IsImage():
		return "image", nil
	case sniffed.IsDocument():
		return "document", nil
	default:
		return "unknown", nil
	}
}

0 comments on commit b6959f1

Please sign in to comment.