autodescribe

#!/bin/sh
# autodescribe - automatically extract comments from files
#
# Copyright (c) 1998-2024 Dan Fandrich <dan@coneharvesters.com>
# Licensed under the MIT license (see LICENSE).

# zsh does not split unquoted parameter expansions into words by default.
# Turn on sh-style splitting so that $DECOMPRESS, which holds a command plus
# its arguments, expands the same way as in the other shells.
[ -z "$ZSH_VERSION" ] || setopt shwordsplit

# Print a shell-quoted version of the first argument
shquote () {
	# Wrap $1 in single quotes, rewriting every embedded single quote as the
	# four characters quote-backslash-quote-quote (leave the quoted string,
	# add an escaped quote, re-enter the quoted string).
	#
	# The surrounding quotes are printed from BEGIN/END so that an empty
	# argument, for which awk sees no records at all, still produces an empty
	# quoted word. Records are rejoined with newlines so that a multi-line
	# argument keeps its line breaks inside one quoted word.
	printf '%s' "$1" | awk -v q="'" '
		BEGIN { printf "%s", q }
		NR > 1 { printf "\n" }
		{ gsub(q, q "\\" q q, $0); printf "%s", $0 }
		END { printf "%s", q }'
}

# Filter to trim leading and trailing whitespace and concatenate lines
trimspace () {
	# Remove carriage returns, fold every newline into a space so the whole
	# input becomes a single line, then strip the blanks at both ends.
	tr -d '\015' |
		tr '\n' ' ' |
		sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}

# Drop a comment if it's too generic and not descriptive of an archive.
# It may be fine as a file description, however, so use this function only
# in contexts that make sense.
drop_useless_comment () {
	# Print $1 unchanged when it looks descriptive; print nothing when it is
	# empty, an RCS Id keyword or one of a list of generic headings.
	# Sets the global BARE as a side effect.

	# RCS Id (this test is also true for an empty argument)
	if [ -z "${1##\$Id:*}" ]; then
		return
	fi
	# Strip uninteresting characters before comparison. printf is used
	# instead of echo so that a comment starting with a dash or containing
	# backslashes is passed through untouched by every shell.
	BARE=$(printf '%s\n' "$1" | tr -d '.:=\000-' | trimspace)
	# Dots have already been removed from BARE at this point, so a
	# "readme.txt" heading has to be matched as "readmetxt".
	if printf '%s\n' "$BARE" | grep -Ei '^(about|background|contents|description|example|general information|install|installation|instructions|intro|introduction|module|name|overview|project|readme|readmetxt|status|summary|synopsis|This is a Bazaar control directory|todo|tools|usage|usage guide)$' >/dev/null; then
		return
	fi
	printf '%s\n' "$1"
}

# Return the full language locale with dashes instead of underscores
# This might return a locale with or without a specific country.
get_lang_full () {
	# From the LANG line of the locale report, keep what sits between the
	# "=" and the optional ".codeset" part, with underscores turned into
	# dashes (e.g. LANG=en_US.UTF-8 gives en-US).
	locale |
		sed -n -E \
			-e 's/_/-/g' \
			-e '/^LANG=/s/^.*=([^.]*)\.?.*$/\1/p'
}

# Same as get_lang_full but with underscores instead of dashes
get_lang_full_under () {
	# Transliterate every dash of the full locale back into an underscore
	get_lang_full | sed -e 'y/-/_/'
}

# Language locale with country-specific portion removed
get_lang_generic () {
	# Keep only what precedes the first dash of the full locale
	get_lang_full | cut -d- -f1
}

# Make a filename that starts with a dash (-) safe to pass to a program that
# would otherwise interpret it as an option.
safefn () {
	# Print $1, prefixed with "./" when it begins with a dash.
	# printf is used rather than echo because some /bin/sh implementations
	# expand backslash sequences in echo arguments, which would corrupt file
	# names containing a backslash.
	case "$1" in
		-*) printf '%s\n' "./$1" ;;
		*) printf '%s\n' "$1" ;;
	esac
}

# Following are functions to extract descriptions for specific file types

# This is a generic tar comment extractor.  Comments are taken from embedded
# files, such as man pages, Appdata files, etc. The first argument is the
# file name, as normal, but the second argument is a command to decompress
# the tar archive and extract it to stdout.
#
# BUG: GNU tar will create empty directories, or directories containing
# symbolic links between ver. 1.11.2 until 1.33 (appears fixed in 1.34)
# BUG: GNU tar will extract all files if path ends in a number, e.g.
# playmidi-2.3/foo
# requires: tar, gzip, man-db, xmlstarlet
get_comment_compressed_tar () {
	# Arguments:
	#   $1 - path of the compressed tar archive
	#   $2 - decompression command line; it is expanded unquoted below so
	#        that it splits into the command and its arguments
	# Globals written: sf, DECOMPRESS, TMPFILE, TMPFILE2, BASENAME1,
	#   BASENAME2, MATCHNAME, and COMMENT (assigned directly after the README
	#   stages and by the comment_* handlers that are called).
	# Every stage below runs only while COMMENT is still empty, so the first
	# handler that produces a comment wins.
	# NOTE(review): COMMENT is never cleared here; this assumes the caller
	# passes it in empty - confirm against the caller.

	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	DECOMPRESS="$2"
	TMPFILE=$(mktemp)
	TMPFILE2=$(mktemp)
	# First, scan for all the possible embedded types we support. The list of
	# all candidate files is built first so the archive only needs to be
	# decompressed a maximum of one more time (assuming a successful comment
	# extraction) to save time while also avoiding having lots of temporary
	# files lying around at the same time.
	#
	# Skip any readme files more than 2 levels down in the directory hierarchy.
	# Sort files by distance from root, so files higher up will be used first
	# when there is more than one. Files that appear to be in a documentation
	# directory get a half-level boost and those that appear to be in a dotted
	# (hidden) directory get a full level demotion. Because of this sorting,
	# wildcards cannot be used to extract files because the extraction order is
	# the order encountered in the file, not the order specified.
	#
	# The awk stage prefixes each path with a sort key of twice the number of
	# "/"-separated components, minus 1 for a doc/man/info directory, plus 2
	# for a path containing "/."; cut removes that key again after sorting.
	$DECOMPRESS "$1" | tar -tf - | \
		grep -E '(\.man|\.[0-9]|\.lsm|\.appdata\.xml|\.metainfo\.xml|\.desktop|configure\.ac|README\.(adoc|md|rst|txt)|Readme\.(adoc|md|rst|txt)|ReadMe\.(adoc|md|rst|txt)|readme\.(adoc|md|rst|txt)|README|Readme|ReadMe|readme|\.texi|\.texinfo|pyproject.toml|CMakeLists\.txt|\.pc|\.pc\.in|\<file_id\.diz)$' | \
		awk 'BEGIN {FS="/"} {doc=!!match($0, "/(([Dd]oc)|[Mm]an|[Ii]nfo)"); dot=!!index($0, "/."); print split($0, a)*2-doc+2*dot "\t" $0;}' | \
	sort -n | \
	cut -f2- | \
	grep -viE '^.*/.*/.*/.*readme(\.[a-z]*)?$' > "$TMPFILE"

	if [ -s "$TMPFILE" ]; then
		# Found at least one candidate file

		# Try to find the base name of the tar ball, without version numbers
		# and file extensions. This isn't always easy, so use two heuristics to
		# find one shorter and one (possibly) longer candidate.
		# BASENAME1 stops at the first non-alphanumeric character; BASENAME2
		# additionally keeps dash-separated parts that start with a letter.
		BASENAME1=$(basename "$sf" | sed -e 's/[^a-zA-Z0-9].*$//')
		BASENAME2=$(basename "$sf" | sed -n -e 's/^\([a-zA-Z0-9]\+\(-[a-zA-Z][a-zA-Z0-9]*\)*\).*$/\1/p')

		# Note: file types that do not tolerate concatenation with subsequent
		# files (e.g. XML) must use MATCHNAME to extract only one single file
		# instead of using wildcards (which is also bad given the reason
		# above).

		# Appdata file
		MATCHNAME=$(grep -E '(\.appdata\.xml|\.metainfo\.xml)$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_appdata "$TMPFILE2"
		fi

		# XDG desktop file
		MATCHNAME=$(grep '\.desktop$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_desktop "$TMPFILE2"
		fi

		# Linux Software Map
		# This is basically obsolete these days but can be found in old
		# archives.
		MATCHNAME=$(grep '\.lsm$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_lsm "$TMPFILE2"
		fi

		# file_id.diz
		# This is likely obsolete these days but can be found in old
		# archives.
		MATCHNAME=$(grep 'file_id\.diz$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_first_line "$TMPFILE2"
		fi

		# pyproject.toml
		MATCHNAME=$(grep 'pyproject\.toml$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_pyproject "$TMPFILE2"
		fi

		# man page
		# First, look for a man page based on the simple name of the tar file
		MATCHNAME=$(grep -iE "(^|/)$BASENAME1(\.man|.[0-9])$" < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -z "$MATCHNAME" ]; then
			# Expand the attempt to find a matching man page
			MATCHNAME=$(grep -iE "(^|/)$BASENAME2(\.man|.[0-9])$" < "$TMPFILE" | head -1)
		fi
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_man "$TMPFILE2"
		fi
		# Finally, try the first man page found
		# Only look at the first matching man page since lexgrog wants only one
		# and since some archives contain hundreds. The first one might not
		# be the best match, unfortunately.
		MATCHNAME=$(grep -E '(\.man|.[0-9])$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_man "$TMPFILE2"
		fi

		# TeXinfo pages
		# First, look for a texinfo page based on the simple name of the tar
		# file
		MATCHNAME=$(grep -iE "(^|/)$BASENAME1\.texi(nfo)?$" < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -z "$MATCHNAME" ]; then
			# Expand the attempt to find a matching texinfo page
			MATCHNAME=$(grep -iE "(^|/)$BASENAME2\.texi(nfo)?$" < "$TMPFILE" | head -1)
		fi
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_texi "$TMPFILE2"
		fi
		# Finally, try any texinfo pages
		# Note: this doesn't handle spaces in filenames, because MATCHNAME
		# may hold several names here and is deliberately expanded unquoted
		# so that each one becomes a separate tar argument.
		MATCHNAME=$(grep -E '\.texi(nfo)?$' < "$TMPFILE")
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - $MATCHNAME 2>/dev/null > "$TMPFILE2"
			comment_texi "$TMPFILE2"
		fi

		# pkg-config file
		MATCHNAME=$(grep -i '\.pc$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
			comment_pc "$TMPFILE2"
		fi

		# pkg-config template file
		MATCHNAME=$(grep -i '\.pc\.in$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
			comment_pctmpl "$TMPFILE2"
		fi

		# README.md file
		MATCHNAME=$(grep -i 'README\.md$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
			comment_md "$TMPFILE2"
		fi
		# Drop the comment if it's not useful in this context. This empties
		# COMMENT again for generic headings so that the next stage is tried.
		COMMENT=$(drop_useless_comment "$COMMENT")

		# README.rst file
		MATCHNAME=$(grep -i 'README\.rst$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
			comment_first_line "$TMPFILE2"
		fi
		COMMENT=$(drop_useless_comment "$COMMENT")

		# README.adoc file
		MATCHNAME=$(grep -i 'README\.adoc$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
			comment_asciidoc "$TMPFILE2"
		fi
		COMMENT=$(drop_useless_comment "$COMMENT")

		# README file
		MATCHNAME=$(grep -iE 'README(\.txt)?$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
			comment_first_line "$TMPFILE2"
		fi
		COMMENT=$(drop_useless_comment "$COMMENT")

		# GNU autoconf
		MATCHNAME=$(grep 'configure\.ac$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_ac "$TMPFILE2"
		fi

		# CMake
		MATCHNAME=$(grep 'CMakeLists\.txt$' < "$TMPFILE" | head -1)
		if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
			$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
			comment_cmake "$TMPFILE2"
		fi
	fi
	# Remove the temporary files (the names are empty if mktemp failed)
	if [ -n "$TMPFILE2" ]; then
		rm -f "$TMPFILE2"
	fi
	if [ -n "$TMPFILE" ]; then
		rm -f "$TMPFILE"
	fi
}

# File type: tbz2 (bzip2-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, bzip2, man-db, xmlstarlet
comment_tbz2 () {
	# Delegate to the generic tar handler, decompressing to stdout with bzip2.
	# The trailing -- ends option parsing before the file name is appended.
	get_comment_compressed_tar "$1" 'bzip2 -dc --'
}

# File type: tgz (compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, gzip, man-db, xmlstarlet
comment_tgz () {
	# Delegate to the generic tar handler, decompressing to stdout with gzip.
	# The trailing -- ends option parsing before the file name is appended.
	get_comment_compressed_tar "$1" 'gzip -dc --'
}

# File type: tlzip (lzip-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, lzip, man-db, xmlstarlet
comment_tlzip () {
	# Delegate to the generic tar handler, decompressing to stdout with lzip.
	# The trailing -- ends option parsing before the file name is appended.
	get_comment_compressed_tar "$1" 'lzip -dc --'
}

# File type: tlzma (lzma-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, lzma, man-db, xmlstarlet
comment_tlzma () {
	# Delegate to the generic tar handler, decompressing to stdout with lzma.
	# The trailing -- ends option parsing before the file name is appended.
	get_comment_compressed_tar "$1" 'lzma -dc --'
}

# File type: txz (xzip-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, xz, man-db, xmlstarlet
comment_txz () {
	# Delegate to the generic tar handler, decompressing to stdout with xz.
	# The trailing -- ends option parsing before the file name is appended.
	get_comment_compressed_tar "$1" 'xz -dc --'
}

# File type: tzst (zstd-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, zstd, man-db, xmlstarlet
comment_tzst () {
	# Delegate to the generic tar handler, decompressing to stdout with zstd.
	# The trailing -- ends option parsing before the file name is appended.
	get_comment_compressed_tar "$1" 'zstd -dc --'
}


# File type: lzh (lzh/lha archive with desc.sdi comment)
# requires: lha || lhasa
comment_lzh () {
	# Dash-proof form of the file name for lha, which takes it as an argument
	sf="$(safefn "$1")"
	# Print desc.sdi with lha, or with lhasa (fed through stdin) when lha
	# fails, and keep the first three lines joined into one.
	COMMENT=$(
		{
			lha pq "$sf" desc.sdi 2>/dev/null ||
				lhasa pq - desc.sdi < "$1" 2>/dev/null
		} |
			head -3 |
			trimspace
	)
}

# File type: zip (zip archive)
# requires: unzip (Info-ZIP version)
comment_zip () {
	# Use the archive comment when there is one, otherwise fall back to an
	# embedded desc.sdi member. COMMENT is left untouched if neither exists.

	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# Read the archive comment a single time. The original file name is not
	# needed any more now that sf is set, so the positional parameter is
	# reused as a function-local holder for the comment text.
	set -- "$(unzip -zq "$sf" dummy-file-to-eliminate-output)"
	if [ -n "$1" ]; then
		COMMENT=$(printf '%s\n' "$1" | head -2 | trimspace)

	elif unzip -vqq "$sf" desc.sdi > /dev/null ; then
		COMMENT=$(unzip -pq "$sf" desc.sdi | head -3 | trimspace)
	fi
}

# File type: zoo (zoo archive)
# requires: unzoo || zoo
comment_zoo () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# 1. unzoo listing: the "# " lines that come before the "Length" header
	COMMENT=$(unzoo -l -v "$sf" | sed -n -e '/^Length/,$d' -e 's/^# //p' | head -3 | trimspace)
	if [ -z "$COMMENT" ]; then
		# 2. zoo listing: the ">> " lines
		COMMENT=$(zoo vc "$sf" dummy-file-to-eliminate-output | sed -n -e 's/^>> //p' | trimspace)
		# 3. an embedded desc.sdi member, if zoo can list one
		if [ -z "$COMMENT" ] && zoo vq "$sf" desc.sdi > /dev/null ; then
			COMMENT=$(zoo xpq "$sf" desc.sdi | head -3 | trimspace)
		fi
	fi
}

# File type: plist (Apple property list)
# requires: xmlstarlet
comment_plist () {
	# One template per key, most desirable first; each prints the <string>
	# that follows the key. head keeps the first value that was printed.
	COMMENT=$(xmlstarlet sel \
		-t -m "/plist/dict/key[normalize-space(text())='CFBundleDisplayName'][1]" -v "following-sibling::string[1]" -nl \
		-t -m "/plist/dict/key[normalize-space(text())='CFBundleGetInfoString'][1]" -v "following-sibling::string[1]" -nl \
		-t -m "/plist/dict/key[normalize-space(text())='OpenSourceProject'][1]" -v "following-sibling::string[1]" -nl \
		-t -m "/plist/dict/key[normalize-space(text())='Label'][1]" -v "following-sibling::string[1]" -nl \
		-t -m "/plist/dict/key[normalize-space(text())='CFBundleName'][1]" -v "following-sibling::string[1]" -nl \
		-t -m "/plist/dict/key[normalize-space(text())='CFBundleIdentifier'][1]" -v "following-sibling::string[1]" -nl \
		< "$1" 2>/dev/null |
		head -1)
}

# File type: pyproject.toml (Python project definition)
comment_pyproject () {
	# Prefer the description key of the [project] table...
	COMMENT=$(sed -En -e '/^\[project\]$/,/^\[/s/^description *= *"?([^"]*)"? *$/\1/p' < "$1" | head -1)
	[ -z "$COMMENT" ] || return
	# ...and settle for its name key when there is no description
	COMMENT=$(sed -En -e '/^\[project\]$/,/^\[/s/^name *= *"?([^"]*)"? *$/\1/p' < "$1" | head -1)
}

# File type: rpm (rpm package)
# requires: rpm
comment_rpm () {
	# Query the package file for its SUMMARY tag only
	COMMENT="$(
		rpm -qp --queryformat '%{SUMMARY}' -- "$1"
	)"
}

# File type: cmake (CMake build file)
comment_cmake () {
	# Match a one-line project(...) command (keyword in any letter case,
	# optional trailing # comment) and keep its arguments up to the first
	# quote character.
	COMMENT=$(
		sed -nE -e 's/^[[:space:]]*[Pp][Rr][Oo][Jj][Ee][Cc][Tt][[:space:]]*\([ "'"'"']*([^"'"'"']*)[ "'"'"']*\)[[:space:]]*(#.*)?$/\1/p' < "$1" |
			trimspace
	)
}

# File type: cue (CD cue index file)
comment_cue () {
	# Only a TITLE at the very start of a line is considered; the first one
	# found is used with its quotes removed.
	COMMENT=$(
		sed -n -E -e '/^TITLE/s/^TITLE *"([^"]*)" *$/\1/p' < "$1" |
			head -1
	)
}

# File type: cbm (Commodore disk image)
# See https://github.com/dfandrich/fvcbm/
# requires: fvcbm
comment_cbm () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# Take up to 16 characters after "Title:" and stop reading the listing
	# at the first line that starts with a row of 16 or more "=".
	COMMENT=$(
		fvcbm "$sf" |
			sed -Ene 's/^Title: {1,3}(.{1,16}).*$/\1/p' -e '/^={16,}/q' |
			trimspace
	)
}

# File type: desktop (XDG desktop entry file)
# Print the first value of key $2 (e.g. Name or Comment) found in desktop
# file $1. Localized entries are tried first: the full locale (en when no
# locale is known), then the generic language, then English. The plain,
# non-localized entry is the last resort.
_desktop_entry_value () {
	_dev_lang="$(get_lang_full_under)"
	for _dev_lang in "${_dev_lang:-en}" "$(get_lang_generic)" en; do
		_dev_value=$(sed -n -e "s/^$2\[$_dev_lang\]=//p" < "$1" | head -1)
		if [ -n "$_dev_value" ]; then
			printf '%s\n' "$_dev_value"
			return
		fi
	done
	# Finally, try the non-locale-dependent entry
	sed -n -e "s/^$2=//p" < "$1" | head -1
}

comment_desktop () {
	# Sets COMMENT to "Name, Comment", or to whichever of the two exists.
	# NAME is set as a side effect. The helper is run in a command
	# substitution, so its scratch variables do not leak into this shell.
	NAME=$(_desktop_entry_value "$1" Name)
	COMMENT=$(_desktop_entry_value "$1" Comment)

	# Now, use the right combination if more than one is found
	if [ -n "$NAME" ]; then
		if [ -n "$COMMENT" ]; then
			COMMENT="$NAME, $COMMENT"
		else
			COMMENT="$NAME"
		fi
	fi
}

# File type: doc (Microsoft composite document)
# requires: file
comment_doc () {
	# Sets COMMENT from the Title field reported by file(1) for the document
	# named by $1, or from the Subject field when there is no title.
	# The document is read through $1, like every other handler, instead of
	# depending on a variable named f being set by the caller.
	#
	# This will truncate titles with commas, but there's no foolproof way
	# to avoid it due to the simplistic output format used by file
	COMMENT=$(file - < "$1" | sed -n -e 's@^.*Title:  *\([^,]*\).*$@\1@p')
	if [ -z "$COMMENT" ] ; then
		COMMENT=$(file - < "$1" | sed -n -e 's@^.*Subject:  *\([^,]*\).*$@\1@p')
	fi
}

# File type: docx (Microsoft Office document)
# requires: unzip, xmlstarlet
comment_docx () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# The title is stored in the core properties part of the package
	COMMENT=$(
		unzip -pq "$sf" docProps/core.xml |
			xmlstarlet sel -t -v /cp:coreProperties/dc:title
	)
}

# File type: egg (Python egg package)
# requires: unzip
comment_egg () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# Keep the values of the Name and Summary headers of PKG-INFO, then
	# join the first two resulting lines with ", ".
	COMMENT=$(
		unzip -pq "$sf" EGG-INFO/PKG-INFO |
			sed -En 's/^(Name|Summary): *//p' |
			sed -e N -e 's/\n/, /'
	)
}

# File type: exe (Microsoft Windows PE executable)
# File type: dll (Microsoft Windows PE dynamic link library)
# requires: python >= 3, pefile (see https://github.com/erocarrera/pefile/)
comment_exe () {
	# The embedded Python program reads the whole executable from stdin,
	# parses it with pefile and prints the values of every "FileDescription"
	# entry found in the string tables of the version resource, joined
	# together without a separator. A file that pefile rejects with
	# PEFormatError produces no output.
	COMMENT=$(python3 -c '
import pefile,sys
try:
	pe=pefile.PE(data=sys.stdin.buffer.read())
	if hasattr(pe, "FileInfo"):
		print("".join([v.decode("UTF-8")
			for fi in pe.FileInfo
				for sfi in fi if hasattr(sfi, "StringTable")
					for item in sfi.StringTable
						for k,v in item.entries.items() if k == b"FileDescription" ]))
except pefile.PEFormatError:
	pass  # probably an old-style file
' < "$1")
}

# File type: ebook (E-book formats, epub, azw, fbz, etc.)
# These are all handled by ebook-meta (part of calibre) so there doesn't seem to be a
# good reason to separate them into individual handlers.
# requires: calibre
comment_ebook() {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# File name without directory and without its last extension
	BASENAME=$(basename "$sf" | sed -e 's/\.[^.]*$//')
	COMMENT=$(ebook-meta -- "$1" | sed -n -e 's/^Title *: //p')
	# When no real title exists, ebook-meta reports the extension-less file
	# name as the title. That is of no use as a description, so discard it.
	[ "$COMMENT" != "$BASENAME" ] || COMMENT=""
}

# File type: kicad (Kicad schematic)
comment_kicad () {
	COMMENT=$(sed -n -e '1,/(title_block/d' -e 's/^[[:space:]]*(title "\(.*\)")$/\1/p' -e '/^[[:space:]]*)$/,$d' < "$1")
}

# File type: kra (Krita image)
# requires: unzip, xmlstarlet
comment_kra () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# The title lives in documentinfo.xml inside the archive; the XML tool's
	# diagnostics are discarded.
	COMMENT=$(
		unzip -pq "$sf" documentinfo.xml |
			xmlstarlet sel -N d=http://www.calligra.org/DTD/document-info -t -v /d:document-info/d:about/d:title 2>/dev/null
	)
}

# File type: kvtml (kwordquiz flash card file)
# requires: xmlstarlet
comment_kvtml () {
	# Value of /kvtml/information/title; XML diagnostics are discarded
	COMMENT="$(
		xmlstarlet sel -t -v '/kvtml/information/title' < "$1" 2>/dev/null
	)"
}

# File type: lsm (Linux Software Map)
comment_lsm () {
	# Only the first Title: line is used; a title continued on following
	# lines is not handled.
	COMMENT=$(
		sed -n 's/^Title:[[:space:]]*//p' < "$1" |
			head -1
	)
}

# File type: pc (pkg-config library file)
# If a shell variable substitution is noted, then that field is ignored.
comment_pc () {
	# Name field; a value containing a $ (variable substitution) is ignored
	NAME=$(sed -n 's/^Name: *//p' < "$1" | head -1)
	case "$NAME" in
		*'$'*) NAME= ;;
	esac

	# Description field, with the same rule
	COMMENT=$(sed -n 's/^Description: *//p' < "$1" | head -1)
	case "$COMMENT" in
		*'$'*) COMMENT= ;;
	esac

	# Combine as "Name, Description", or use whichever one is present
	if [ -z "$NAME" ]; then
		:
	elif [ -z "$COMMENT" ]; then
		COMMENT="$NAME"
	else
		COMMENT="$NAME, $COMMENT"
	fi
}

# This is the autoconf-style templatized version of a .pc file.
# It works the same as a normal pkg-config file except if a template
# substitution is noted, then that field is ignored.
# File type: pctmpl (pkg-config library file template)
comment_pctmpl () {
	# Name field; a value containing a $ or an @ (variable or template
	# substitution) is ignored
	NAME=$(sed -n 's/^Name: *//p' < "$1" | head -1)
	case "$NAME" in
		*[\$@]*) NAME= ;;
	esac

	# Description field, with the same rule
	COMMENT=$(sed -n 's/^Description: *//p' < "$1" | head -1)
	case "$COMMENT" in
		*[\$@]*) COMMENT= ;;
	esac

	# Combine as "Name, Description", or use whichever one is present
	if [ -z "$NAME" ]; then
		:
	elif [ -z "$COMMENT" ]; then
		COMMENT="$NAME"
	else
		COMMENT="$NAME, $COMMENT"
	fi
}

# File type: pacman (Pacman package)
# requires: tar, xz
# TODO: these packages can be compressed with gzip, bzip2 and zstd as well
comment_pacman () {
	# Decompress with xz, print the .PKGINFO member and glue pkgname and
	# ", pkgdesc" together by deleting the newlines between them.
	COMMENT=$(
		xz -dc < "$1" |
			tar -xOf - .PKGINFO |
			sed -n -e 's/^pkgname = //p' -e 's/^pkgdesc = /, /p' |
			tr -d '\n'
	)
}

# File type: pdf (Portable Document Format)
# requires: poppler
comment_pdf () {
	# First of the Title, Subject and Author lines, in the order that
	# pdfinfo prints them
	COMMENT=$(
		pdfinfo -- "$1" |
			sed -E -n -e 's/^(Title|Subject|Author):[[:space:]]*//p' |
			head -1
	)
	# "untitled", optionally followed by a dash and digits, is a common
	# placeholder that says nothing about the document: discard it
	if echo "$COMMENT" | grep -iE '^untitled(-[0-9]*)?$' >/dev/null; then
		COMMENT=""
	fi
}

# File type: png (PNG image)
# requires: pngtools
comment_png () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# Use the indented Title entry, or the Description entry without one
	COMMENT=$(pnginfo "$sf" | sed -n -e 's/^[[:space:]]\+Title[^:]*: //p')
	[ -n "$COMMENT" ] || COMMENT=$(pnginfo "$sf" | sed -n -e 's/^[[:space:]]\+Description[^:]*: //p')
}

# File type: ps (PostScript source code)
comment_ps () {
	# The DSC title comment is the preferred source
	COMMENT=$(sed -n 's/^%%Title: *//p' < "$1" | head -1)
	# Otherwise take the first comment made of one or more % signs followed
	# by at least one space
	[ -n "$COMMENT" ] || COMMENT=$(sed -n 's/^%%*  *//p' < "$1" | head -1)
}

# File type: psfont (PostScript font)
comment_psfont () {
	# The parenthesized string that follows /FullName is preferred
	COMMENT=$(sed -n 's/^\/FullName[ 	]*(\(.*\)).*$/\1/p' < "$1" | head -1)
	# Otherwise use the name literal that follows /FontName
	[ -n "$COMMENT" ] || COMMENT=$(sed -n 's/^\/FontName[ 	]*\/\([^ 	]*\).*$/\1/p' < "$1" | head -1)
}

# File type: rss (Really Simple Syndication)
# requires: xmlstarlet
comment_rss () {
	# The channel title of the feed
	COMMENT="$(
		xmlstarlet sel -t -v '/rss/channel/title' < "$1"
	)"
}

# File type: subject (news article or mail file)
# File type: patch (git format-patch)
comment_subject () {
	# First Subject: header, matched without regard to letter case
	COMMENT=$(sed -n 's/^Subject: *//ip' < "$1" | head -1 | trimspace)
	# Without one, fall back to the first Content-Description: header
	[ -n "$COMMENT" ] || COMMENT=$(sed -n 's/^Content-Description: *//ip' < "$1" | head -1 | trimspace)
}

# File type: sla (Scribus document)
# requires: xmlstarlet
comment_sla () {
	# The TITLE attribute of the DOCUMENT element
	COMMENT="$(
		xmlstarlet sel -t -v '/SCRIBUSUTF8NEW/DOCUMENT/@TITLE' < "$1"
	)"
}

# File type: slob (Sorted List of Blobs dictionary)
# See https://github.com/itkach/slob/
# requires: slob
comment_slob () {
	# Ask the slob tool for the value of the "label" tag
	COMMENT="$(
		slob tag -n label -- "$1"
	)"
}

# File type: snap (Snap package)
# requires: squashfs
comment_snap () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# Print meta/snap.yaml from the image and keep its summary value
	COMMENT=$(
		unsquashfs -cat "$sf" meta/snap.yaml |
			sed -n 's/^summary: //p'
	)
}

# File type: spdx (Software Package Data Exchange)
# See https://spdx.dev/
comment_spdx () {
	# Written for the SPDX 2.X tag format; SPDX 3.X (once it's finalized)
	# may need something else. The first DocumentName tag is used.
	COMMENT=$(
		sed -n 's/^DocumentName: *//p' < "$1" |
			head -1
	)
}

# File type: stl (STL model file)
comment_stl () {
	# Detect ASCII or binary format. The file is treated as ASCII when all
	# of the following hold:
	#  - there is a newline within the first 80 bytes,
	#  - the file starts with "solid ",
	#  - the byte at offset 80 is not NUL.
	# od pads its output with blanks, so they are removed before the octal
	# value is compared; otherwise the comparison could never be equal.
	# A file shorter than 81 bytes yields an empty value and passes the test.
	if [ "$(dd if="$1" bs=1 count=80 2>/dev/null | wc -l)" -gt 0 ] &&
		[ "$(dd if="$1" bs=1 count=6 2>/dev/null | tr -d '\0')" = "solid " ] &&
		[ "$(dd if="$1" bs=1 count=1 skip=80 2>/dev/null | od -An -b | tr -d '[:space:]')" != "000" ]; then
		# ASCII stl format: the model name follows "solid" on the first line
		COMMENT=$(sed -n -E -e 's/[[:space:]]*$//' -e '1s/^solid +//p' < "$1")
	else
		# Binary stl format: the first 80 bytes are a free-form header that
		# is decoded as windows-1252 after dropping NUL bytes and trailing
		# blanks
		COMMENT=$(dd if="$1" bs=1 count=80 2>/dev/null | tr -d '\0' | sed -e 's/[[:space:]]*$//' | iconv -f windows-1252)
	fi
}

# File type: svg (Scalable Vector Graphics image)
# requires: xmlstarlet
comment_svg () {
	# A <title> directly below the root svg element, in any namespace
	COMMENT=$(xmlstarlet sel -t -v "/*[local-name()='svg']/*[local-name()='title']" < "$1")
	# Otherwise the title recorded in the RDF metadata
	[ -n "$COMMENT" ] || COMMENT=$(xmlstarlet sel -t -v "/*[local-name()='svg']/*[local-name()='metadata']/*[local-name()='RDF']/*[local-name()='Work']/*[local-name()='title']" < "$1")
}

# File type: svgz (Compressed Scalable Vector Graphics image)
# requires: gzip, xmlstarlet
comment_svgz () {
	# Same lookups as for a plain SVG file, on the gunzipped stream:
	# a <title> directly below the root svg element...
	COMMENT=$(gzip -dc < "$1" | xmlstarlet sel -t -v "/*[local-name()='svg']/*[local-name()='title']")
	# ...otherwise the title recorded in the RDF metadata
	[ -n "$COMMENT" ] || COMMENT=$(gzip -dc < "$1" | xmlstarlet sel -t -v "/*[local-name()='svg']/*[local-name()='metadata']/*[local-name()='RDF']/*[local-name()='Work']/*[local-name()='title']")
}

# File type: tellico (Tellico database file)
# requires: unzip, xmlstarlet
comment_tellico () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# The XML tool always warns about the DTD on stderr, so its stderr is
	# thrown away (which hides every other warning too).
	COMMENT=$(
		unzip -pq "$sf" tellico.xml |
			xmlstarlet sel -t -v "/*[local-name()='tellico']/*[local-name()='collection']/@title" 2> /dev/null
	)
}

# File type: tex (TeX document)
comment_tex () {
	# Simplistic on purpose: only a \title{...} that opens and closes on one
	# line is recognized, which is enough much of the time.
	COMMENT=$(
		sed -n -E -e 's/^[[:space:]]*\\title\{([^}]+)\}.*/\1/p' < "$1" |
			trimspace
	)
}

# File type: texi (Texinfo document)
comment_texi () {
	# Simplistic on purpose: the text after each @settitle command is
	# collected and joined onto one line, which works most of the time.
	COMMENT=$(
		sed -n -e 's/^[[:space:]]*@settitle //p' < "$1" |
			trimspace
	)
}

# File type: tiff (TIFF image)
# requires: libtiff-progs
comment_tiff () {
	# The tag listing is sorted in reverse so that ImageDescription comes
	# before DocumentName and Artist; the first of them is used.
	COMMENT=$(
		tiffinfo -- "$1" |
			sort -r |
			sed -E -n -e 's/^ *(ImageDescription|DocumentName|Artist): //p' |
			head -1
	)
}

# File type: whl (Python wheel package)
# requires: unzip
comment_whl () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# Keep the values of the Name and Summary headers of the METADATA
	# member, then join the first two resulting lines with ", ".
	COMMENT=$(
		unzip -pq "$sf" '*.dist-info/METADATA' |
			sed -En 's/^(Name|Summary): *//p' |
			sed -e N -e 's/\n/, /'
	)
}

# File type: xhb (Homebank file)
# requires: xmlstarlet
comment_xhb () {
	# The name attribute of the account element(s)
	COMMENT="$(
		xmlstarlet sel -t -v '/homebank/account/@name' < "$1"
	)"
}

# File type: ac (autoconf source code)
comment_ac () {
	# The first AC_INIT argument is the package name. A comma is required
	# inside the parentheses, so the multi-argument form
	#   AC_INIT([package], [version], [address])
	# is recognized while the old single-argument form
	#   AC_INIT(inc/libxyz.h.in)
	# is not.
	COMMENT=$(
		sed -E -n 's/^[[:space:]]*AC_INIT\([[:space:]]*\[?([^],]+).*,.*\)/\1/p' < "$1"
	)
}

# File type: c (C source code)
comment_c () {
	# Print the lines of every /* ... */ range with the comment decoration
	# removed, then keep the first three output lines joined into one.
	# Inside a range, in order:
	#  - on a line containing the opener, drop everything up to and
	#    including it, plus the blanks that follow;
	#  - on a line containing the closer, drop everything up to and
	#    including it, plus the blanks that follow;
	#  - drop leading blanks and asterisks, a trailing closer, and trailing
	#    blanks.
	# NOTE(review): because of the second step, a comment that opens and
	# closes on the same line contributes an empty line - confirm that this
	# is intended.
	COMMENT=$(sed -n '/\/\*/,/\*\//{
			/\/\*/s/^.*\/\*[ 	]*//
			/\*\//s/.*\*\/[ 	]*//
			s/^[ 	*]*//
			s/\*\/ *$//
			s/[ 	]*$//
			p 
			}' < "$1" | head -3 | trimspace)
}

# File type: pascal (Pascal source code)
comment_pascal () {
	# Collect comment text from both Pascal comment styles and keep the
	# first three output lines joined into one.
	#  - Lines holding nothing but a (* ... *) made of dashes, asterisks and
	#    spaces, and lines holding only an asterisk, are deleted up front.
	#  - Inside a (* ... *) range: the opener and what precedes it are
	#    dropped, then leading blanks/asterisks and trailing blanks, and the
	#    line is printed.
	#  - For a { ... } comment on a single line: everything up to the last
	#    opening brace and a closing brace at the end of the line are
	#    dropped, then leading blanks/asterisks and trailing blanks, and the
	#    line is printed.
	COMMENT=$(sed -n -e '/^[[:space:]]*(\*[-\* ]*\*)[[:space:]]*$/d' -e '/^[[:space:]]*\*[[:space:]]*$/d' -e '/(\*/,/\*)/{
			/(\*/s/^.*(\*//
			s/^[ 	*]*//
			s/[ 	]*$//
			p 
			}' -e '/{.*}/{
			s/^.*{//
			s/} *$//
			s/^[ 	*]*//
			s/[ 	]*$//
			p
			}' < "$1" | head -3 | trimspace)
}

# File type: 3mf (3-D Manufacturing format)
# requires: unzip, xmlstarlet
comment_3mf () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# The Title metadata entry of the model part inside the archive
	COMMENT=$(
		unzip -pq "$sf" 3D/3dmodel.model |
			xmlstarlet sel -t -v "/*[local-name()='model']/*[local-name()='metadata'][@name='Title']"
	)
}

# File type: abw (AbiWord document)
# requires: xmlstarlet
comment_abw () {
	# The dc.title metadata entry; XML diagnostics are discarded
	COMMENT="$(
		xmlstarlet sel -N a=http://www.abisource.com/awml.dtd -t -v "/a:abiword/a:metadata/a:m[@key='dc.title']" < "$1" 2>/dev/null
	)"
}

# File type: asciidoc (AsciiDoc text)
comment_asciidoc () {
	# Look in the first ten lines for a heading (one or more "=" and a
	# space), with trailing spaces and "=" signs removed; use the first one.
	COMMENT=$(
		head -10 < "$1" |
			sed -nE -e 's/[ =]*$//' -e 's/^=+ //p' |
			head -1 |
			trimspace
	)
}

# File type: amf (Additive Manufacturing File)
# requires: unzip, xmlstarlet
comment_amf () {
	# Dash-proof form of the file name
	sf="$(safefn "$1")"
	# Read the file directly unless it turns out to be zip compressed
	TMPFILE=
	INFILE="$1"
	if [ "$(dd if="$1" bs=1 count=2 2>/dev/null )" = "PK" ]; then
		# Zip signature found: unpack into a temporary file and read that
		TMPFILE=$(mktemp)
		INFILE="${TMPFILE}"
		unzip -pq "$sf" > "$INFILE"
	fi

	# Document-level name first, then the object name(s)
	COMMENT=$(xmlstarlet sel -t -v "/amf/metadata[@type='name']" < "$INFILE")
	# BUG: There will be no separators between multiple object names
	[ -n "$COMMENT" ] || COMMENT=$(xmlstarlet sel -t -v "/amf/object/metadata[@type='name']" < "$INFILE")

	if [ -n "$TMPFILE" ]; then
		rm -f "$TMPFILE"
	fi
}

# File type: apk (Android Package Kit)
# requires: Android SDK
comment_apk () {
	# Sets COMMENT to the application label for the current locale, falling
	# back to the generic language and then to English.
	#
	# Dump the badging information a single time instead of once per
	# language tried. The file name is not needed afterwards, so the
	# positional parameter is reused as a function-local holder for the dump.
	set -- "$(aapt  d badging "$1")"

	# Get current locale language (English by default)
	L="$(get_lang_full)"
	for L in "${L:-en}" "$(get_lang_generic)" en; do
		# Keep the text between the first pair of single quotes of the
		# first matching label line
		COMMENT=$(printf '%s\n' "$1" | grep "^application-label-${L}" | sed -e "s/^[^']*'//" -e "s/'.*$//" | head -1)
		if [ -n "$COMMENT" ]; then
			break
		fi
	done
}

# File type: appdata (Appdata metainfo file)
# requires: xmlstarlet
# Print the text of element $2 (name or summary) below the application or
# component root of Appdata file $1. The element tagged with the full locale
# (en when no locale is known) is tried first, then the one tagged with the
# generic language, then the first element that has no xml:lang at all.
_appdata_text () {
	_at_lang="$(get_lang_full)"
	for _at_lang in "${_at_lang:-en}" "$(get_lang_generic)"; do
		_at_text=$(xmlstarlet sel  -t -v '(/application|/component)/'"$2"'[@xml:lang="'"$_at_lang"'"]' <"$1")
		if [ -n "$_at_text" ]; then
			printf '%s\n' "$_at_text"
			return
		fi
	done
	# Try again with the default (untagged) element
	xmlstarlet sel  -t -v '(/application|/component)/'"$2"'[not(@xml:lang)]' <"$1" | head -1
}

comment_appdata () {
	# Sets COMMENT to "name, summary", or to whichever of the two exists.
	# NAME is set as a side effect. The helper is run in a command
	# substitution, so its scratch variables do not leak into this shell.
	NAME=$(_appdata_text "$1" name)
	COMMENT=$(_appdata_text "$1" summary)

	# Now, use the right combination if more than one is found
	if [ -n "$NAME" ]; then
		if [ -n "$COMMENT" ]; then
			COMMENT="$NAME, $COMMENT"
		else
			COMMENT="$NAME"
		fi
	fi
}

# File type: arj (ARJ archive)
# requires: arj
comment_arj () {
	# The archive comment is what the verbose listing prints between the
	# "Archive created:" line and the column header line; keep up to three
	# lines of it, joined into one.
	COMMENT=$(
		arj v -- "$1" |
			sed -e '1,/^Archive created:/d' -e '/^Sequence\/Pathname\/Comment\/Chapters/,$d' |
			head -3 |
			trimspace
	)
}

# File type: asm (assembly language source)
comment_asm () {
	# On a title or .title directive line, remove the directive and then
	# any quote characters wrapped around its argument.
	COMMENT=$(
		sed -n -E -e "/^[[:space:]]*(\.)?title/{s/^[[:space:]]*(\.)?title[[:space:]]*//;s/^['\"]*//;s/['\"]*$//;p;}" < "$1"
	)
}

# File type: aup (Audacity Project file)
# requires: xmlstarlet
comment_aup () {
	# The value of the TITLE tag of the project; XML diagnostics are
	# discarded
	COMMENT="$(
		xmlstarlet sel -N a=http://audacity.sourceforge.net/xml/ -t -v '/a:project/a:tags/a:tag[@name="TITLE"]/@value' < "$1" 2>/dev/null
	)"
}

# File type: avi (Audio Video Interleave file)
# See https://sourceforge.net/projects/avifile/
# requires: avifile-samples
comment_avi () {
	# Name from the info chunk, as reported by avitype
	COMMENT=$(avitype -- "$1" | sed -n -e 's/^<AVI reader> *: *InfoChunk Name: *//p')
	# Hand over to the ffmpeg-based handler when that gave nothing
	[ -n "$COMMENT" ] || comment_ffmpeg "$@"
}

# File type: dar (Disk Archiver archive)
# See http://dar.linux.free.fr/
# requires: dar >= 2.7.0
comment_dar () {
	# Read the archive header with dar in the C locale and keep the value of
	# its "User comment" line; dar's stderr is discarded.
	COMMENT=$(
		LC_ALL=C dar -Q -aheader -l "$1" 2>/dev/null |
			sed -n -e 's/^User comment *: *//p'
	)
}

# File type: deb (Debian package)
# requires: binutils, gzip, tar, xz, zstd
comment_deb () {
	# Ask ar which of the possible control tarballs the package contains,
	# then unpack that member with the matching decompressor and take the
	# text following "Description:" in the control file.
	case "$(ar t -- "$1" control.tar.zst control.tar.xz control.tar.gz 2>/dev/null)" in
		*zst)
			COMMENT=$(ar p -- "$1" control.tar.zst |
				zstd -dc |
				tar xOf - ./control |
				sed -n 's/^Description: *//p')
			;;
		*xz)
			COMMENT=$(ar p -- "$1" control.tar.xz |
				xz -qdc |
				tar xOf - ./control |
				sed -n 's/^Description: *//p')
			;;
		*gz)
			COMMENT=$(ar p -- "$1" control.tar.gz |
				gzip -qdc |
				tar xOf - ./control |
				sed -n 's/^Description: *//p')
			;;
		*)
			# None of the known control members was listed
			echo 'Error: unknown deb compression' >&2
			COMMENT=
			;;
	esac
}

# File type: docbook (DocBook document)
# requires: xmlstarlet
comment_docbook () {
	# Docbook document: title of the book
	COMMENT=$(xmlstarlet sel -t -v '/book/bookinfo/title' < "$1" 2>/dev/null)
	# Docbook man page: fall back to the refentry title
	[ -n "$COMMENT" ] ||
		COMMENT=$(xmlstarlet sel -t -v '/refentry/refmeta/refentrytitle' < "$1" 2>/dev/null)
}

# File type: flac (FLAC audio file)
# requires: flac
comment_flac () {
	# Export the tags to stdout and keep the value of any TITLE= line
	COMMENT=$(
		metaflac --export-tags-to=- -- "$1" |
			sed -E -n -e 's/^TITLE=//p'
	)
	# No TITLE tag: let the generic ffmpeg handler try
	[ -n "$COMMENT" ] || comment_ffmpeg "$@"
}

# File type: flatpakref (Flatpak Reference File)
comment_flatpakref () {
	# Use the value of the first Title= line; sed stops reading once it has
	# printed it.
	COMMENT=$(sed -n \
		-e '/^Title=/!d' \
		-e 's/^Title= *//p' \
		-e 'q' < "$1")
}

# File type: fodf (Open Document Format flat file)
# requires: xmlstarlet
comment_fodf () {
	# The title is the dc:title element inside the document's office:meta
	COMMENT=$(
		xmlstarlet sel -t \
			-v '/office:document/office:meta/dc:title' \
			< "$1"
	)
}

# File type: gcode (G-code machine control file)
# gcode has a few codes that could be used as titles, but aren't widely
# supported. Just extract a title from structured comments.
comment_gcode () {
	# Supports PrusaSlicer ver. >= 2.2.0, Cura >= ~4.0.0, Fanuc
	# The three expressions match, in turn, "; printing object NAME ...",
	# ";MESH:NAME" and an O-number followed by a parenthesised comment.
	# Use the name of the first object when more than one are present
	COMMENT=$(
		sed -n -E \
			-e 's/^; printing object ([^ ]+).*$/\1/p' \
			-e 's/^;MESH://p' \
			-e 's/^[^;(]*\<[Oo][0-9]+ *\(([^)]+)\).*$/\1/p' \
			< "$1" |
			head -n 1 |
			trimspace
	)
	# Supports CamBam http://www.cambam.co.uk/
	# The name is the first word inside the parenthesis on the line that
	# follows "Made using CamBam".
	[ -n "$COMMENT" ] ||
		COMMENT=$(sed -E -n -e '/Made using CamBam/{n;s/^\( *([^ ]+).*$/\1/p' -e '}'  < "$1")
	# PyCAM https://pycam.sourceforge.net/
	# Use the recorded file name with any leading directory part removed.
	[ -n "$COMMENT" ] ||
		COMMENT=$(
			sed -n -E -e 's/^;PYCAM-META-DATA: Filename: (.*[/\\])?//p' < "$1" |
				head -n 1 |
				trimspace
		)
}

# File type: gif (GIF image)
# requires: gifsicle
comment_gif () {
	# gifsicle reports embedded comments on lines beginning with "comment "
	COMMENT=$(
		gifsicle --info -- "$1" |
			sed -n -e 's/^ *comment //p'
	)
	# No embedded comment: let the generic ffmpeg handler try
	[ -n "$COMMENT" ] || comment_ffmpeg "$@"
}

# File type: gnumeric (Gnumeric spreadsheet)
# requires: xmlstarlet
comment_gnumeric () {
	# The title is the dc:title element of the office:meta block inside the
	# workbook; three namespaces have to be declared for the XPath.
	COMMENT=$(
		xmlstarlet sel \
			-N gnm=http://www.gnumeric.org/v10.dtd \
			-N office=urn:oasis:names:tc:opendocument:xmlns:office:1.0 \
			-N dc=http://purl.org/dc/elements/1.1/ \
			-t -v '/gnm:Workbook/office:document-meta/office:meta/dc:title' \
			< "$1"
	)
}

# File type: gpx (GPX GPS track)
# requires: xmlstarlet
comment_gpx () {
	# First choice: the name element inside the file's metadata element.
	# Elements are matched by local-name(), i.e. regardless of namespace.
	COMMENT=$(
		xmlstarlet sel -t -v "/*[local-name()='gpx']/*[local-name()='metadata']/*[local-name()='name']" < "$1" |
			trimspace
	)
	# Otherwise use the name of the first track
	[ -n "$COMMENT" ] ||
		COMMENT=$(
			xmlstarlet sel -t -v "/*[local-name()='gpx']/*[local-name()='trk'][1]/*[local-name()='name']" < "$1" |
				trimspace
		)
}

# File type: ics (iCalendar entry)
# requires: python3, python3-vobject
comment_ics () {
	# Parse the calendar from stdin with vobject and print the summary of
	# its first vevent. Only looks at the first entry if more than one.
	COMMENT=$(python3 -c '
import sys
import vobject
print(vobject.readOne(sys.stdin).contents["vevent"][0].summary.value)
' < "$1")
}

# File type: iso (ISO-9660 CD-ROM image)
# requires: cdrkit-isotools
comment_iso () {
	# Use the first non-empty "Volume id:" or "Volume set id:" value that
	# isoinfo reports for the image.
	COMMENT=$(
		isoinfo -d -i "$1" |
			sed -n -E -e 's/^Volume (set )?id: (.)/\2/p' |
			head -n 1
	)
}

# File type: jar (Java jar)
# requires: unzip
comment_jar () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# Take the first Application-Name or Bundle-Name entry of the manifest
	COMMENT=$(
		unzip -pqaa "$sf" META-INF/MANIFEST.MF |
			sed -E -n -e 's/^([Aa]pplication-[Nn]ame|[Bb]undle-[Nn]ame): *//p' |
			head -n 1
	)
}

# File type: jpeg (JPEG JFIF image)
# requires: jpeg-progs, exif
comment_jpeg () {
	# A JPEG comment block wins if there is one
	COMMENT=$(rdjpgcom < "$1")
	# Otherwise try a series of EXIF tags in order, stopping at the first one
	# that yields any text
	[ -n "$COMMENT" ] ||
		COMMENT=$(exif --ifd=0 --tag=ImageDescription -m -- "$1" 2>/dev/null | trimspace)
	[ -n "$COMMENT" ] ||
		COMMENT=$(exif --ifd=0 --tag=Artist -m -- "$1" 2>/dev/null | trimspace)
	[ -n "$COMMENT" ] ||
		COMMENT=$(exif --ifd=0 --tag=XP_Author -m -- "$1" 2>/dev/null | trimspace)
	[ -n "$COMMENT" ] ||
		COMMENT=$(exif --ifd=0 --tag=Copyright -m -- "$1" 2>/dev/null | trimspace)
	[ -n "$COMMENT" ] ||
		COMMENT=$(exif --ifd=EXIF --tag=UserComment -m -- "$1" 2>/dev/null | trimspace)
}

# File type: first_line (first line of text file)
comment_first_line () {
	# Skip lines that are empty or made up solely of whitespace and the
	# punctuation typically used for rules and ASCII art, then use the first
	# line that remains.
	COMMENT=$(
		grep -v '^[-_=*#:~/\\|()<>[:space:]]*$' < "$1" |
			head -n 1 |
			trimspace
	)
}

# File type: html (HTML text)
comment_html () {
	# Grab the text between a <title> tag (any letter case) and the last
	# closing tag on the same line, keep the first such line and decode the
	# &lt; and &gt; entities in it.
	COMMENT=$(
		sed -n -e 's,^.*<[tT][iI][tT][lL][eE]\>[^>]*>\(.*\)</.*$,\1,p' < "$1" |
			head -n 1 |
			sed -e 's/&lt;/</g' -e 's/&gt;/>/g'
	)
}

# File type: kdenlive (Kdenlive file)
# requires: xmlstarlet
comment_kdenlive () {
	# The document title is a property of the playlist whose id is main_bin
	COMMENT=$(
		xmlstarlet sel -t \
			-v '/mlt/playlist[@id="main_bin"]/property[@name="kdenlive:docmetadata.meta.attr.title.markup"]' \
			< "$1"
	)
}

# File type: man (man page)
# requires: man-db
comment_man () {
	# lexgrog prints the whatis text in double quotes after the file name.
	# Drop its "parse failed" lines, strip everything up to the opening quote
	# and the closing quote, and keep the first result.
	COMMENT=$(
		LC_ALL=C lexgrog -- "$1" |
			grep -v ': parse failed$' |
			sed -e 's/^[^"]*"//' -e 's/"$//' |
			head -n 1
	)
}

# File type: mantxt (plain text version of man page)
comment_mantxt () {
	# Find the NAME section heading and print the line that follows it with
	# its leading indentation removed. The heading is matched either as plain
	# "NAME" or with one arbitrary character between doubled letters
	# (presumably bold rendered by backspace overstriking).
	# Read from the function's own argument rather than the caller's loop
	# variable, so this works no matter how it is invoked.
	COMMENT=$(sed -n -E -e '/^(N.NA.AM.ME.E|NAME)$/{N;s/^[^ ]* *//p' -e '}' < "$1" | head -n 1)
	if [ -z "$COMMENT" ] ; then
		# It might be in roff format if it's not in plain ASCII format
		comment_man "$@"
	fi
}

# File type: md (Markdown text)
comment_md () {
	# Look for "# Heading" lines within the first five lines of the file,
	# after removing any trailing spaces and "#" characters from each line.
	COMMENT=$(
		head -n 5 < "$1" |
			sed -n -e 's/[ #]*$//' -e 's/^# //p' |
			trimspace
	)
}

# File type: mkv (Matroska video)
# requires: mkvtoolnix
comment_mkv () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# mkvinfo prints a tree; take the value of its "Title:" entries
	COMMENT=$(
		mkvinfo "$sf" |
			sed -n -e 's/^[|+ ]*Title: //p'
	)
	# No title there: let the generic ffmpeg handler try
	[ -n "$COMMENT" ] || comment_ffmpeg "$@"
}

# File type: mlt (mlt video file)
# These are created by Kdenlive
# requires: xmlstarlet
comment_mlt () {
	# The title is an attribute of the root mlt element
	COMMENT=$(
		xmlstarlet sel -t -v '/mlt/@title' \
			< "$1"
	)
}

# File type: mp3 (MPEG-1 layer 3 audio)
# id3info is the best program of the common ones I've found for this, since it
# supports both ID3v1 and ID3v2 tags, exports them both in the same format, and
# uses a format that's easy to parse.
# requires: id3lib || ffmpeg
comment_mp3 () {
	# Keep the values of the TPE1 and TIT2 frames. The reverse sort puts the
	# TPE1 line ahead of the TIT2 line; a "/" line is then inserted before
	# the second line so that trimspace joins them as "TPE1 / TIT2".
	COMMENT=$(
		id3info -- "$1" |
			sort -r |
			sed -n -E -e 's/^=== (TPE1|TIT2).*\): //p' |
			sed '2i\
/
' |
			trimspace
	)
	# No usable tags: let the generic ffmpeg handler try
	[ -n "$COMMENT" ] || comment_ffmpeg "$@"
}

# File type: mp4 (MPEG-4 video)
# requires: libquicktime-progs || ffmpeg
comment_mp4 () {
	# Use the first "name:" value found in the qtdump output
	COMMENT=$(
		qtdump "$1" |
			sed -n -e 's/^[[:space:]]*name:[[:space:]]*//p' |
			head -n 1
	)
	# No name found: let the generic ffmpeg handler try
	[ -n "$COMMENT" ] || comment_ffmpeg "$@"
}

# File type: mov (QuickTime video)
# requires: libquicktime-progs || ffmpeg
comment_mov () {
	# Handled exactly like mp4: delegate with all arguments unchanged
	comment_mp4 "$@"
}

# File type: mscx (MuseScore music score)
# requires: xmlstarlet
comment_mscx () {
	# The work title is the metaTag named workTitle of the score.
	# Redirect from "$1" only: "$@" as a redirection target is ambiguous as
	# soon as more than one argument is passed.
	COMMENT=$(xmlstarlet sel -t -v "/museScore/Score/metaTag[@name='workTitle']" < "$1")
}

# File type: mscz (MuseScore music score compressed)
# requires: unzip, xmlstarlet
comment_mscz () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# The container manifest names the archive member holding the score;
	# select the rootfile whose path ends in ".mscx"
	MEMBER=$(
		unzip -pqaa "$sf" META-INF/container.xml |
			xmlstarlet sel -t -v '/container/rootfiles/rootfile[substring(@full-path, string-length(@full-path) - 4) = ".mscx"]/@full-path'
	)
	# Without a score member there is nothing to read; COMMENT is left alone
	[ -n "$MEMBER" ] || return 0
	COMMENT=$(
		unzip -pqaa "$sf" "$MEMBER" |
			xmlstarlet sel -t -v "/museScore/Score/metaTag[@name='workTitle']" -
	)
}

# File type: pcap (Pcap network capture file)
# requires: wireshark-tools
comment_pcap () {
	# capinfos reports the capture-level comment on its own labelled line
	COMMENT=$(
		capinfos -- "$1" |
			sed -n -e 's/^Capture comment: *//p'
	)
}

# File type: palm (Palm Pilot file)
# requires: pilot-tools
comment_palm () {
	# Print the header with pilot-file and take the value of the "Name" line
	# (the label may be padded with dots before the colon)
	COMMENT=$(
		pilot-file -H -- "$1" |
			sed -n -e 's/^Name\.*: *//p'
	)
}

# File type: sh (shell script)
comment_sh () {
	# Drop everything from line 10 on, the #! line and comment lines that
	# hold nothing but "#" and spaces; the first remaining "#" comment line,
	# minus its marker, is the description.
	COMMENT=$(
		sed -n \
			-e '10,$d' \
			-e '/^#\!/d' \
			-e '/^#* *$/d' \
			-e 's/^# *//p' \
			< "$1" |
			head -n 1
	)
}

# File type: otf (OpenType font/TrueType font)
# requires: freetype2-demos
comment_otf () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# Join the values of the "family:" and "style:" lines that ftdump prints
	COMMENT=$(
		ftdump "$sf" |
			sed -E -n -e 's/^[[:space:]]*(family|style):[[:space:]]*//p' |
			trimspace
	)
}

# File type: ogg (Ogg media file)
# If artist is found, prepend it to the title like "Artist / Title"
# requires: vorbis-tools
comment_ogg () {
	# Keep the title= and artist= lines, sort them so that artist comes
	# first, strip the "key=" prefixes, then insert a "/" line before the
	# second line so that trimspace joins them as "Artist / Title".
	COMMENT=$(
		ogginfo -- "$1" |
			sed -n -E -e '/^[[:space:]]*(title|artist)=/p' |
			sort |
			sed -E -e 's/^[[:space:]]*[a-z]+=//' |
			sed '2i\
/
' |
			trimspace
	)
}

# File type: odf (Open Document Format)
# requires: unzip, xmlstarlet
comment_odf () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# The title lives in the meta.xml member of the archive; xmlstarlet
	# errors are discarded
	COMMENT=$(
		unzip -pq "$sf" meta.xml |
			xmlstarlet sel -t -v '/office:document-meta/office:meta/dc:title' 2>/dev/null
	)
}

# File type: oxt (Open Document Extension)
# requires: unzip, xmlstarlet
comment_oxt () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# TODO: choose the appropriate language if more than one is available
	# instead of just using the first one
	COMMENT=$(
		unzip -pq "$sf" description.xml |
			xmlstarlet sel \
				-N x=http://openoffice.org/extensions/description/2006 \
				-t -v '/x:description/x:display-name/x:name' -nl |
			head -n 1
	)
}

# File type: py (Python source code)
comment_py () {
	# Only the first nine lines are examined (everything from line 10 on is
	# dropped). The #! line and comment lines without text are skipped.
	# The description is then the first of:
	#   - the text of a "#" comment line
	#   - the text following an opening ''' or """ on the same line
	#   - the line following a ''' or """ that stands alone on its line
	# A closing ''' or """ at the end of the line is removed first.
	COMMENT=$(
		sed -n \
			-e '10,$d' \
			-e '/^#\!/d' \
			-e '/^[[:space:]]*#*[-*#[:space:]]*$/d' \
			-e 's/^[[:space:]]*# *//p' \
			-e "s/\(.\) *'''$/\1/" \
			-e 's/\(.\) *"""$/\1/' \
			-e "s/^''' *\(.\)/\1/p" \
			-e 's/^""" *\(.\)/\1/p' \
			-e "/^''' *$/{n;p;}" \
			-e '/^""" *$/{n;p;}' \
			< "$1" |
			head -n 1
	)
}

# File type: uue (UUencoded file)
comment_uue () {
	# The encoded file's name follows the mode on each "begin" line
	COMMENT=$(sed -n -E -e 's/^begin [0-9]+ //p' < "$1")
}

# File type: vbox (VirtualBox machine file)
# requires: xmlstarlet
comment_vbox () {
	# Build "name (OSType)" from the attributes of the Machine element
	COMMENT=$(
		xmlstarlet sel \
			-N vb=http://www.virtualbox.org/ \
			-t -v '/vb:VirtualBox/vb:Machine/@name' -o ' (' \
			-t -v '/vb:VirtualBox/vb:Machine/@OSType' -o ')' \
			< "$1"
	)
}

# File type: vbproj (Microsoft Visual Basic project file)
# This also works for related MS project files
# requires: xmlstarlet
comment_vbproj () {
	# Print the project's Description and then its RootNamespace, one per
	# line, and use the first line that is not empty. Elements are matched
	# by local-name(), i.e. regardless of namespace.
	COMMENT=$(
		xmlstarlet sel -t \
			-v "/*[local-name()='Project']/*[local-name()='PropertyGroup']/*[local-name()='Description']" -nl \
			-v "/*[local-name()='Project']/*[local-name()='PropertyGroup']/*[local-name()='RootNamespace']" -nl \
			< "$1" |
			sed '/^$/d' |
			head -n 1
	)
}

# File type: vcproj (Microsoft Visual Studio project file)
# requires: xmlstarlet
comment_vcproj () {
	# The project name is an attribute of the root element
	COMMENT=$(
		xmlstarlet sel -t -v '/VisualStudioProject/@Name' \
			< "$1"
	)
}

# File type: wml (Wireless Markup Language)
# requires: xmlstarlet
comment_wml () {
	# Use the content of the head's meta element named "title"; xmlstarlet
	# errors are discarded
	COMMENT=$(
		xmlstarlet sel -t \
			-v "/wml/head/meta[@name='title']/@content" \
			< "$1" 2>/dev/null
	)
}

# Get title from a number of different A/V formats
# Using ffmpeg for this is almost the only option for some formats, and is
# easier than having to install a new special program for each of the others.
# requires: ffmpeg
comment_ffmpeg () {
	# Dump the metadata in ffmetadata format and take the first title
	# (either the plain key or the QuickTime one, in any letter case)
	COMMENT=$(
		ffmpeg -loglevel error -i "$1" -f ffmetadata - |
			sed -n -E -e 's/^(title|com.apple\.quicktime\.title)=//ip' |
			head -n 1
	)
	# No title: settle for the first comment entry instead
	[ -n "$COMMENT" ] ||
		COMMENT=$(
			ffmpeg -loglevel error -i "$1" -f ffmetadata - |
				sed -n -E -e 's/^comment=//ip' |
				head -n 1
		)
}

# File type: wav (WAV audio)
# requires: ffmpeg
comment_wav () {
	# No dedicated tool is used for this type; delegate to the ffmpeg handler
	comment_ffmpeg "$@"
}

# File type: aiff (Audio Interchange File Format)
# requires: ffmpeg
comment_aiff () {
	# No dedicated tool is used for this type; delegate to the ffmpeg handler
	comment_ffmpeg "$@"
}

# File type: wacz (Web Archive Collection Zipped)
# requires: unzip, jq
comment_wacz () {
	# "safe" filename guaranteed not to start with a dash
	sf="$(safefn "$1")"
	# The collection title is the "title" field of datapackage.json.
	# "// empty" makes jq print nothing when the field is missing or null;
	# a bare ".title" would print the literal word "null", which would then
	# be reported as the comment.
	COMMENT=$(unzip -pq "$sf" datapackage.json | jq -r '.title // empty')
}

# File type: warcgz (Compressed Web Archive Collection file)
comment_warcgz () {
	# Decompress, strip carriage returns and delete every WARC record header
	# (from a "WARC/1.x" line to the next empty line). What is left starts
	# with the first data section; look for the isPartOf header only there,
	# i.e. up to the first empty line.
	COMMENT=$(
		gzip -dc < "$1" |
			tr -d '\015' |
			sed -E -e '/^WARC\/1\.[0-9]+$/,/^$/d' |
			sed -n -E -e '1,/^$/s/^isPartOf: *//p' |
			head -n 1
	)
}

# File type: xbm (X bitmap image)
# Could extract image size here as well
comment_xbm () {
	# The image name is the word in front of "_bits" in the array declaration
	COMMENT=$(
		sed -n -e 's/^.* \(.*\)_bits.*$/\1/p' \
			< "$1"
	)
}

# File type: xcf (Gimp image)
# requires: gimp
comment_xcf () {
	# Extract the Xmp.dc.title metadata tag by running a small Scheme batch
	# script in a non-interactive gimp. COMMENT is only assigned when gimp
	# produced some metadata.

	# "safe" filename with quoted double quotes
	# NOTE(review): only double quotes are escaped here; a backslash in the
	# file name is passed through as-is into the Scheme string - confirm
	# whether that needs escaping too.
	sf="$(echo "$1" | sed 's/"/\\"/g')"
	# gimp's Scheme interpreter displays some logging info before and after the
	# desired output, so use sed to delete it to leave only XML.
	# The script prints a METADATA-START marker, loads the image, prints its
	# metadata, deletes the image and quits; sed drops everything up to the
	# marker and stops after the line containing </metadata>.
	METADATA=$(echo '((display "\nMETADATA-START\n") (let ((img (car (gimp-file-load RUN-NONINTERACTIVE "'"$sf"'" "file")))) (display (car (gimp-image-get-metadata img))) (gimp-image-delete img)) (gimp-quit TRUE))' | gimp -n -i -d -f -s -g /dev/null --stack-trace-mode=never -b -  2>/dev/null | sed -e '1,/^METADATA-START/d' -e '/<\/metadata>/q')
	if [ -n "$METADATA" ]; then
		# Select the title tag and remove the lang="x-default" prefix from it
		COMMENT=$(echo "$METADATA" | xmlstarlet sel -t -v  '/metadata/tag[@name="Xmp.dc.title"]' | sed 's/lang="x-default" *//')
	fi
}

# File type: xpm (X pixmap image)
# Could extract image size here as well
comment_xpm () {
	# The image name is the word between the "*" of the array declaration
	# and "_xpm"
	COMMENT=$(
		sed -n -e 's/^.*\* *\(.*\)_xpm.*$/\1/p' \
			< "$1"
	)
}

# File type: pam (Portable Arbitrary Map image)
# requires: netpbm >= 10.35.00
comment_pam () {
	# pamfile lists the comments after a "Comments:" line. Drop everything up
	# to that line, strip the "#" markers and join at most three lines.
	COMMENT=$(
		pamfile -comments < "$1" |
			sed -e '1,/^Comments:$/d' -e 's/^ *# *//' |
			head -n 3 |
			trimspace
	)
}

# File type: pnm (pbm/pgm/ppm Portable Any Map image)
comment_pnm () {
	# Use the text of any "#" comment line among the first two lines
	COMMENT=$(
		head -n 2 < "$1" |
			sed -n -e 's/^# *//p'
	)
}


###########################

# Show the help text when called without arguments or with -h / -?.
# The conditions are separate tests joined with ||: chaining them with -o
# inside a single [ ] is obsolescent and is mis-parsed when "$1" happens to
# look like an operator such as "(" or "!".
# The exit status is 1 in every case, including an explicit help request.
if [ $# -eq 0 ] || [ "$1" = "-h" ] || [ "$1" = "-?" ] ; then
	printf '%s\n' \
		'autodescribe ver. 8-dev' \
		'Usage: autodescribe [-?] [-h] [-l] [-e program] [ -q ] [ -t type ] file1 [ file2 ... ]' \
		'Extracts comments from files' \
		'  -e cmd   command to run once for each file with args: comment file' \
		'             e.g. "setfattr -n user.xdg.comment -v"' \
		'  -h, -?   show this help' \
		'  -l       list supported file types' \
		'  -q       quiet output' \
		'  -t type  where type is one of the names shown with -l'
	exit 1
fi

# -l: the list of supported types is generated from the file type header
# comments found in this script itself, sorted with duplicates removed.
case "$1" in
	-l)
		sed -n -e 's/^# File type: //p' < "$0" | sort -u
		exit 0
		;;
esac

# The remaining options are positional: they are recognised only in the
# order -e, -q, -t, each one directly after the previous. Every setting
# starts from its default and is overridden when its option is present.

# -e cmd: command to run for each file
PROG=
if [ "$1" = "-e" ] ; then
	PROG="$2"
	shift
	shift
fi

# -q: quiet output
VERBOSE=1
if [ "$1" = "-q" ] ; then
	VERBOSE=0
	shift
fi

# -t type: force the file type
SETTYPE=
if [ "$1" = "-t" ] ; then
	SETTYPE="$2"
	shift
	shift
fi

# Loop through files, commenting one at a time.
# For each file: determine its type (forced with -t, otherwise from the file
# name), call the matching comment_<type> function, which leaves its result
# in COMMENT, then print the result and/or hand it to the -e command.
# Problems are reported on stderr and the loop moves on to the next file.
for f in "$@" ; do
	COMMENT=""

	if ! [ -r "$f" ] ; then
		printf '%s: Not found\n' "$f" 1>&2
		continue
	fi

	if [ -n "$SETTYPE" ] ; then
		TYPE="$SETTYPE"
	else
		# The first matching pattern wins, so the order of the arms matters
		case "$f" in
			*.3mf) TYPE=3mf ;;
			*.abw | *.zabw | *.abw.gz) TYPE=abw ;;
			*.ac) TYPE=ac ;;
			*.adoc) TYPE=asciidoc ;;
			*.aiff) TYPE=aiff ;;
			*.amf) TYPE=amf ;;
			*.apk) TYPE=apk ;;
			*.appdata.xml | *.metainfo.xml) TYPE=appdata ;;
			*.arj) TYPE=arj ;;
			*.asm | *.s) TYPE=asm ;;
			*.aup) TYPE=aup ;;
			*.avi | *.wmv) TYPE=avi ;;
			*.c | *.h) TYPE=c ;;
			*CMakeLists.txt) TYPE=cmake ;;
			*.cue) TYPE=cue ;;
			*.d64 | *.d71 | *.d80 | *.d82 | *.x64 | *.t64) TYPE=cbm ;;
			*.dar) TYPE=dar ;;
			# Many DocBooks just use the .xml extension which is too generic
			*.dbk) TYPE=docbook ;;
			*.deb) TYPE=deb ;;
			*.desktop) TYPE=desktop ;;
			*.doc | *.xls | *.ppt) TYPE=doc ;;
			*.docx | *.docm | *.pptx | *.xlsx) TYPE=docx ;;
			*.egg) TYPE=egg ;;
			*.exe | *.dll) TYPE=exe ;;
			*.flac) TYPE=flac ;;
			*.flatpakref) TYPE=flatpakref ;;
			*.fodt | *.fods | *.fodp | *.fodg) TYPE=fodf ;;
			*.gcode | *.gc | *.ngc | *.ncc | *.nc | *.tap) TYPE=gcode ;;
			*.gif) TYPE=gif ;;
			*.gnumeric) TYPE=gnumeric ;;
			*.gpx) TYPE=gpx ;;
			*.html | *.htm) TYPE=html ;;
			*.ics) TYPE=ics ;;
			*.iso) TYPE=iso ;;
			*.kdenlive) TYPE=kdenlive ;;
			*.jar) TYPE=jar ;;
			*.jpeg | *.jpg) TYPE=jpeg ;;
			*.kra) TYPE=kra ;;
			*.kvtml) TYPE=kvtml ;;
			*.lsm) TYPE=lsm ;;
			*.lzh | *.lha) TYPE=lzh ;;
			*.1 | *.2 | *.3 | *.4 | *.5 | *.6 | *.7 | *.8 | *.9) TYPE=man ;;
			*.man) TYPE=mantxt ;;
			*.mbox | *.mbx | *.mbo | *.nws | *.msg | *.eml | *.patch | *.diff) TYPE=subject ;;
			*.md | *.mkd | *.mkdn | *.mdwn | *.mdown | *.markdown) TYPE=md ;;
			*.mkv | *.webm) TYPE=mkv ;;
			*.mlt) TYPE=mlt ;;
			*.mov) TYPE=mov ;;
			*.mp3) TYPE=mp3 ;;
			*.mp4 | *.m4a) TYPE=mp4 ;;
			*.mscx) TYPE=mscx ;;
			*.mscz) TYPE=mscz ;;
			*.odt | *.otm | *.ott | *.odh | *.odc | *.otc | *.odg | *.otg | *.odi | *.oti | *.odp | *.otp | *.ods | *.ots | *.odf | \
			*.sxw | *.sxc | *.sxi | *.sxd | *.sxm)
			# *.otf is left off because it's much more commonly a font file
				TYPE=odf
				;;
			*.oxt) TYPE=oxt ;;
			*.ogg) TYPE=ogg ;;
			*.otf | *.ttf | *.woff) TYPE=otf ;;
			*.pam) TYPE=pam ;;
			*.pas | *.p) TYPE=pascal ;;
			*.pbm | *.pgm | *.ppm | *.pnm) TYPE=pnm ;;
			*.pc) TYPE=pc ;;
			*.pc.in) TYPE=pctmpl ;;
			*.pcap | *.pcapng) TYPE=pcap ;;
			*.pdb | *.prc) TYPE=palm ;;
			*.pfa | *.gsf) TYPE=psfont ;;
			*.plist) TYPE=plist ;;
			*pyproject.toml) TYPE=pyproject ;;
			*.rpm) TYPE=rpm ;;
			*.sh) TYPE=sh ;;
			*.pdf | *.ai) TYPE=pdf ;;
			*.pkg.tar.xz) TYPE=pacman ;;
			*.png) TYPE=png ;;
			*.ps) TYPE=ps ;;
			*.py) TYPE=py ;;
			*.rss | *.atom) TYPE=rss ;;
			*.kicad_sch | *.kicad_pcb) TYPE=kicad ;;
			*.sla) TYPE=sla ;;
			*.slob) TYPE=slob ;;
			*.snap) TYPE=snap ;;
			*.spdx) TYPE=spdx ;;
			*.stl) TYPE=stl ;;
			*.svg) TYPE=svg ;;
			*.svgz) TYPE=svgz ;;
			*.tar.bz2 | *.tbz | *.tbz2) TYPE=tbz2 ;;
			*.tar.gz | *.tar.Z | *.tar.z | *.tgz | *.taz) TYPE=tgz ;;
			*.tlz | *.tar.lzma) TYPE=tlzma ;;
			*.tar.lz) TYPE=tlzip ;;
			*.tar.xz | *.txz) TYPE=txz ;;
			*.tar.zst) TYPE=tzst ;;
			*.tc) TYPE=tellico ;;
			*.tex) TYPE=tex ;;
			*.texi | *.texinfo) TYPE=texi ;;
			*.tif | *.tiff) TYPE=tiff ;;
			*.txt | *.asc | *.rst | *README | *Readme | *ReadMe | *readme | \
			*file_id.diz)
				TYPE=first_line
				;;
			*.uue) TYPE=uue ;;
			*.vbox) TYPE=vbox ;;
			*.vbproj | *.csproj | *.vcxproj) TYPE=vbproj ;;
			*.vcproj) TYPE=vcproj ;;
			*.wacz) TYPE=wacz ;;
			*.warc.gz) TYPE=warcgz ;;
			*.wav) TYPE=wav ;;
			*.whl) TYPE=whl ;;
			*.wml) TYPE=wml ;;
			*.xbm) TYPE=xbm ;;
			*.xcf | *.xcf.gz | *.xcf.bz2 | *.xcf.xz | *.xcfgz | *.xcfbz2 | *.xcfxz) TYPE=xcf ;;
			*.xhb) TYPE=xhb ;;
			*.xpm) TYPE=xpm ;;
			*.zip) TYPE=zip ;;
			*.zoo) TYPE=zoo ;;
			# These must come after more specific handlers above. This list comes
			# directly from the output of "ebook-meta --help", plus *.xhtml
			*.xhtml | \
			*.azw | *.azw1 | *.azw3 | *.azw4 | *.cbr | *.cbz | *.chm | *.docx | *.epub | \
			*.fb2 | *.fbz | *.html | *.htmlz | *.imp | *.lit | *.lrf | *.lrx | *.mobi | \
			*.odt | *.oebzip | *.opf | *.pdb | *.pdf | *.pml | *.pmlz | *.pobi | *.prc | \
			*.rar | *.rb | *.rtf | *.snb | *.tpz | *.txt | *.txtz | *.updb | *.zip)
				TYPE=ebook
				;;
			*) TYPE="" ;;
		esac
	fi

	if [ -z "$TYPE" ] ; then
		printf '%s: Not a known file type\n' "$f" 1>&2
		# Nothing was attempted for this file, so do not go on to report
		# "No comment found" for it as well
		continue
	fi

	# Call the right function to find the comment
	comment_"$TYPE" "$f"

	if [ -z "$COMMENT" ] ; then
		printf '%s: No comment found\n' "$f" 1>&2
		continue
	fi

	if [ "$VERBOSE" -eq 1 ] ; then
		# Output format: 'file' 'comment', both shell-quoted
		shquote "$f"
		printf " "
		shquote "$COMMENT"
		echo ""
	fi
	if [ -n "$PROG" ]; then
		# The whole command line is handed to eval as ONE string. $PROG is
		# meant to be split into words by eval; the shell-quoted comment and
		# file name must reach eval untouched. Left unquoted, they would be
		# field-split and glob-expanded first, which collapses runs of
		# whitespace and expands a "*" inside the comment or file name.
		eval "$PROG $(shquote "$COMMENT") $(shquote "$f")"
	fi
done
exit 0