-
Notifications
You must be signed in to change notification settings - Fork 0
/
autodescribe
executable file
·1705 lines (1538 loc) · 50.5 KB
/
autodescribe
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/bin/sh
# autodescribe - automatically extract comments from files
#
# Copyright (c) 1998-2024 Dan Fandrich <[email protected]>
# Licensed under the MIT license (see LICENSE).
case "$ZSH_VERSION" in
    ?*)
        # Running under zsh, which by default does not split unquoted
        # parameter expansions into words. Enable that so it behaves like
        # the other shells; the multi-word command kept in $DECOMPRESS
        # needs to be split into separate arguments.
        setopt shwordsplit
        ;;
esac
# Print a shell-quoted version of the first argument
#
# The argument is wrapped in single quotes and every embedded single quote is
# rewritten as '\'' (close the quote, emit an escaped quote, reopen the
# quote). No trailing newline is printed.
shquote () {
    # A newline is appended so awk always sees at least one record; this is
    # what makes an empty argument come out as '' rather than as nothing.
    # Records after the first are re-joined with a newline so that
    # multi-line arguments stay inside a single pair of quotes.
    printf '%s\n' "$1" | awk -v q="'" '
        {
            gsub(q, q "\\" q q)
            printf "%s%s", (NR > 1 ? "\n" : q), $0
        }
        END { printf "%s", q }'
}
# Filter: join all input lines into one (newlines become spaces, carriage
# returns are removed) and strip whitespace from both ends of the result.
trimspace () {
    tr -d '\015' |
        tr '\n' ' ' |
        sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}
# Drop a comment if it's too generic and not descriptive of an archive.
# It may be fine as a file description, however, so use this function only
# in contexts that make sense.
#
# $1 is the candidate comment. It is printed unchanged on stdout when it
# looks useful; nothing is printed when it is judged useless.
# Uses trimspace and overwrites the global BARE.
drop_useless_comment () {
    # RCS Id
    if [ -z "${1##\$Id:*}" ]; then
        return
    fi
    # Strip uninteresting characters before comparison.
    # printf is used instead of echo so that comments such as "-n" or ones
    # containing backslashes are passed through untouched.
    BARE=$(printf '%s\n' "$1" | tr -d '.:=\000-' | trimspace)
    # Dots have already been removed at this point, so a "readme.txt" heading
    # must be matched as "readmetxt".
    if printf '%s\n' "$BARE" | grep -Ei '^(about|background|contents|description|example|general information|install|installation|instructions|intro|introduction|module|name|overview|project|readme|readmetxt|status|summary|synopsis|This is a Bazaar control directory|todo|tools|usage|usage guide)$' >/dev/null; then
        return
    fi
    printf '%s\n' "$1"
}
# Print the language locale taken from the LANG= line of `locale`, with
# underscores turned into dashes and anything from the first dot onward
# (the character encoding) removed. The result may or may not include a
# country, depending on what LANG holds.
get_lang_full () {
    locale |
        sed -n -E \
            -e '/^LANG=/!d' \
            -e 's/_/-/g' \
            -e 's/^.*=([^.]*)\.?.*$/\1/p'
}
# Like get_lang_full, but with every dash converted back to an underscore
# (e.g. fr-CA becomes fr_CA).
get_lang_full_under () {
    get_lang_full | sed -e 'y/-/_/'
}
# Print only the language part of get_lang_full's output: everything up to,
# but not including, the first dash (e.g. fr-CA becomes fr).
get_lang_generic () {
    get_lang_full | cut -d '-' -f 1
}
# Make a filename starting with a dash - safe to provide a program that
# would interpret it as an option.
#
# $1 is the file name. It is printed on stdout, prefixed with "./" when it
# begins with a dash and unchanged otherwise.
safefn () {
    # printf rather than echo: some shells' echo interprets backslash
    # sequences, which would corrupt file names containing backslashes.
    case "$1" in
        -*) printf '%s\n' "./$1" ;;
        *) printf '%s\n' "$1" ;;
    esac
}
# Following are functions to extract descriptions for specific file types
# This is a generic tar comment extractor. Comments are taken from embedded
# files, such as man pages, Appdata files, etc. The first argument is the
# file name, as normal, but the second argument is a command to decompress
# the tar archive and extract it to stdout.
#
# Overview: the archive's member list is filtered down to candidate files,
# which are ranked and stored in a temporary file. Each supported candidate
# type is then tried in a fixed order; a candidate is only extracted and
# handed to its handler while $COMMENT is still empty.
#
# Globals written here: sf, DECOMPRESS, TMPFILE, TMPFILE2, BASENAME1,
# BASENAME2, MATCHNAME and (via drop_useless_comment) COMMENT.
# NOTE(review): COMMENT is never cleared in this function, so it looks like
# the caller is expected to pass it in empty - confirm against the caller.
# NOTE(review): the handlers that are not visible in this part of the file
# (comment_appdata, comment_man, comment_texi, comment_md, comment_asciidoc,
# comment_first_line, comment_ac, comment_pctmpl) presumably set COMMENT the
# same way the visible ones do - confirm.
# NOTE(review): the mktemp results are not checked and there is no trap, so
# the temporary files are only removed when the function runs to its end.
#
# BUG: GNU tar will create empty directories, or directories containing
# symbolic links between ver. 1.11.2 until 1.33 (appears fixed in 1.34)
# BUG: GNU tar will extract all files if path ends in a number, e.g.
# playmidi-2.3/foo
# requires: tar, gzip, man-db, xmlstarlet
get_comment_compressed_tar () {
# "safe" filename guaranteed not to start with a dash
sf="$(safefn "$1")"
# $DECOMPRESS is always expanded unquoted below so that it is split into the
# decompression command and its options.
DECOMPRESS="$2"
# TMPFILE holds the ranked candidate list; TMPFILE2 holds the one member
# currently being examined.
TMPFILE=$(mktemp)
TMPFILE2=$(mktemp)
# First, scan for all the possible embedded types we support. A list of all
# possible file types is scanned first so the file only needs to be
# decompressed a maximum of one more time (assuming a successful comment
# extraction) to save time while also avoiding having lots of temporary
# files lying around at the same time.
#
# Skip any readme files more than 2 levels down in the directory hierarchy.
# Sort files by distance from root, so files higher up will be used first
# when there is more than one. Files that appear to be in a documentation
# directory get a half-level boost and those that appear to be in a dotted
# (hidden) directory get a full level demotion. Because of this sorting,
# wildcards cannot be used to extract files because the extraction order is
# the order encountered in the file, not the order specified.
#
# The awk stage computes the sort key: twice the number of "/"-separated
# path components, minus 1 when the path contains a component starting with
# doc/Doc/man/Man/info/Info, plus 2 when the path contains "/.". The key is
# removed again by cut once the list has been sorted numerically.
$DECOMPRESS "$1" | tar -tf - | \
grep -E '(\.man|\.[0-9]|\.lsm|\.appdata\.xml|\.metainfo\.xml|\.desktop|configure\.ac|README\.(adoc|md|rst|txt)|Readme\.(adoc|md|rst|txt)|ReadMe\.(adoc|md|rst|txt)|readme\.(adoc|md|rst|txt)|README|Readme|ReadMe|readme|\.texi|\.texinfo|pyproject.toml|CMakeLists\.txt|\.pc|\.pc\.in|\<file_id\.diz)$' | \
awk 'BEGIN {FS="/"} {doc=!!match($0, "/(([Dd]oc)|[Mm]an|[Ii]nfo)"); dot=!!index($0, "/."); print split($0, a)*2-doc+2*dot "\t" $0;}' | \
sort -n | \
cut -f2- | \
grep -viE '^.*/.*/.*/.*readme(\.[a-z]*)?$' > "$TMPFILE"
if [ -s "$TMPFILE" ]; then
# Found at least one candidate file
# Try to find the base name of the tar ball, without version numbers
# and file extensions. This isn't always easy, so use two heuristics to
# find one shorter and one (possibly) longer candidate.
# BASENAME1 is the leading run of letters and digits only; BASENAME2 also
# keeps following "-word" parts as long as each starts with a letter.
BASENAME1=$(basename "$sf" | sed -e 's/[^a-zA-Z0-9].*$//')
BASENAME2=$(basename "$sf" | sed -n -e 's/^\([a-zA-Z0-9]\+\(-[a-zA-Z][a-zA-Z0-9]*\)*\).*$/\1/p')
# Note: file types that do not tolerate concatenation with subsequent
# files (e.g. XML) must use MATCHNAME to extract only one single file
# instead of using wildcards (which is also bad given the reason
# above).
# Appdata file
MATCHNAME=$(grep -E '(\.appdata\.xml|\.metainfo\.xml)$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_appdata "$TMPFILE2"
fi
# XDG desktop file
MATCHNAME=$(grep '\.desktop$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_desktop "$TMPFILE2"
fi
# Linux Software Map
# This is basically obsolete these days but can be found in old
# archives.
MATCHNAME=$(grep '\.lsm$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_lsm "$TMPFILE2"
fi
# file_id.diz
# This is likely obsolete these days but can be found in old
# archives.
MATCHNAME=$(grep 'file_id\.diz$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_first_line "$TMPFILE2"
fi
# pyproject.toml
MATCHNAME=$(grep 'pyproject\.toml$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_pyproject "$TMPFILE2"
fi
# man page
# First, look for a man page based on the simple name of the tar file
MATCHNAME=$(grep -iE "(^|/)$BASENAME1(\.man|.[0-9])$" < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -z "$MATCHNAME" ]; then
# Expand the attempt to find a matching man page
MATCHNAME=$(grep -iE "(^|/)$BASENAME2(\.man|.[0-9])$" < "$TMPFILE" | head -1)
fi
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_man "$TMPFILE2"
fi
# Finally, try the first man page found
# Only look at the first matching man page since lexgrog wants only one
# and since some archives contain hundreds. The first one might not
# be the best match, unfortunately.
MATCHNAME=$(grep -E '(\.man|.[0-9])$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_man "$TMPFILE2"
fi
# TeXinfo pages
# First, look for a texinfo page based on the simple name of the tar
# file
MATCHNAME=$(grep -iE "(^|/)$BASENAME1\.texi(nfo)?$" < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -z "$MATCHNAME" ]; then
# Expand the attempt to find a matching texinfo page
MATCHNAME=$(grep -iE "(^|/)$BASENAME2\.texi(nfo)?$" < "$TMPFILE" | head -1)
fi
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_texi "$TMPFILE2"
fi
# Finally, try any texinfo pages
# Note: this doesn't handle spaces in filenames
# MATCHNAME may hold several names here (no head -1) and is expanded
# unquoted so that each one becomes its own tar argument; the extracted
# members are concatenated into TMPFILE2 and tar's stderr is discarded.
MATCHNAME=$(grep -E '\.texi(nfo)?$' < "$TMPFILE")
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - $MATCHNAME 2>/dev/null > "$TMPFILE2"
comment_texi "$TMPFILE2"
fi
# pkg-config file
MATCHNAME=$(grep -i '\.pc$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
comment_pc "$TMPFILE2"
fi
# pkg-config template file
MATCHNAME=$(grep -i '\.pc\.in$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
comment_pctmpl "$TMPFILE2"
fi
# README.md file
MATCHNAME=$(grep -i 'README\.md$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
comment_md "$TMPFILE2"
fi
# Drop the comment if it's not useful in this context
# (an emptied COMMENT lets the next readme variant be tried; the same
# filtering is repeated after each readme attempt below)
COMMENT=$(drop_useless_comment "$COMMENT")
# README.rst file
MATCHNAME=$(grep -i 'README\.rst$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
comment_first_line "$TMPFILE2"
fi
COMMENT=$(drop_useless_comment "$COMMENT")
# README.adoc file
MATCHNAME=$(grep -i 'README\.adoc$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
comment_asciidoc "$TMPFILE2"
fi
COMMENT=$(drop_useless_comment "$COMMENT")
# README file
MATCHNAME=$(grep -iE 'README(\.txt)?$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" 2>/dev/null > "$TMPFILE2"
comment_first_line "$TMPFILE2"
fi
COMMENT=$(drop_useless_comment "$COMMENT")
# GNU autoconf
MATCHNAME=$(grep 'configure\.ac$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_ac "$TMPFILE2"
fi
# CMake
MATCHNAME=$(grep 'CMakeLists\.txt$' < "$TMPFILE" | head -1)
if [ -z "$COMMENT" -a -n "$MATCHNAME" ]; then
$DECOMPRESS "$1" | tar -xOf - "$MATCHNAME" > "$TMPFILE2"
comment_cmake "$TMPFILE2"
fi
fi
# Remove the temporary files; the -n guards skip rm when mktemp produced no
# name.
if [ -n "$TMPFILE2" ]; then
rm -f "$TMPFILE2"
fi
if [ -n "$TMPFILE" ]; then
rm -f "$TMPFILE"
fi
}
# File type: tbz2 (bzip2-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, bzip2, man-db, xmlstarlet
comment_tbz2 () {
    # Hand the archive to the generic tar handler together with the command
    # that decompresses it to stdout.
    get_comment_compressed_tar "$1" "bzip2 -dc --"
}
# File type: tgz (compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, gzip, man-db, xmlstarlet
comment_tgz () {
    # Hand the archive to the generic tar handler together with the command
    # that decompresses it to stdout.
    get_comment_compressed_tar "$1" "gzip -dc --"
}
# File type: tlzip (lzip-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, lzip, man-db, xmlstarlet
comment_tlzip () {
    # Hand the archive to the generic tar handler together with the command
    # that decompresses it to stdout.
    get_comment_compressed_tar "$1" "lzip -dc --"
}
# File type: tlzma (lzma-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, lzma, man-db, xmlstarlet
comment_tlzma () {
    # Hand the archive to the generic tar handler together with the command
    # that decompresses it to stdout.
    get_comment_compressed_tar "$1" "lzma -dc --"
}
# File type: txz (xzip-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, xz, man-db, xmlstarlet
comment_txz () {
    # Hand the archive to the generic tar handler together with the command
    # that decompresses it to stdout.
    get_comment_compressed_tar "$1" "xz -dc --"
}
# File type: tzst (zstd-compressed tar archive)
# Comments are taken from embedded files.
# requires: tar, zstd, man-db, xmlstarlet
comment_tzst () {
    # Hand the archive to the generic tar handler together with the command
    # that decompresses it to stdout.
    get_comment_compressed_tar "$1" "zstd -dc --"
}
# File type: lzh (lzh/lha archive with desc.sdi comment)
# requires: lha || lhasa
comment_lzh () {
    # File name that can never be mistaken for an option
    sf="$(safefn "$1")"
    # Print the desc.sdi member with lha, or with lhasa (fed the archive on
    # stdin) when lha fails, and keep the first three lines joined into one.
    COMMENT=$(
        {
            lha pq "$sf" desc.sdi 2>/dev/null ||
                lhasa pq - desc.sdi < "$1" 2>/dev/null
        } |
            head -3 |
            trimspace
    )
}
# File type: zip (zip archive)
# requires: unzip (Info-ZIP version)
comment_zip () {
    # The archive comment is preferred; when there is none, the contents of
    # a desc.sdi member are used instead. COMMENT is left untouched when
    # neither is available.
    # "safe" filename guaranteed not to start with a dash
    sf="$(safefn "$1")"
    # Ask unzip for the archive comment only once and keep the answer in $2,
    # which avoids a second unzip run without introducing another global.
    set -- "$1" "$(unzip -zq "$sf" dummy-file-to-eliminate-output)"
    if [ -n "$2" ]; then
        COMMENT=$(printf '%s\n' "$2" | head -2 | trimspace)
    elif unzip -vqq "$sf" desc.sdi > /dev/null ; then
        COMMENT=$(unzip -pq "$sf" desc.sdi | head -3 | trimspace)
    fi
}
# File type: zoo (zoo archive)
# requires: unzoo || zoo
comment_zoo () {
    # File name that can never be mistaken for an option
    sf="$(safefn "$1")"
    # First choice: the "# " lines that unzoo prints ahead of its "Length"
    # table header (at most three of them).
    COMMENT=$(
        unzoo -l -v "$sf" |
            sed -n -e '/^Length/,$d' -e 's/^# //p' |
            head -3 |
            trimspace
    )
    [ -z "$COMMENT" ] || return 0
    # Second choice: the ">> " lines from zoo's comment listing.
    COMMENT=$(
        zoo vc "$sf" dummy-file-to-eliminate-output |
            sed -n -e 's/^>> //p' |
            trimspace
    )
    [ -z "$COMMENT" ] || return 0
    # Last resort: the first three lines of a desc.sdi member, if present.
    if zoo vq "$sf" desc.sdi > /dev/null; then
        COMMENT=$(zoo xpq "$sf" desc.sdi | head -3 | trimspace)
    fi
}
# File type: plist (Apple property list)
# requires: xmlstarlet
comment_plist () {
    # One xmlstarlet template is built per key, most desirable key first;
    # each prints the <string> following the first matching <key>. Only the
    # first line of the combined output is kept.
    # Everything runs inside the command substitution, so the helper
    # variables and the rebuilt argument list do not leak to the caller.
    COMMENT=$(
        plist="$1"
        set -- sel
        for key in CFBundleDisplayName CFBundleGetInfoString OpenSourceProject Label CFBundleName CFBundleIdentifier; do
            set -- "$@" -t -m "/plist/dict/key[normalize-space(text())='$key'][1]" -v "following-sibling::string[1]" -nl
        done
        xmlstarlet "$@" < "$plist" 2>/dev/null | head -1
    )
}
# File type: pyproject.toml (Python project definition)
comment_pyproject () {
    # Only lines between "[project]" and the next line starting with "[" are
    # considered. The description is preferred ...
    COMMENT=$(
        sed -E -n \
            -e '/^\[project\]$/,/^\[/s/^description *= *"?([^"]*)"? *$/\1/p' \
            < "$1" | head -1
    )
    # ... and the project name is the fallback.
    [ -n "$COMMENT" ] ||
        COMMENT=$(
            sed -E -n \
                -e '/^\[project\]$/,/^\[/s/^name *= *"?([^"]*)"? *$/\1/p' \
                < "$1" | head -1
        )
}
# File type: rpm (rpm package)
# requires: rpm
comment_rpm () {
    # Query the package file itself (-p) and print just its summary tag.
    COMMENT="$(rpm -qp --queryformat "%{SUMMARY}" -- "$1")"
}
# File type: cmake (CMake build file)
comment_cmake () {
    # Print what is inside the parentheses of each project(...) line (the
    # keyword is matched in any letter case), without surrounding spaces or
    # quotes, and join the results into a single line.
    # The expression is assembled from named pieces inside the command
    # substitution, so none of the helper variables leak to the caller.
    COMMENT=$(
        sp='[[:space:]]*'
        kw='[Pp][Rr][Oo][Jj][Ee][Cc][Tt]'
        q=\"\'
        sed -nE -e "s/^${sp}${kw}${sp}\\([ ${q}]*([^${q}]*)[ ${q}]*\\)${sp}(#.*)?\$/\\1/p" < "$1" |
            trimspace
    )
}
# File type: cue (CD cue index file)
comment_cue () {
    # Use the first double-quoted TITLE that starts in column one.
    # Carriage returns are removed first: with CRLF line endings the CR sits
    # between the closing quote and the end of the line, where the
    # expression only allows spaces, so no title would be found.
    COMMENT=$(tr -d '\015' < "$1" | sed -n -E -e '/^TITLE/s/^TITLE *"([^"]*)" *$/\1/p' | head -1)
}
# File type: cbm (Commodore disk image)
# See https://github.com/dfandrich/fvcbm/
# requires: fvcbm
comment_cbm () {
    # File name that can never be mistaken for an option
    sf="$(safefn "$1")"
    # Keep up to 16 characters following "Title:" and stop reading fvcbm's
    # output at the first line that starts with 16 or more "=" characters.
    COMMENT=$(
        fvcbm "$sf" |
            sed -n -E \
                -e 's/^Title: {1,3}(.{1,16}).*$/\1/p' \
                -e '/^={16,}/q' |
            trimspace
    )
}
# File type: desktop (XDG desktop entry file)
comment_desktop () {
    # The result is "Name, Comment" when both entries exist, otherwise
    # whichever one of the two was found.
    desktop_entry_lookup Name "$1"
    NAME="$COMMENT"
    desktop_entry_lookup Comment "$1"
    if [ -z "$NAME" ]; then
        # No name: whatever the Comment lookup produced stands on its own
        return
    fi
    COMMENT="$NAME${COMMENT:+, $COMMENT}"
}
# Helper for comment_desktop: store in COMMENT the first value of desktop
# entry key $1 found in file $2. The localized forms are tried from most to
# least specific (full locale, language only, English) before the plain,
# non-localized key. L is left holding the last locale that was tried.
desktop_entry_lookup () {
    # Current locale language, English by default
    L="$(get_lang_full_under)"
    [ -n "$L" ] || L=en
    COMMENT=$(sed -n -e "s/^$1\[$L\]=//p" < "$2" | head -1)
    [ -z "$COMMENT" ] || return 0
    # Generic language locale
    L="$(get_lang_generic)"
    COMMENT=$(sed -n -e "s/^$1\[$L\]=//p" < "$2" | head -1)
    [ -z "$COMMENT" ] || return 0
    # English
    L=en
    COMMENT=$(sed -n -e "s/^$1\[$L\]=//p" < "$2" | head -1)
    [ -z "$COMMENT" ] || return 0
    # Non-locale-dependent entry
    COMMENT=$(sed -n -e "s/^$1=//p" < "$2" | head -1)
}
# File type: doc (Microsoft composite document)
# requires: file
comment_doc () {
    # The document is read from the path given as $1, like every other
    # handler; the global $f is only consulted when no argument is passed,
    # so existing callers that relied on it keep working.
    # Extended regular expressions are used for "one or more spaces" because
    # \+ in a basic regular expression is a GNU extension.
    #
    # This will truncate titles with commas, but there's no foolproof way
    # to avoid it due to the simplistic output format used by file
    COMMENT=$(file - < "${1:-$f}" | sed -n -E -e 's@^.*Title: +([^,]*).*$@\1@p')
    if [ -z "$COMMENT" ] ; then
        # No title: fall back to the subject
        COMMENT=$(file - < "${1:-$f}" | sed -n -E -e 's@^.*Subject: +([^,]*).*$@\1@p')
    fi
}
# File type: docx (Microsoft Office document)
# requires: unzip, xmlstarlet
comment_docx () {
    # File name that can never be mistaken for an option
    sf="$(safefn "$1")"
    # Stream the core-properties part out of the archive and select its
    # title element.
    COMMENT=$(
        unzip -pq "$sf" docProps/core.xml |
            xmlstarlet sel -t -v /cp:coreProperties/dc:title
    )
}
# File type: egg (Python egg package)
# requires: unzip
comment_egg () {
    # "safe" filename guaranteed not to start with a dash
    sf="$(safefn "$1")"
    # Pull the Name and Summary values out of the package metadata and join
    # each pair of lines with ", ".
    # $!N instead of a bare N: when only one value is found there is no next
    # line to append, and a bare N at end of input makes POSIX sed exit
    # without printing the line it already holds (GNU sed prints it).
    COMMENT=$(unzip -pq "$sf" EGG-INFO/PKG-INFO | sed -En 's/^(Name|Summary): *//p' | sed -e '$!N' -e 's/\n/, /')
}
# File type: exe (Microsoft Windows PE executable)
# File type: dll (Microsoft Windows PE dynamic link library)
# requires: python >= 3, pefile (see https://github.com/erocarrera/pefile/)
comment_exe () {
    # The file is fed to an inline Python program on stdin. The program
    # prints the FileDescription value(s) found in the version-information
    # string tables, and prints nothing when pefile rejects the file format.
    # The Python code must keep its own indentation, starting in column one,
    # regardless of how the surrounding shell code is indented.
    COMMENT=$(python3 -c '
import pefile,sys
try:
    pe=pefile.PE(data=sys.stdin.buffer.read())
    if hasattr(pe, "FileInfo"):
        print("".join([v.decode("UTF-8")
                       for fi in pe.FileInfo
                       for sfi in fi if hasattr(sfi, "StringTable")
                       for item in sfi.StringTable
                       for k,v in item.entries.items() if k == b"FileDescription" ]))
except pefile.PEFormatError:
    pass  # probably an old-style file
' < "$1")
}
# File type: ebook (E-book formats, epub, azw, fbz, etc.)
# These are all handled by ebook-meta (part of calibre) so there doesn't seem to be a
# good reason to separate them into individual handlers.
# requires: calibre
comment_ebook() {
    # Title of an e-book as printed by ebook-meta.
    #   $1 - path of the e-book
    # Sets COMMENT (plus the scratch variables sf and BASENAME).
    #
    # The "safe" spelling of the name is guaranteed not to start with a dash
    sf="$(safefn "$1")"
    COMMENT=$(
        ebook-meta -- "$1" \
            | sed -n -e 's/^Title *: //p'
    )
    # When ebook-meta finds no real title it reports the file name with its
    # extension removed. That says nothing new, so such a result is
    # discarded.
    BASENAME=$(basename "$sf" | sed -e 's/\.[^.]*$//')
    case "$COMMENT" in
        "$BASENAME") COMMENT="" ;;
    esac
}
# File type: kicad (Kicad schematic)
comment_kicad () {
COMMENT=$(sed -n -e '1,/(title_block/d' -e 's/^[[:space:]]*(title "\(.*\)")$/\1/p' -e '/^[[:space:]]*)$/,$d' < "$1")
}
# File type: kra (Krita image)
# requires: unzip, xmlstarlet
comment_kra () {
    # Title stored in the documentinfo.xml member of a Krita image.
    #   $1 - path of the image
    # Sets COMMENT (and the scratch variable sf). Whatever xmlstarlet
    # writes to stderr is discarded.
    #
    # unzip is handed the "safe" spelling of the name, which is guaranteed
    # not to start with a dash
    sf="$(safefn "$1")"
    COMMENT=$(
        unzip -pq "$sf" documentinfo.xml \
            | xmlstarlet sel \
                -N d=http://www.calligra.org/DTD/document-info \
                -t -v /d:document-info/d:about/d:title 2>/dev/null
    )
}
# File type: kvtml (kwordquiz flash card file)
# requires: xmlstarlet
comment_kvtml () {
    # Title of a kvtml flash card file (/kvtml/information/title).
    #   $1 - path of the file, read on stdin by xmlstarlet
    # Sets COMMENT; xmlstarlet diagnostics are discarded.
    COMMENT=$(
        xmlstarlet sel -t -v '/kvtml/information/title' \
            < "$1" 2>/dev/null
    )
}
# File type: lsm (Linux Software Map)
comment_lsm () {
    # First Title: field of a Linux Software Map entry.
    #   $1 - path of the file
    # Sets COMMENT.
    #
    # A title may well continue on following lines; only its first line is
    # taken.
    COMMENT=$(
        sed -n 's/^Title:[[:space:]]*//p' < "$1" \
            | head -1
    )
}
# File type: pc (pkg-config library file)
# If a shell variable substitution is noted, then that field is ignored.
comment_pc () {
    # Describe a pkg-config file as "Name, Description".
    #   $1 - path of the .pc file
    # Sets COMMENT and NAME. A field whose value contains a "$" (a
    # variable substitution) is treated as absent; if only one field is
    # usable, COMMENT holds just that one.

    # The name comes first
    NAME=$(sed -n 's/^Name: *//p' < "$1" | head -1)
    if echo "$NAME" | grep '\$' >/dev/null; then
        NAME=
    fi

    # Then the description
    COMMENT=$(sed -n 's/^Description: *//p' < "$1" | head -1)
    if echo "$COMMENT" | grep '\$' >/dev/null; then
        COMMENT=
    fi

    # Put the name in front, with a separator only when both are present
    if [ -n "$NAME" ]; then
        COMMENT="$NAME${COMMENT:+, $COMMENT}"
    fi
}
# This is the autoconf-style templatized version of a .pc file.
# It works the same as a normal pkg-config file except if a template
# substitution is noted, then that field is ignored.
# File type: pctmpl (pkg-config library file template)
comment_pctmpl () {
    # Describe a pkg-config template as "Name, Description".
    #   $1 - path of the template
    # Sets COMMENT and NAME. A field whose value contains "$" or "@" (a
    # shell or template substitution) is treated as absent; if only one
    # field is usable, COMMENT holds just that one.

    # The name comes first
    NAME=$(sed -n 's/^Name: *//p' < "$1" | head -1)
    if echo "$NAME" | grep '[$@]' >/dev/null; then
        NAME=
    fi

    # Then the description
    COMMENT=$(sed -n 's/^Description: *//p' < "$1" | head -1)
    if echo "$COMMENT" | grep '[$@]' >/dev/null; then
        COMMENT=
    fi

    # Put the name in front, with a separator only when both are present
    if [ -n "$NAME" ]; then
        COMMENT="$NAME${COMMENT:+, $COMMENT}"
    fi
}
# File type: pacman (Pacman package)
# requires: tar, xz
# TODO: these packages can be compressed with gzip, bzip2 and zstd as well
comment_pacman () {
    # Describe a Pacman package from the pkgname and pkgdesc entries of its
    # .PKGINFO member. The archive is assumed to be xz-compressed.
    #   $1 - path of the package
    # Sets COMMENT.
    #
    # The description is emitted with a leading ", " and all newlines are
    # then removed, so the two values end up on a single line.
    COMMENT=$(
        xz -dc < "$1" \
            | tar -xOf - .PKGINFO \
            | sed -n -e 's/^pkgname = //p' -e 's/^pkgdesc = /, /p' \
            | tr -d '\n'
    )
}
# File type: pdf (Portable Document Format)
# requires: poppler
comment_pdf () {
    # First of the Title, Subject or Author lines printed by pdfinfo.
    #   $1 - path of the PDF
    # Sets COMMENT.
    COMMENT=$(
        pdfinfo -- "$1" \
            | sed -E -n -e 's/^(Title|Subject|Author):[[:space:]]*//p' \
            | head -1
    )
    # "Untitled" and "Untitled-<n>" (in any letter case) are common but
    # carry no information, so they are dropped
    if echo "$COMMENT" | grep -iE '^untitled(-[0-9]*)?$' >/dev/null; then
        COMMENT=""
    fi
}
# File type: png (PNG image)
# requires: pngtools
comment_png () {
    # Title text chunk of a PNG image as listed by pnginfo, or its
    # Description chunk when there is no title.
    #   $1 - path of the image
    # Sets COMMENT (and the scratch variable sf).
    #
    # pnginfo is handed the "safe" spelling of the name, which is
    # guaranteed not to start with a dash
    sf="$(safefn "$1")"
    COMMENT=$(
        pnginfo "$sf" \
            | sed -n -e 's/^[[:space:]]\+Title[^:]*: //p'
    )
    [ -n "$COMMENT" ] || COMMENT=$(
        pnginfo "$sf" \
            | sed -n -e 's/^[[:space:]]\+Description[^:]*: //p'
    )
}
# File type: ps (PostScript source code)
comment_ps () {
    # Title of a PostScript file, taken from its first %%Title: comment.
    #   $1 - path of the file
    # Sets COMMENT.
    COMMENT=$(
        sed -n 's/^%%Title: *//p' < "$1" \
            | head -1
    )
    # Without a title, settle for the first line that starts with one or
    # more % characters, minus those characters and the spaces after them
    [ -n "$COMMENT" ] || COMMENT=$(
        sed -n 's/^%%* *//p' < "$1" \
            | head -1
    )
}
# File type: psfont (PostScript font)
comment_psfont () {
    # Name of a PostScript font: the string given for /FullName or, when
    # that is missing, the name given for /FontName.
    #   $1 - path of the font file
    # Sets COMMENT.
    #
    # Any whitespace (tabs as well as spaces) is accepted before the key
    # and between the key and its value, so indented or tab-separated
    # dictionary entries are recognized too.
    COMMENT=$(sed -n 's/^[[:space:]]*\/FullName[[:space:]]*(\(.*\)).*$/\1/p' < "$1" | head -1)
    if [ -z "$COMMENT" ] ; then
        # Try for an alternate comment
        COMMENT=$(sed -n 's/^[[:space:]]*\/FontName[[:space:]]*\/\([^[:space:]]*\).*$/\1/p' < "$1" | head -1)
    fi
}
# File type: rss (Really Simple Syndication)
# requires: xmlstarlet
comment_rss () {
    # Channel title of an RSS feed (/rss/channel/title).
    #   $1 - path of the feed, read on stdin by xmlstarlet
    # Sets COMMENT.
    COMMENT=$(
        xmlstarlet sel -t -v '/rss/channel/title' \
            < "$1"
    )
}
# File type: subject (news article or mail file)
# File type: patch (git format-patch)
comment_subject () {
    # Subject of a news article, mail message or patch: its first
    # "Subject:" header (matched in any letter case), passed through
    # trimspace.
    #   $1 - path of the file
    # Sets COMMENT.
    COMMENT=$(
        sed -n 's/^Subject: *//ip' < "$1" \
            | head -1 \
            | trimspace
    )
    # No subject: fall back on the first Content-Description header
    [ -n "$COMMENT" ] || COMMENT=$(
        sed -n 's/^Content-Description: *//ip' < "$1" \
            | head -1 \
            | trimspace
    )
}
# File type: sla (Scribus document)
# requires: xmlstarlet
comment_sla () {
    # Title of a Scribus document: the TITLE attribute of
    # /SCRIBUSUTF8NEW/DOCUMENT.
    #   $1 - path of the document, read on stdin by xmlstarlet
    # Sets COMMENT.
    COMMENT=$(
        xmlstarlet sel -t -v '/SCRIBUSUTF8NEW/DOCUMENT/@TITLE' \
            < "$1"
    )
}
# File type: slob (Sorted List of Blobs dictionary)
# See https://github.com/itkach/slob/
# requires: slob
comment_slob () {
    # Value of the "label" tag of a slob dictionary, as printed by the
    # slob tool.
    #   $1 - path of the dictionary
    # Sets COMMENT.
    COMMENT=$(
        slob tag -n label -- "$1"
    )
}
# File type: snap (Snap package)
# requires: squashfs
comment_snap () {
    # Summary line of a Snap package, read from its meta/snap.yaml.
    #   $1 - path of the package
    # Sets COMMENT (and the scratch variable sf).
    #
    # unsquashfs is handed the "safe" spelling of the name, which is
    # guaranteed not to start with a dash
    sf="$(safefn "$1")"
    COMMENT=$(
        unsquashfs -cat "$sf" meta/snap.yaml \
            | sed -n 's/^summary: //p'
    )
}
# File type: spdx (Software Package Data Exchange)
# See https://spdx.dev/
comment_spdx () {
    # Name of an SPDX document: its first DocumentName: field.
    #   $1 - path of the file
    # Sets COMMENT.
    #
    # Fine for SPDX 2.X; SPDX 3.X (once it's finalized) may need something
    # else
    COMMENT=$(
        sed -n 's/^DocumentName: *//p' < "$1" \
            | head -1
    )
}
# File type: stl (STL model file)
comment_stl () {
    # Name of an STL model: the text after "solid" on the first line of an
    # ASCII file, or the 80-byte header of a binary one.
    #   $1 - path of the model
    # Sets COMMENT.
    #
    # Detect ASCII or binary format. A file is taken to be ASCII when
    #  - its first 80 bytes contain a newline,
    #  - it starts with "solid ", and
    #  - the byte at offset 80 is not NUL.
    # od pads its octal dump with blanks, so all whitespace is removed
    # before the comparison; otherwise the NUL check could never fail.
    # The three checks are separate tests joined with && because the -a
    # operator of [ is obsolescent and ambiguous with arbitrary operands.
    if [ "$(dd if="$1" bs=1 count=80 2>/dev/null | wc -l)" -gt 0 ] \
        && [ "$(dd if="$1" bs=1 count=6 2>/dev/null | tr -d '\0')" = "solid " ] \
        && [ "$(dd if="$1" bs=1 count=1 skip=80 2>/dev/null | od -An -b | tr -d '[:space:]')" != "000" ]; then
        # ASCII stl format
        COMMENT=$(sed -n -E -e 's/[[:space:]]*$//' -e '1s/^solid +//p' < "$1")
    else
        # Binary stl format: drop the NUL padding and trailing blanks, and
        # convert from windows-1252
        COMMENT=$(dd if="$1" bs=1 count=80 2>/dev/null | tr -d '\0' | sed -e 's/[[:space:]]*$//' | iconv -f windows-1252)
    fi
}
# File type: svg (Scalable Vector Graphics image)
# requires: xmlstarlet
comment_svg () {
    # Title of an SVG image: the title child of the root svg element or,
    # failing that, the title found under metadata/RDF/Work. Elements are
    # matched on their local names only.
    #   $1 - path of the image, read on stdin by xmlstarlet
    # Sets COMMENT.
    COMMENT=$(
        xmlstarlet sel -t \
            -v "/*[local-name()='svg']/*[local-name()='title']" \
            < "$1"
    )
    # Nothing there: look in the metadata instead
    [ -n "$COMMENT" ] || COMMENT=$(
        xmlstarlet sel -t \
            -v "/*[local-name()='svg']/*[local-name()='metadata']/*[local-name()='RDF']/*[local-name()='Work']/*[local-name()='title']" \
            < "$1"
    )
}
# File type: svgz (Compressed Scalable Vector Graphics image)
# requires: gzip, xmlstarlet
comment_svgz () {
    # Title of a gzip-compressed SVG image: the title child of the root svg
    # element or, failing that, the title found under metadata/RDF/Work.
    # Elements are matched on their local names only.
    #   $1 - path of the image
    # Sets COMMENT.
    COMMENT=$(
        gzip -dc < "$1" \
            | xmlstarlet sel -t \
                -v "/*[local-name()='svg']/*[local-name()='title']"
    )
    # Nothing there: decompress again and look in the metadata instead
    [ -n "$COMMENT" ] || COMMENT=$(
        gzip -dc < "$1" \
            | xmlstarlet sel -t \
                -v "/*[local-name()='svg']/*[local-name()='metadata']/*[local-name()='RDF']/*[local-name()='Work']/*[local-name()='title']"
    )
}
# File type: tellico (Tellico database file)
# requires: unzip, xmlstarlet
comment_tellico () {
    # Title of the collection stored in the tellico.xml member of a
    # Tellico database.
    #   $1 - path of the database
    # Sets COMMENT (and the scratch variable sf).
    #
    # unzip is handed the "safe" spelling of the name, which is guaranteed
    # not to start with a dash
    sf="$(safefn "$1")"
    # xmlstarlet always warns about the DTD on stderr; that warning (and,
    # as a side effect, every other one) is sent to /dev/null
    COMMENT=$(
        unzip -pq "$sf" tellico.xml \
            | xmlstarlet sel -t \
                -v "/*[local-name()='tellico']/*[local-name()='collection']/@title" 2> /dev/null
    )
}
# File type: tex (TeX document)
comment_tex () {
    # Title of a TeX document: the argument of each \title{...} that
    # starts a line (after optional whitespace), passed through trimspace.
    #   $1 - path of the document
    # Sets COMMENT.
    #
    # Too simple-minded to be right every time, but it works much of the
    # time
    COMMENT=$(
        sed -n -E -e 's/^[[:space:]]*\\title\{([^}]+)\}.*/\1/p' < "$1" \
            | trimspace
    )
}
# File type: texi (Texinfo document)
comment_texi () {
    # Title of a Texinfo document: the text following @settitle, passed
    # through trimspace.
    #   $1 - path of the document
    # Sets COMMENT.
    #
    # Too simple-minded to be right every time, but it works most of the
    # time
    COMMENT=$(
        sed -n -e 's/^[[:space:]]*@settitle //p' < "$1" \
            | trimspace
    )
}
# File type: tiff (TIFF image)
# requires: libtiff-progs
comment_tiff () {
    # Description of a TIFF image from the tags printed by tiffinfo:
    # ImageDescription, DocumentName or Artist.
    #   $1 - path of the image
    # Sets COMMENT.
    #
    # The tag lines are sorted in reverse so that ImageDescription is
    # preferred over the other two; only the first hit is kept.
    COMMENT=$(
        tiffinfo -- "$1" \
            | sort -r \
            | sed -E -n -e 's/^ *(ImageDescription|DocumentName|Artist): //p' \
            | head -1
    )
}
# File type: whl (Python wheel package)
# requires: unzip
comment_whl () {
    # Describe a Python wheel from the Name and Summary fields of its
    # *.dist-info/METADATA member, joined as "name, summary".
    #   $1 - path of the wheel
    # Sets COMMENT (and the scratch variable sf).
    #
    # unzip is handed the "safe" spelling of the name, which is guaranteed
    # not to start with a dash
    sf="$(safefn "$1")"
    # The second sed glues each pair of extracted lines together
    COMMENT=$(
        unzip -pq "$sf" '*.dist-info/METADATA' \
            | sed -En 's/^(Name|Summary): *//p' \
            | sed -e N -e 's/\n/, /'
    )
}
# File type: xhb (Homebank file)
# requires: xmlstarlet
comment_xhb () {
    # Account name(s) of a Homebank file: the name attribute of
    # /homebank/account.
    #   $1 - path of the file, read on stdin by xmlstarlet
    # Sets COMMENT.
    COMMENT=$(
        xmlstarlet sel -t -v '/homebank/account/@name' \
            < "$1"
    )
}
# File type: ac (autoconf source code)
comment_ac () {
    # Package name given as the first argument of AC_INIT in an autoconf
    # source file.
    #   $1 - path of the file
    # Sets COMMENT.
    #
    # Only the multi-argument form is recognized, e.g.
    #   AC_INIT([package], [version], [address])
    # The old single-argument form is deliberately left alone, e.g.
    #   AC_INIT(inc/libxyz.h.in)
    COMMENT=$(
        sed -E -n \
            's/^[[:space:]]*AC_INIT\([[:space:]]*\[?([^],]+).*,.*\)/\1/p' \
            < "$1"
    )
}
# File type: c (C source code)
comment_c () {
    # Text of the leading /* ... */ comments of a C source file: the first
    # three lines of comment text, passed through trimspace.
    #   $1 - path of the source file
    # Sets COMMENT.
    #
    # How the sed script works:
    #  - lines outside a comment are skipped until one contains "/*";
    #    everything up to and including that opener is removed
    #  - each comment line loses leading whitespace and asterisks as well
    #    as trailing whitespace, and is printed
    #  - on the line containing the closing "*/", the closer and whatever
    #    follows it are removed and the comment ends there
    # A comment that opens and closes on the same line therefore yields its
    # own text, and the code following it is not mistaken for comment text.
    COMMENT=$(sed -n '
        /\/\*/!b
        s/^.*\/\*//
        :body
        /\*\//{
            s/\*\/.*$//
            s/^[[:space:]*]*//
            s/[[:space:]]*$//
            p
            b
        }
        s/^[[:space:]*]*//
        s/[[:space:]]*$//
        p
        n
        b body
    ' < "$1" | head -3 | trimspace)
}
# File type: pascal (Pascal source code)
comment_pascal () {
# Collect comment text from a Pascal source file: the first three lines the
# sed script prints, passed through trimspace.
#   $1 - path of the source file
# Sets COMMENT.
#
# The sed script has four parts:
#  1. lines that consist only of "(*", a run of dashes, asterisks,
#     backslashes and spaces, and "*)" are dropped
#  2. lines that consist only of a single "*" are dropped
#  3. lines in a range from one containing "(*" to one containing "*)" are
#     printed after removing everything through the opener, leading spaces
#     and asterisks, and trailing spaces
#  4. lines containing "{...}" are printed after removing everything
#     through the last "{", a "}" at the end of the line, leading spaces
#     and asterisks, and trailing spaces
# NOTE(review): nothing in part 3 removes the closing "*)", so it appears to
# stay in the printed text - confirm whether trimspace is expected to cope.
# NOTE(review): sed only looks for the end of a range on the lines after its
# first line, so a "(* ... *)" comment on a single line presumably keeps the
# range open until a later line contains "*)" - verify against real input.
COMMENT=$(sed -n -e '/^[[:space:]]*(\*[-\* ]*\*)[[:space:]]*$/d' -e '/^[[:space:]]*\*[[:space:]]*$/d' -e '/(\*/,/\*)/{
/(\*/s/^.*(\*//
s/^[ *]*//
s/[ ]*$//
p
}' -e '/{.*}/{
s/^.*{//
s/} *$//
s/^[ *]*//
s/[ ]*$//
p
}' < "$1" | head -3 | trimspace)
}
# File type: 3mf (3-D Manufacturing format)
# requires: unzip, xmlstarlet
comment_3mf () {
    # Title of a 3MF model: the metadata element named "Title" in the
    # 3D/3dmodel.model member of the zip container. Elements are matched on
    # their local names only.
    #   $1 - path of the model
    # Sets COMMENT (and the scratch variable sf).
    #
    # unzip is handed the "safe" spelling of the name, which is guaranteed
    # not to start with a dash
    sf="$(safefn "$1")"
    COMMENT=$(
        unzip -pq "$sf" 3D/3dmodel.model \
            | xmlstarlet sel -t \
                -v "/*[local-name()='model']/*[local-name()='metadata'][@name='Title']"
    )
}
# File type: abw (AbiWord document)
# requires: xmlstarlet
comment_abw () {
    # Title of an AbiWord document: the metadata entry whose key is
    # "dc.title".
    #   $1 - path of the document, read on stdin by xmlstarlet
    # Sets COMMENT; xmlstarlet diagnostics are discarded.
    COMMENT=$(
        xmlstarlet sel \
            -N a=http://www.abisource.com/awml.dtd \
            -t -v "/a:abiword/a:metadata/a:m[@key='dc.title']" \
            < "$1" 2>/dev/null
    )
}
# File type: asciidoc (AsciiDoc text)
comment_asciidoc () {
    # Title of an AsciiDoc text: the first heading (a line starting with
    # one or more "=" and a space) within the first ten lines, without its
    # trailing spaces and "=" characters, passed through trimspace.
    #   $1 - path of the file
    # Sets COMMENT.
    COMMENT=$(
        head -10 < "$1" \
            | sed -nE -e 's/[ =]*$//' -e 's/^=+ //p' \
            | head -1 \
            | trimspace
    )
}
# File type: amf (Additive Manufacturing File)
# requires: unzip, xmlstarlet
comment_amf () {
    # Name of an AMF model: the top-level metadata entry of type "name" or,
    # failing that, the one(s) given for its objects.
    #   $1 - path of the model, which may be plain XML or zip-compressed
    # Sets COMMENT (plus the scratch variables sf, TMPFILE and INFILE).
    #
    # unzip is handed the "safe" spelling of the name, which is guaranteed
    # not to start with a dash
    sf="$(safefn "$1")"

    # Assume plain XML unless the file starts with the zip signature, in
    # which case it is first unpacked into a temporary file
    TMPFILE=
    INFILE="$1"
    if [ "$(dd if="$1" bs=1 count=2 2>/dev/null )" = "PK" ]; then
        TMPFILE=$(mktemp)
        INFILE="${TMPFILE}"
        unzip -pq "$sf" > "$INFILE"
    fi

    COMMENT=$(
        xmlstarlet sel -t -v "/amf/metadata[@type='name']" \
            < "$INFILE"
    )
    # BUG: There will be no separators between multiple object names
    [ -n "$COMMENT" ] || COMMENT=$(
        xmlstarlet sel -t -v "/amf/object/metadata[@type='name']" \
            < "$INFILE"
    )

    # Remove the temporary copy, if one was made
    [ -z "$TMPFILE" ] || rm -f "$TMPFILE"
}
# File type: apk (Android Package Kit)
# requires: Android SDK
comment_apk () {
    # Application label of an Android package as reported by
    # "aapt d badging", localized if possible.
    #   $1 - path of the package
    # Sets COMMENT (empty when no label is found) and leaves the preferred
    # locale in L.
    #
    # aapt is run only once and its output is searched repeatedly. Labels
    # are tried in this order:
    #   1. the full current locale (English when none is set)
    #   2. the generic language of the current locale
    #   3. English
    #   4. the label that carries no locale at all
    # The ":" that ends each pattern pins the locale exactly, so that "en"
    # cannot pick up the label of, say, "en-AU".

    # Get current locale language
    L="$(get_lang_full)"
    if [ -z "$L" ]; then
        L=en # English by default
    fi
    _apk_badging=$(aapt d badging "$1")
    COMMENT=
    for _apk_suffix in "-$L" "-$(get_lang_generic)" "-en" ""; do
        # An empty generic language leaves a bare "-"; nothing to look for
        [ "$_apk_suffix" != "-" ] || continue
        COMMENT=$(printf '%s\n' "$_apk_badging" \
            | grep "^application-label${_apk_suffix}:" \
            | sed -e "s/^[^']*'//" -e "s/'.*$//" \
            | head -1)
        [ -z "$COMMENT" ] || break
    done
    unset _apk_badging _apk_suffix
}
# File type: appdata (Appdata metainfo file)
# requires: xmlstarlet
comment_appdata () {
    # Describe an Appdata metainfo file as "name, summary", each localized
    # if possible.
    #   $1 - path of the file, read on stdin by xmlstarlet
    # Sets COMMENT and NAME (plus the scratch variable L). If only one of
    # the two values is found, COMMENT holds just that one.
    #
    # Both values are looked up the same way: first for the full current
    # locale (English when none is set), then for the generic language,
    # and finally the entry that has no xml:lang attribute.

    # The name comes first
    L="$(get_lang_full)"
    [ -n "$L" ] || L=en # English by default
    NAME=$(xmlstarlet sel -t -v '(/application|/component)/name[@xml:lang="'"$L"'"]' <"$1")
    if [ -z "$NAME" ]; then
        # Generic locale next
        L="$(get_lang_generic)"
        NAME=$(xmlstarlet sel -t -v '(/application|/component)/name[@xml:lang="'"$L"'"]' <"$1")
        # And finally the default name
        [ -n "$NAME" ] || NAME=$(
            xmlstarlet sel -t -v '(/application|/component)/name[not(@xml:lang)]' <"$1" \
                | head -1
        )
    fi

    # Then the summary, starting over from the full locale
    L="$(get_lang_full)"
    [ -n "$L" ] || L=en # English by default
    COMMENT=$(xmlstarlet sel -t -v '(/application|/component)/summary[@xml:lang="'"$L"'"]' <"$1")
    if [ -z "$COMMENT" ]; then
        # Generic locale next
        L="$(get_lang_generic)"
        COMMENT=$(xmlstarlet sel -t -v '(/application|/component)/summary[@xml:lang="'"$L"'"]' <"$1")
        # And finally the default summary
        [ -n "$COMMENT" ] || COMMENT=$(
            xmlstarlet sel -t -v '(/application|/component)/summary[not(@xml:lang)]' <"$1" \
                | head -1
        )
    fi

    # Put the name in front, with a separator only when both are present
    if [ -n "$NAME" ]; then
        COMMENT="$NAME${COMMENT:+, $COMMENT}"
    fi
}
# File type: arj (ARJ archive)
# requires: arj
comment_arj () {
    # Archive comment of an ARJ archive: what "arj v" prints between its
    # "Archive created:" line and the "Sequence/Pathname/Comment/Chapters"
    # heading, limited to three lines and passed through trimspace.
    #   $1 - path of the archive
    # Sets COMMENT.
    COMMENT=$(
        arj v -- "$1" \
            | sed -e '1,/^Archive created:/d' \
                -e '/^Sequence\/Pathname\/Comment\/Chapters/,$d' \
            | head -3 \
            | trimspace
    )
}
# File type: asm (assembly language source)
comment_asm () {
    # Title of an assembly language source file: the operand of each
    # "title" or ".title" directive that starts a line (after optional
    # whitespace), with surrounding quote characters removed.
    #   $1 - path of the source file
    # Sets COMMENT.
    COMMENT=$(
        sed -n -E \
            -e "/^[[:space:]]*(\.)?title/{s/^[[:space:]]*(\.)?title[[:space:]]*//;s/^['\"]*//;s/['\"]*$//;p;}" \
            < "$1"
    )
}
# File type: aup (Audacity Project file)
# requires: xmlstarlet
comment_aup () {
    # Title of an Audacity project: the value of its tag named "TITLE".
    #   $1 - path of the project file, read on stdin by xmlstarlet
    # Sets COMMENT; xmlstarlet diagnostics are discarded.
    COMMENT=$(
        xmlstarlet sel \
            -N a=http://audacity.sourceforge.net/xml/ \
            -t -v '/a:project/a:tags/a:tag[@name="TITLE"]/@value' \
            < "$1" 2>/dev/null
    )
}
# File type: avi (Audio Video Interleave file)
# See https://sourceforge.net/projects/avifile/
# requires: avifile-samples
comment_avi () {
    # Name of an AVI file from the "InfoChunk Name" line printed by
    # avitype. When that yields nothing the job is handed over to
    # comment_ffmpeg with the same arguments.
    #   $1 - path of the file
    # Sets COMMENT.
    COMMENT=$(
        avitype -- "$1" \
            | sed -n -e 's/^<AVI reader> *: *InfoChunk Name: *//p'
    )
    [ -n "$COMMENT" ] || comment_ffmpeg "$@"
}
# File type: dar (Disk Archiver archive)
# See http://dar.linux.free.fr/
# requires: dar >= 2.7.0
comment_dar () {
    # User comment stored in the header of a dar archive.
    #   $1 - path of the archive
    # Sets COMMENT; dar diagnostics are discarded.
    #
    # dar runs with LC_ALL=C so that the "User comment" label being
    # searched for is not translated
    COMMENT=$(
        LC_ALL=C dar -Q -aheader -l "$1" 2>/dev/null \
            | sed -n -e 's/^User comment *: *//p'
    )
}
# File type: deb (Debian package)
# requires: binutils, gzip, tar, xz, zstd
comment_deb () {
    # Description of a Debian package: the text following "Description:"
    # in the control file held by the package's control tarball.
    #   $1 - path of the package
    # Sets COMMENT. An error is printed on stderr, and COMMENT left empty,
    # when no control tarball with a known compression is found.
    #
    # ar lists whichever of the three candidate members exists; the suffix
    # of that listing selects the matching decompressor
    case "$(ar t -- "$1" control.tar.zst control.tar.xz control.tar.gz 2>/dev/null)" in
        *zst)
            COMMENT=$(
                ar p -- "$1" control.tar.zst \
                    | zstd -dc \
                    | tar xOf - ./control \
                    | sed -n 's/^Description: *//p'
            )
            ;;
        *xz)
            COMMENT=$(
                ar p -- "$1" control.tar.xz \
                    | xz -qdc \
                    | tar xOf - ./control \
                    | sed -n 's/^Description: *//p'
            )
            ;;
        *gz)
            COMMENT=$(
                ar p -- "$1" control.tar.gz \
                    | gzip -qdc \
                    | tar xOf - ./control \
                    | sed -n 's/^Description: *//p'
            )
            ;;
        *)
            echo "Error: unknown deb compression" 1>&2
            COMMENT=
            ;;
    esac
}
# File type: docbook (DocBook document)
# requires: xmlstarlet
comment_docbook () {
    # Title of a DocBook document: /book/bookinfo/title for a book or,
    # failing that, /refentry/refmeta/refentrytitle for a man page.
    #   $1 - path of the document, read on stdin by xmlstarlet
    # Sets COMMENT; xmlstarlet diagnostics are discarded.

    # Docbook document
    COMMENT=$(
        xmlstarlet sel -t -v /book/bookinfo/title \
            < "$1" 2>/dev/null
    )
    # Docbook man page
    [ -n "$COMMENT" ] || COMMENT=$(
        xmlstarlet sel -t -v /refentry/refmeta/refentrytitle \
            < "$1" 2>/dev/null
    )
}
# File type: flac (FLAC audio file)
# requires: flac
comment_flac () {
    # Title of a FLAC audio file from the TITLE tag exported by metaflac.
    # When that yields nothing the job is handed over to comment_ffmpeg
    # with the same arguments.
    #   $1 - path of the file
    # Sets COMMENT.
    COMMENT=$(
        metaflac --export-tags-to=- -- "$1" \
            | sed -n -E -e 's/^TITLE=//p'
    )
    [ -n "$COMMENT" ] || comment_ffmpeg "$@"
}
# File type: flatpakref (Flatpak Reference File)
comment_flatpakref () {
    # Title of a Flatpak reference file: its first Title= entry.
    #   $1 - path of the file
    # Sets COMMENT.
    COMMENT=$(
        sed -n 's/^Title= *//p' < "$1" \
            | head -1
    )
}
# File type: fodf (Open Document Format flat file)
# requires: xmlstarlet
comment_fodf () {
    # Title of an Open Document Format flat file: the dc:title element of
    # /office:document/office:meta.
    #   $1 - path of the document, read on stdin by xmlstarlet
    # Sets COMMENT.
    COMMENT=$(
        xmlstarlet sel -t -v /office:document/office:meta/dc:title \
            < "$1"
    )
}
# File type: gcode (G-code machine control file)
# gcode has a few codes that could be used as titles, but aren't widely
# supported. Just extract a title from structured comments.
comment_gcode () {
    # Title of a G-code file, taken from the structured comments that
    # various generators leave in it. Each result is passed through
    # trimspace where the pipelines below say so.
    #   $1 - path of the file
    # Sets COMMENT.

    # Supports PrusaSlicer ver. >= 2.2.0, Cura >= ~4.0.0, Fanuc
    # When several objects are present the name of the first one is used
    COMMENT=$(
        sed -n -E \
            -e 's/^; printing object ([^ ]+).*$/\1/p' \
            -e 's/^;MESH://p' \
            -e 's/^[^;(]*\<[Oo][0-9]+ *\(([^)]+)\).*$/\1/p' \
            < "$1" \
            | head -1 \
            | trimspace
    )

    # Supports CamBam http://www.cambam.co.uk/
    # The name is the first word of the line after "Made using CamBam"
    [ -n "$COMMENT" ] || COMMENT=$(
        sed -E -n \
            -e '/Made using CamBam/{n;s/^\( *([^ ]+).*$/\1/p' \
            -e '}' \
            < "$1"
    )

    # PyCAM https://pycam.sourceforge.net/
    # The file name is kept without its directory part
    [ -n "$COMMENT" ] || COMMENT=$(
        sed -n -E -e 's/^;PYCAM-META-DATA: Filename: (.*[/\\])?//p' < "$1" \
            | head -1 \
            | trimspace
    )
}
# File type: gif (GIF image)