-
Notifications
You must be signed in to change notification settings - Fork 1
/
check-links
executable file
·73 lines (66 loc) · 2.36 KB
/
check-links
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
set -e -u -f -o pipefail
# Crawl a website with wget and report failed requests and error pages.
#
# Globals:
#   WEBSITE, DIR, LOG, STATUS_FILE - set here and marked readonly, so calling
#     main a second time in the same shell fails when reassigning them.
#   reject_regex - set to the URL patterns wget must not fetch.
#   PROBLEMS - set to the combined problem report (empty if none found).
#   HOME - read to locate bin/check-links.awk.
# Arguments:
#   $1 - URL of the website to crawl.
# Outputs:
#   When problems were found or stdin is a terminal: the report (or a
#   "no problems" message) and the location of the saved files, on stdout.
#   Otherwise nothing is printed and the download directory is removed.
# Side effects:
#   Changes the working directory to a freshly created temporary directory.
# Returns:
#   1 if any problems were found, 0 otherwise.
main() {
  WEBSITE="$1"
  DIR="$(mktemp -d /tmp/check-links-XXXXXXXXXX)"
  LOG="${DIR}/wget.log"
  STATUS_FILE="${DIR}/request_status"
  readonly WEBSITE DIR LOG STATUS_FILE
  # wget saves downloaded pages and its log relative to the working directory.
  cd "${DIR}"
  reject_regex="/xmlrpc.php|/blog/Smarter_HTTP_redirects/"
  # wget's exit status is deliberately ignored; problems are detected from the
  # log and the downloaded content below.
  wget \
    --output-file wget.log \
    --execute robots=off \
    --reject-regex "${reject_regex}" \
    --content-on-error \
    --wait 1 \
    --recursive \
    --level 10 \
    --page-requisites \
    "${WEBSITE}" || true
  # -- marks the start of a request; replace the hold space with that line.
  # When the request status is seen, append the line to the hold space, swap the
  # hold and pattern spaces, replace newline with space to join the lines, then
  # print the pattern space.  Replace the hold space with the pattern space
  # so we have the URL for when the request fails due to auth and succeeds on
  # retry; we'll get an output line for the failure, then an output line for
  # success with both messages, but that should be handled properly by the retry
  # logic in check-links.awk.
  # TODO: add a test for that behaviour.
  sed -n \
    -e '/^--/ h' \
    -e '/HTTP request sent, awaiting response.../ { H; g; s/\n/ /; p; h; }' \
    "${LOG}" > "${STATUS_FILE}"
  PROBLEMS="$(awk -f "${HOME}/bin/check-links.awk" "${STATUS_FILE}")"
  # Check for common error messages too.  grep exits non-zero when nothing
  # matches, which is the good case here, hence the '|| true'.
  local fatal_errors
  local nl=$'\n'
  fatal_errors="$(grep -r -F -e 'Fatal error' "${DIR}" || true)"
  if [[ -n "${fatal_errors}" ]]; then
    # Command substitution strips trailing newlines, so put one back between
    # the two reports; otherwise the last awk line and the first grep line
    # would be fused into a single line.
    PROBLEMS+="${PROBLEMS:+${nl}}${fatal_errors}"
  fi
  # Stay silent (and clean up) only for a clean non-interactive run.
  if [[ -n "${PROBLEMS}" || -t 0 ]]; then
    if [[ -n "${PROBLEMS}" ]]; then
      echo "Problems found:"
      echo "${PROBLEMS}"
    else
      echo "No problems found :)"
    fi
    echo "See ${LOG} and the contents of ${DIR} for further investigation"
  else
    rm -rf "${DIR}"
  fi
  if [[ -n "${PROBLEMS}" ]]; then
    return 1
  fi
  return 0
}
# Entry point.  main is invoked only when this file is executed as a script;
# when it is sourced (e.g. by a test) nothing below runs.  Basenames are
# compared so that 'bash -x ./check-links ...' still counts as direct execution.
if [[ "$(basename "${BASH_SOURCE[0]}")" == "$(basename "${0}")" ]]; then
  # Exactly one argument is required, and it must not be a help flag.
  if [[ "$#" -ne 1 || "${1:-}" == '-h' || "${1:-}" == '--help' ]]; then
    printf '%s\n' \
      "Usage: $0 http://SOME-WEBSITE/" \
      "Links outside SOME-WEBSITE will not be checked." \
      "Page contents will be downloaded and saved for investigation." >&2
    exit 1
  fi
  # When stdin is not a terminal, delay the start by a random 0-599 seconds
  # unless SKIP_STARTUP_SLEEP is set to a non-empty value.
  if ! [[ -t 0 || -n "${SKIP_STARTUP_SLEEP:-}" ]]; then
    sleep "$((RANDOM % 600))"
  fi
  main "$@"
fi