Skip to content

Add action to generate pdf diff #49

Add action to generate pdf diff

Add action to generate pdf diff #49

Workflow file for this run

name: Generate Rendered Diff PDFs
on:
pull_request:
types: [opened, synchronize, reopened]
branches:
- main
permissions:
contents: write
issues: write
pull-requests: write
actions: read
jobs:
generate_pdfs:
runs-on: ubuntu-latest
outputs:
files: ${{ steps.generate_files.outputs.files }}
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Fetch main branch
run: |
git fetch origin main
- name: Install dependencies
run: |
sudo apt-get update
sudo apt-get install -y \
pandoc \
texlive-latex-base \
texlive-latex-extra \
texlive-fonts-recommended \
texlive-xetex \
texlive-fonts-extra \
latexdiff \
latexmk \
ghostscript
- name: Generate PDF diffs
id: generate_files
shell: bash
run: |
git fetch origin main
MODIFIED_FILES=$(git diff --name-only origin/main...HEAD -- '*.md')
echo "Modified Markdown files:"
echo "$MODIFIED_FILES"
# Create necessary directories
mkdir -p old_version new_version diffs pdfs
# Create a custom LaTeX template
TEMPLATE=.github/workflows/template/template.tex
pandoc -D latex > default_template.tex
TEMPLATE=default_template.tex
for FILE in $MODIFIED_FILES; do
echo "Processing $FILE"
# Extract old version from main branch
git show origin/main:"$FILE" > old_version/"$(basename "$FILE")"
# Copy new version
cp "$FILE" new_version/
# Convert both versions to LaTeX using the custom template
pandoc old_version/"$(basename "$FILE")" --from markdown --to latex \
--template="$TEMPLATE" -o old_version/"$(basename "$FILE" .md)".tex
pandoc new_version/"$(basename "$FILE")" --from markdown --to latex \
--template="$TEMPLATE" -o new_version/"$(basename "$FILE" .md)".tex
# Run latexdiff with appropriate options
latexdiff \
--encoding=utf8 \
old_version/"$(basename "$FILE" .md)".tex \
new_version/"$(basename "$FILE" .md)".tex \
> diffs/"$(basename "$FILE" .md)"_diff.tex
# Compile the diffed LaTeX file to PDF using xelatex
cd diffs
latexmk -pdf -xelatex -interaction=nonstopmode -quiet -f "$(basename "$FILE" .md)"_diff.tex
cd ..
# Move the generated PDF to the pdfs directory
mv diffs/"$(basename "$FILE" .md)"_diff.pdf pdfs/
done
if [ -z "$MODIFIED_FILES" ]; then
echo "files=[]" >> $GITHUB_OUTPUT
else
MODIFIED_FILES_JSON=$(printf '%s\n' "$MODIFIED_FILES" | jq -R . | jq -s .)
# Use the correct syntax to write multi-line or complex outputs
echo "files<<EOF" >> $GITHUB_OUTPUT
echo "$MODIFIED_FILES_JSON" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
fi
- name: Upload PDFs
uses: actions/upload-artifact@v4
if: steps.generate_files.outputs.files != '[]' # Only run if there are modified files
with:
name: document_changes
path: pdfs/
retention-days: 90
- name: Comment on PR with download links
uses: actions/github-script@v7
env:
MODIFIED_FILES: ${{ steps.generate_files.outputs.files }}
with:
debug: true
script: |
const { issue, repo, sha } = context;
const run_id = process.env.GITHUB_RUN_ID;
// Unique identifier to find the bot's comment
const botCommentIdentifier = '<!-- GENERATED_BY_GITHUB_ACTION_PDF_DIFF -->';
const modifiedFilesJSON = process.env.MODIFIED_FILES
console.log("modifiedFilesJSON", modifiedFilesJSON);
let modifiedFiles = [];
try {
modifiedFiles = JSON.parse(modifiedFilesJSON);
} catch (error) {
console.error("Failed to parse modified files:", error);
}
console.log("modifiedFiles", modifiedFiles);
let body;
if (modifiedFiles.length === 0) {
// No artifacts were generated
body = `${botCommentIdentifier}\n:warning: No rendered diff PDFs were generated because no Markdown files were modified.`;
} else {
// Construct the comment body
body = `${botCommentIdentifier}\n:page_facing_up: Rendered diff PDFs have been generated for the following modified files:\n`;
for (const modifiedFile of modifiedFiles) {
body += `- ${modifiedFile}\n`;
}
// Add a comment about how to download in case links expire
const artifactUrl = `https://github.com/${repo.owner}/${repo.repo}/actions/runs/${run_id}#artifacts`;
body += `\n**Note**: You can download these files from the [Artifacts section of the workflow run](${artifactUrl}).`;
body += "\nThese files expire after 90 days."
// Add a commend about how to read latexdiff
body += "\n\n**How to read these PDFs**:\n"
body += "- Added text is wavy underlined and blue.\n"
body += "- Discarded text is struck out and red.\n"
}
body += `\n_Last updated at ${new Date().toUTCString()} for commit ${sha.substring(0, 7)}_`;
console.log("Listing comments for ", {
owner: repo.owner,
repo: repo.repo,
issue_number: issue.number,
});
// Fetch existing comments on the PR
const { data: comments } = await github.rest.issues.listComments({
owner: repo.owner,
repo: repo.repo,
issue_number: issue.number,
});
// Find the comment made by the bot with the identifier
const botComment = comments.find(
comment =>
comment.user.login === 'github-actions[bot]' &&
comment.body.includes(botCommentIdentifier)
);
if (botComment) {
console.log("Found existing comment. Updating");
// Update the existing comment
await github.rest.issues.updateComment({
owner: repo.owner,
repo: repo.repo,
comment_id: botComment.id,
body: body,
});
} else {
console.log("No existing comment. Creating new");
// Create a new comment
await github.rest.issues.createComment({
owner: repo.owner,
repo: repo.repo,
issue_number: issue.number,
body: body,
});
}