Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support writing password protected workbooks (v2 data source only) #626

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 43 additions & 3 deletions src/main/scala/com/crealytics/spark/v2/excel/ExcelGenerator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

package com.crealytics.spark.v2.excel

import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream
import org.apache.hadoop.fs.Path
import org.apache.poi.hssf.usermodel.HSSFWorkbook
import org.apache.poi.ss.usermodel.Cell
Expand All @@ -27,8 +28,13 @@ import org.apache.poi.ss.usermodel.Workbook
import org.apache.poi.xssf.usermodel.XSSFWorkbook
import org.apache.poi.ss.util.WorkbookUtil
import org.apache.hadoop.conf.Configuration
import org.apache.poi.openxml4j.opc.OPCPackage
import org.apache.poi.poifs.crypt.{EncryptionInfo, EncryptionMode}
import org.apache.poi.poifs.filesystem.POIFSFileSystem
import org.apache.spark.sql.catalyst.util.DateTimeUtils

import java.io.OutputStream

class ExcelGenerator(val path: String, val dataSchema: StructType, val conf: Configuration, val options: ExcelOptions) {
/* Prepare target Excel workbook, sheet and where to write to */
private val wb: Workbook =
Expand Down Expand Up @@ -161,8 +167,42 @@ class ExcelGenerator(val path: String, val dataSchema: StructType, val conf: Con
val fs = hdfsPath.getFileSystem(conf)
fs.create(hdfsPath, true)
}
wb.write(fos)
wb.close()
fos.close()
try {
options.workbookPassword match {
case Some(pass) if !pass.isEmpty => encrypt(wb, pass, fos)
case _ => wb.write(fos)
}
} finally {
wb.close()
fos.close()
}
}

// TODO: consider buffering through a temp file instead of an in-memory UnsynchronizedByteArrayOutputStream to reduce memory use
/** Writes `wb` to `outputStream` as a password-protected document.
  *
  * The workbook is first serialized into an in-memory byte buffer, re-opened as an
  * [[OPCPackage]], and saved through an encrypting stream (agile encryption mode) into a
  * fresh [[POIFSFileSystem]], which is then written to `outputStream`.
  *
  * This method does not itself call `close()` on `wb` or `outputStream`.
  *
  * NOTE(review): `OPCPackage.open` expects OOXML (zip) content, so this presumably fails for
  * an `HSSFWorkbook` (.xls) — confirm and guard at the call site if needed.
  *
  * @param wb           workbook to serialize and encrypt
  * @param password     password used to protect the output
  * @param outputStream destination that receives the encrypted bytes
  */
private def encrypt(wb: Workbook, password: String, outputStream: OutputStream): Unit = {
  val fs = new POIFSFileSystem
  try {
    val info = new EncryptionInfo(EncryptionMode.agile)
    val enc = info.getEncryptor
    enc.confirmPassword(password)
    val bos = new UnsynchronizedByteArrayOutputStream()
    try {
      wb.write(bos)
      val opc = OPCPackage.open(bos.toInputStream)
      // Each resource gets its own try/finally so that a failure while acquiring or
      // closing one of them can never leave the other one open.
      try {
        val encStream = enc.getDataStream(fs)
        try {
          opc.save(encStream)
        } finally {
          // Closing the encrypting stream is what commits the encrypted payload into `fs`.
          encStream.close()
        }
      } finally {
        opc.close()
      }
    } finally {
      bos.close()
    }
    // Write out the encrypted version
    fs.writeFilesystem(outputStream)
  } finally {
    fs.close()
  }
}
}