Skip to content

Commit

Permalink
Use FFmpeg for default decoder
Browse files Browse the repository at this point in the history
This uses the more stable and mature FFmpeg decoders for images, while
still allowing for the use of the internal decoder, which has been
marked as experimental. Funnily enough, in some cases the internal
decoder gives more accurate output than FFmpeg, particularly for
timestamps.

Signed-off-by: Ethan Dye <[email protected]>
  • Loading branch information
ecdye committed Oct 10, 2024
1 parent aaccf2f commit 845a40e
Show file tree
Hide file tree
Showing 27 changed files with 210 additions and 30 deletions.
9 changes: 8 additions & 1 deletion Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,17 @@ let package = Package(
.package(url: "https://github.com/apple/swift-argument-parser.git", from: "1.5.0")
],
targets: [
.systemLibrary(
name: "CFFmpeg",
pkgConfig: "libavformat libavcodec libavutil",
providers: [
.brew(["ffmpeg"])
]),
.executableTarget(
name: "macSubtitleOCR",
dependencies: [
.product(name: "ArgumentParser", package: "swift-argument-parser")
.product(name: "ArgumentParser", package: "swift-argument-parser"),
"CFFmpeg"
]),
.testTarget(
name: "macSubtitleOCRTests",
Expand Down
1 change: 1 addition & 0 deletions Sources/CFFmpeg
Submodule CFFmpeg added at c41224
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
157 changes: 157 additions & 0 deletions Sources/macSubtitleOCR/Subtitles/FFmpeg.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
//
// FFmpeg.swift
// macSubtitleOCR
//
// Created by Ethan Dye on 10/9/24.
// Copyright © 2024 Ethan Dye. All rights reserved.
//

import CFFmpeg
import Foundation
import os

/// Decodes bitmap subtitles (DVD/VobSub or HDMV PGS) from a file using the FFmpeg
/// libraries and collects them as `Subtitle` values in `subtitles`.
struct FFmpeg {
    // MARK: - Properties

    private let logger = Logger(subsystem: "github.ecdye.macSubtitleOCR", category: "FFmpeg")
    private(set) var subtitles = [Subtitle]()

    // MARK: - Lifecycle

    /// Opens `sub`, picks the first DVD or PGS subtitle stream, and decodes every
    /// packet of that stream into `subtitles`.
    ///
    /// Unrecoverable setup failures (file cannot be opened, no matching stream, no
    /// decoder, allocation failure) terminate the process via `fatalError`.
    /// Packets that fail to decode are logged and skipped.
    ///
    /// - Parameter sub: Path of the input file handed to `avformat_open_input`.
    init(_ sub: String) throws {
        var fmtCtx: UnsafeMutablePointer<AVFormatContext>?
        var codecCtx: UnsafeMutablePointer<AVCodecContext>?

        // Open the input file
        if avformat_open_input(&fmtCtx, sub, nil, nil) != 0 {
            fatalError("Could not open input file")
        }

        // Retrieve stream information
        if avformat_find_stream_info(fmtCtx, nil) < 0 {
            fatalError("Could not find stream info")
        }

        var subtitleStreamIndex: Int?
        // Divisor applied to the AVSubtitle display times below; chosen per codec.
        var timeBase = 0.0
        var subtitleTimeBase: AVRational?
        for i in 0 ..< Int(fmtCtx!.pointee.nb_streams) {
            let stream = fmtCtx!.pointee.streams[i]
            let parameters = stream!.pointee.codecpar.pointee
            let codecID = parameters.codec_id
            // Only bitmap subtitle formats are handled: the first DVD or PGS stream wins.
            if parameters.codec_type == AVMEDIA_TYPE_SUBTITLE,
               codecID == AV_CODEC_ID_DVD_SUBTITLE || codecID == AV_CODEC_ID_HDMV_PGS_SUBTITLE {
                subtitleStreamIndex = i
                subtitleTimeBase = stream!.pointee.time_base
                if codecID == AV_CODEC_ID_DVD_SUBTITLE {
                    timeBase = 1000
                } else {
                    timeBase = 900000000
                }
                break
            }
        }

        guard let subtitleStreamIndex else {
            fatalError("Could not find a VobSub subtitle stream")
        }
        let codecParameters = fmtCtx!.pointee.streams[subtitleStreamIndex]!.pointee.codecpar
        guard let codec = avcodec_find_decoder(codecParameters!.pointee.codec_id) else {
            fatalError("Could not find subtitle decoder")
        }
        codecCtx = avcodec_alloc_context3(codec)
        guard codecCtx != nil else {
            fatalError("Could not allocate codec context")
        }
        if avcodec_parameters_to_context(codecCtx, codecParameters) < 0 {
            fatalError("Failed to copy codec parameters")
        }
        if avcodec_open2(codecCtx, codec, nil) < 0 {
            fatalError("Could not open codec")
        }
        var packet = av_packet_alloc()
        // av_packet_alloc can fail; stop here instead of crashing on a later unwrap.
        guard packet != nil else {
            fatalError("Could not allocate packet")
        }
        var subtitle = AVSubtitle()

        // Read frames from the subtitle stream
        while av_read_frame(fmtCtx, packet) >= 0 {
            // Release the packet's buffers at the end of every iteration, including
            // the early `continue` taken when decoding fails.
            defer { av_packet_unref(packet) }

            guard packet!.pointee.stream_index == subtitleStreamIndex else {
                continue
            }

            var gotSubtitle: Int32 = 0

            // Decode subtitle packet
            let ret = avcodec_decode_subtitle2(codecCtx, &subtitle, &gotSubtitle, packet)
            if ret < 0 {
                logger.warning("Error decoding subtitle, skipping...")
                continue
            }

            guard gotSubtitle != 0 else {
                continue
            }

            for i in 0 ..< Int(subtitle.num_rects) {
                let rect = subtitle.rects[i]!
                let decoded = extractImageData(from: rect)
                let pts = convertPTSToTimeInterval(
                    pts: packet!.pointee.pts,
                    timeBase: subtitleTimeBase!)
                // Display times are offsets from the packet's presentation time.
                let start = pts + TimeInterval(subtitle.start_display_time) / timeBase
                let end = pts + TimeInterval(subtitle.end_display_time) / timeBase
                decoded.startTimestamp = start
                decoded.endTimestamp = end
                logger.debug("Start timestamp: \(start), End timestamp: \(end)")
                subtitles.append(decoded)
            }
            // Copy to a local so the log interpolation does not capture `self`.
            let count = subtitles.count
            logger.debug("Got subtitle for index: \(count)")

            avsubtitle_free(&subtitle)
        }

        // Clean up
        avcodec_free_context(&codecCtx) // This will set codecCtx to nil
        avformat_close_input(&fmtCtx)
        av_packet_free(&packet)
    }

    /// Copies the palette and bitmap of a decoded subtitle rectangle into a new `Subtitle`.
    ///
    /// Rectangles that are not of type `SUBTITLE_BITMAP` yield a `Subtitle` with
    /// only its number of colors set.
    ///
    /// - Parameter rect: A rectangle taken from a decoded `AVSubtitle`.
    /// - Returns: A `Subtitle` carrying the palette, dimensions, and image bytes.
    func extractImageData(from rect: UnsafeMutablePointer<AVSubtitleRect>) -> Subtitle {
        let subtitle = Subtitle(numberOfColors: Int(rect.pointee.nb_colors))

        // Only bitmap subtitles carry image data.
        guard rect.pointee.type == SUBTITLE_BITMAP else {
            return subtitle
        }

        // Extract palette (if available)
        if rect.pointee.nb_colors > 0, let paletteData = rect.pointee.data.1 {
            if subtitle.imagePalette == nil {
                subtitle.imagePalette = []
            }
            // Always copies 256 four-byte entries regardless of nb_colors.
            // NOTE(review): assumes the decoder allocates a full 256-entry palette — confirm.
            for i in 0 ..< 256 {
                let r = paletteData[i * 4 + 0]
                let g = paletteData[i * 4 + 1]
                let b = paletteData[i * 4 + 2]
                let a = paletteData[i * 4 + 3]

                subtitle.imagePalette?.append(contentsOf: [r, g, b, a])
            }
        }

        // Extract image data (bitmap)
        let width = Int(rect.pointee.w)
        let height = Int(rect.pointee.h)
        // The row stride (linesize) is stored in imageXOffset and used to size the copy.
        let stride = Int(rect.pointee.linesize.0)
        subtitle.imageWidth = width
        subtitle.imageHeight = height
        subtitle.imageXOffset = stride
        logger.debug("Image size: \(width)x\(height)")

        let imageSize = stride * height
        if let bitmapData = rect.pointee.data.0 {
            let buffer = UnsafeBufferPointer(start: bitmapData, count: imageSize)
            subtitle.imageData = Data(buffer)
        }

        return subtitle
    }

    /// Converts a presentation timestamp into seconds.
    ///
    /// - Parameters:
    ///   - pts: Timestamp expressed in `timeBase` units.
    ///   - timeBase: The stream's time base as a rational number.
    /// - Returns: `pts` multiplied by the time base, in seconds.
    func convertPTSToTimeInterval(pts: Int64, timeBase: AVRational) -> TimeInterval {
        // The time base (num / den) is the length of one timestamp unit in seconds,
        // so multiplying a timestamp by it yields seconds.
        let seconds = Double(pts) * av_q2d(timeBase)
        return TimeInterval(seconds)
    }
}
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ struct RLEData {
}
if y > (height / 2), height % 2 != 0, !corrected {
corrected = true
decodedLines.removeLast(width * 2 * evenOffset!)
decodedLines.removeLast(width * 2 * (evenOffset! - 1))
}
}

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// VobSub.swift
// macSubtitleOCR
//
// Created by Ethan Dye on 10/4/24.
// Created by Ethan Dye on 10/9/24.
// Copyright © 2024 Ethan Dye. All rights reserved.
//

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ struct macSubtitleOCR: ParsableCommand {
@Option(wrappedValue: "en", help: "The input image language(s)")
var language: String

@Flag(help: "Use internal decoder (experimental)")
var internalDecoder = false

@Flag(help: "Save image files for subtitle track (optional)")
var saveImages = false

Expand All @@ -50,33 +53,39 @@ struct macSubtitleOCR: ParsableCommand {
var results: [macSubtitleOCRResult] = []
let outputDirectory = URL(fileURLWithPath: outputDirectory)

if input.hasSuffix(".sub") || input.hasSuffix(".idx") {
let sub = try VobSub(
input.replacingOccurrences(of: ".idx", with: ".sub"),
input.replacingOccurrences(of: ".sub", with: ".idx"))
let result = try processSubtitles(subtitles: sub.subtitles, trackNumber: 0)
results.append(result)
} else if input.hasSuffix(".mkv") {
let mkvStream = MKVSubtitleExtractor(filePath: input)
try mkvStream.parseTracks(codec: "S_HDMV/PGS")
for track in mkvStream.tracks {
logger.debug("Found subtitle track: \(track.trackNumber), Codec: \(track.codecId)")
if saveSubtitleFile {
intermediateFiles[track.trackNumber] = try mkvStream.getSubtitleTrackData(
trackNumber: track.trackNumber,
outputDirectory: outputDirectory)!
}
if internalDecoder {
if input.hasSuffix(".sub") || input.hasSuffix(".idx") {
let sub = try VobSub(
input.replacingOccurrences(of: ".idx", with: ".sub"),
input.replacingOccurrences(of: ".sub", with: ".idx"))
let result = try processSubtitles(subtitles: sub.subtitles, trackNumber: 0)
results.append(result)
} else if input.hasSuffix(".mkv") {
let mkvStream = MKVSubtitleExtractor(filePath: input)
try mkvStream.parseTracks(codec: "S_HDMV/PGS")
for track in mkvStream.tracks {
logger.debug("Found subtitle track: \(track.trackNumber), Codec: \(track.codecId)")
if saveSubtitleFile {
intermediateFiles[track.trackNumber] = try mkvStream.getSubtitleTrackData(
trackNumber: track.trackNumber,
outputDirectory: outputDirectory)!
}

// Open the PGS data stream
let PGS = try PGS(mkvStream.tracks[track.trackNumber].trackData)
// Open the PGS data stream
let PGS = try PGS(mkvStream.tracks[track.trackNumber].trackData)

let result = try processSubtitles(subtitles: PGS.subtitles, trackNumber: track.trackNumber)
let result = try processSubtitles(subtitles: PGS.subtitles, trackNumber: track.trackNumber)
results.append(result)
}
} else if input.hasSuffix(".sup") {
// Open the PGS data stream
let PGS = try PGS(URL(fileURLWithPath: input))
let result = try processSubtitles(subtitles: PGS.subtitles, trackNumber: 0)
results.append(result)
}
} else if input.hasSuffix(".sup") {
// Open the PGS data stream
let PGS = try PGS(URL(fileURLWithPath: input))
let result = try processSubtitles(subtitles: PGS.subtitles, trackNumber: 0)
} else {
let ffmpeg = try FFmpeg(input)
let result = try processSubtitles(subtitles: ffmpeg.subtitles, trackNumber: 0)
results.append(result)
}

Expand Down Expand Up @@ -140,6 +149,11 @@ struct macSubtitleOCR: ParsableCommand {
continue
}

if subIndex < subtitles.count, subtitles[subIndex].startTimestamp! <= subtitle.endTimestamp! {
logger.warning("Fixing subtitle index \(subIndex) end timestamp!")
subtitle.endTimestamp = subtitles[subIndex].startTimestamp! - 0.1
}

guard let subImage = subtitle.createImage()
else {
logger.info("Could not create image for index \(subIndex)! Skipping...")
Expand Down
File renamed without changes.
File renamed without changes.
9 changes: 5 additions & 4 deletions Tests/macSubtitleOCRTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,9 @@ import Testing
let json0Match = similarityPercentage(jsonExpectedOutput, json0ActualOutput)
let json1Match = similarityPercentage(jsonExpectedOutput, json1ActualOutput)

#expect(srt0Match >= 95.0)
#expect(srt1Match >= 95.0)
// Lower threshold due to timestamp differences
#expect(srt0Match >= 90.0)
#expect(srt1Match >= 90.0)
#expect(json0Match >= 95.0)
#expect(json1Match >= 95.0)
}
Expand All @@ -64,7 +65,7 @@ import Testing
let srtMatch = similarityPercentage(srtExpectedOutput, srtActualOutput)
let jsonMatch = similarityPercentage(jsonExpectedOutput, jsonActualOutput)

#expect(srtMatch >= 95.0)
#expect(srtMatch >= 90.0) // Lower threshold due to timestamp differences
#expect(jsonMatch >= 95.0)
}

Expand All @@ -88,6 +89,6 @@ import Testing
let srtMatch = similarityPercentage(srtExpectedOutput, srtActualOutput)
let jsonMatch = similarityPercentage(jsonExpectedOutput, jsonActualOutput)

#expect(srtMatch >= 90.0) // Lower threshold due to end timestamp differences
#expect(srtMatch >= 90.0) // Lower threshold due to timestamp differences
#expect(jsonMatch >= 95.0)
}

0 comments on commit 845a40e

Please sign in to comment.