Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

epub-parser: find siblings from parent's sibling node as well #151

Merged
merged 1 commit into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions app/src/main/java/com/starry/myne/epub/EpubUtils.kt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package com.starry.myne.epub
import org.w3c.dom.Document
import org.w3c.dom.Element
import org.w3c.dom.Node
import org.jsoup.nodes.Node as JsoupNode
import org.w3c.dom.NodeList
import org.xml.sax.InputSource
import java.io.File
Expand Down Expand Up @@ -58,3 +59,13 @@ fun Node.getAttributeValue(attribute: String): String? =

/**
 * Lazily yields the entries of this [NodeList] that are [Element]s, in list order.
 *
 * Nodes of any other kind are skipped. The index range is half-open
 * (`0 until length`) so the list is never probed at `length`, one past its
 * last valid index.
 */
val NodeList.elements: Sequence<Element>
    get() = (0 until length).asSequence().mapNotNull { item(it) as? Element }

/** The direct children of this [Node] that are [Element]s, in document order. */
val Node.childElements: Sequence<Element>
    get() = childNodes.elements

/**
 * Collects every node that follows this one under the same parent.
 *
 * Starts at this node's `nextSibling()` and keeps following `nextSibling()`
 * until it returns null.
 *
 * @return the following siblings, nearest first; empty when there are none.
 */
fun JsoupNode.nextSiblingNodes(): List<org.jsoup.nodes.Node> =
    generateSequence(nextSibling()) { following -> following.nextSibling() }.toList()
48 changes: 41 additions & 7 deletions app/src/main/java/com/starry/myne/epub/EpubXMLFileParser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import android.util.Log
import org.jsoup.Jsoup
import org.jsoup.nodes.TextNode
import java.io.File
import org.jsoup.nodes.Node
import kotlin.io.path.invariantSeparatorsPathString

/**
Expand Down Expand Up @@ -86,7 +87,7 @@ class EpubXMLFileParser(
val fragmentElement = document.selectFirst("#$fragmentId")
title = fragmentElement?.selectFirst("h1, h2, h3, h4, h5, h6")?.text() ?: ""
val bodyBuilder = StringBuilder()
var currentNode: org.jsoup.nodes.Node? = fragmentElement?.nextSibling()
var currentNode: Node? = fragmentElement?.nextSibling()
val nextFragmentIdElement = if (nextFragmentId != null) {
document.selectFirst("#$nextFragmentId")
} else {
Expand All @@ -96,7 +97,7 @@ class EpubXMLFileParser(

while (currentNode != null && currentNode != nextFragmentIdElement) {
bodyBuilder.append(getNodeStructuredText(currentNode) + "\n\n")
currentNode = currentNode.nextSibling()
currentNode = getNextSibling(currentNode)
}
bodyContent = bodyBuilder.toString()
}
Expand All @@ -115,6 +116,39 @@ class EpubXMLFileParser(
)
}

/**
 * Finds the node to visit after [currentNode].
 *
 * If [currentNode] has a direct next sibling, that sibling is returned.
 * Otherwise the ancestors are walked upwards, and the first ancestor that has
 * a next sibling hands that sibling to [traverseDescendants], whose result is
 * returned as-is (possibly null).
 *
 * NOTE(review): because the ancestor's sibling goes through
 * [traverseDescendants], the node returned may be a child of that sibling
 * rather than the sibling itself. A caller comparing against a specific stop
 * node may therefore step past it. Confirm this is intended.
 *
 * @param currentNode the node just processed; null yields null.
 * @return the next node to process, or null when nothing follows.
 */
private fun getNextSibling(currentNode: Node?): Node? {
    // Common case: there is a sibling right after the current node.
    currentNode?.nextSibling()?.let { return it }

    // No direct sibling: climb the ancestor chain lazily and take the first
    // ancestor sibling that exists.
    val ancestorSibling = generateSequence(currentNode?.parent()) { it.parent() }
        .mapNotNull { it.nextSibling() }
        .firstOrNull()

    return ancestorSibling?.let { traverseDescendants(it) }
}

/**
 * Returns the first child of [node], or, when [node] has no children, the
 * first child of the nearest following sibling that has any.
 *
 * The walk is iterative and advances one `nextSibling()` at a time, so it
 * neither builds a sibling list per step nor recurses once per childless
 * sibling.
 *
 * @param node the node at which the search starts.
 * @return the first child found, or null when neither [node] nor any of its
 *   following siblings has children.
 */
private fun traverseDescendants(node: Node): Node? {
    var candidate: Node? = node
    while (candidate != null) {
        // The first node with children wins; its first child is the result.
        candidate.childNodes().firstOrNull()?.let { return it }
        candidate = candidate.nextSibling()
    }
    return null
}

fun parseAsImage(absolutePathImage: String): String {
// Use run catching so it can be run locally without crash
val bitmap = zipFile[absolutePathImage]?.data?.runCatching {
Expand All @@ -130,7 +164,7 @@ class EpubXMLFileParser(
}

// Rewrites the image node to xml for the next stage.
private fun declareImgEntry(node: org.jsoup.nodes.Node): String {
private fun declareImgEntry(node: Node): String {
val attrs = node.attributes().associate { it.key to it.value }
val relPathEncoded = attrs["src"] ?: attrs["xlink:href"] ?: ""

Expand All @@ -143,8 +177,8 @@ class EpubXMLFileParser(
return parseAsImage(absolutePathImage)
}

private fun getPTraverse(node: org.jsoup.nodes.Node): String {
fun innerTraverse(node: org.jsoup.nodes.Node): String =
private fun getPTraverse(node: Node): String {
fun innerTraverse(node: Node): String =
node.childNodes().joinToString("") { child ->
when {
child.nodeName() == "br" -> "\n"
Expand All @@ -159,7 +193,7 @@ class EpubXMLFileParser(
return if (paragraph.isNotEmpty()) "$paragraph\n\n" else ""
}

private fun getNodeTextTraverse(node: org.jsoup.nodes.Node): String {
private fun getNodeTextTraverse(node: Node): String {
val children = node.childNodes()
if (children.isEmpty())
return ""
Expand All @@ -181,7 +215,7 @@ class EpubXMLFileParser(
}
}

private fun getNodeStructuredText(node: org.jsoup.nodes.Node): String {
private fun getNodeStructuredText(node: Node): String {
val children = node.childNodes()
if (children.isEmpty())
return ""
Expand Down
Loading