This repository has been archived by the owner on Sep 26, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
doc2otl
executable file
·78 lines (71 loc) · 1.64 KB
/
doc2otl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/ruby
# $Id$
require 'rexml/document'
# Parser context handed to REXML::Document.new when the antiword output is
# parsed; :ignore_whitespace_nodes => :all asks REXML to drop whitespace-only
# text nodes for every element.
$context = { :ignore_whitespace_nodes => :all }
# Maps a heading container element to its outline depth and to the name of
# the child element that nests one level deeper.
#
# h - any object responding to #name (an REXML::Element in this script).
#
# Returns a two-element array [level, next_name]:
#   'chapter'          -> [0, 'sect1']
#   'sect1' .. 'sect4' -> [N, 'sect<N+1>']
#   'sect5'            -> [5, 'para']
# Any other name is reported on $stderr (via #inspect) and gives [nil, nil].
def levelAndNext(h)
  case h.name
  when 'chapter'
    [0, 'sect1']
  # Anchored on both ends so only an exact sectN name matches; an unanchored
  # pattern would also accept names such as 'refsect1' or 'sect12'.
  when /\Asect([1-4])\z/
    level = Regexp.last_match(1).to_i
    [level, "sect#{level + 1}"]
  when 'sect5'
    [5, 'para']
  else
    $stderr.puts(h.inspect)
    [nil, nil]
  end
end
# Writes one paragraph as an outline body line on io: `level` tab characters,
# the marker '| ', the paragraph's #text with surrounding whitespace removed,
# and a newline. A paragraph whose #text is nil produces an empty body.
def doPara(p, level, io)
  indent = "\t" * level
  body = (p.text || '').strip
  io.print(indent, '| ', body, "\n")
end
# Collects all text found anywhere below `parent`, in document order.
#
# Every child node is visited: text nodes contribute their (unescaped) value
# and child elements contribute their own deep text recursively. Other node
# kinds (comments, processing instructions) are skipped. Each piece is
# stripped, empty pieces are dropped, and the rest are joined with a single
# space.
#
# Walking child *nodes* rather than only child elements matters: text that
# sits directly inside `parent` (e.g. <title>Intro</title>) has no child
# element to carry it and would otherwise be lost.
#
# parent - an REXML::Element (or other REXML parent node).
#
# Returns a String; '' when there is no text at all.
def deepTextOfElement(parent)
  pieces = parent.map do |node|
    case node
    when REXML::Text then node.value
    when REXML::Element then deepTextOfElement(node)
    end
  end
  pieces.compact.map(&:strip).reject(&:empty?).join(' ')
end
# Writes a heading line on io: `level` tab characters, then the text gathered
# from the title element by deepTextOfElement with surrounding whitespace
# removed, then a newline.
def doTitle(t, level, io)
  heading = (deepTextOfElement(t) || '').strip
  io.print("\t" * level, heading, "\n")
end
# Writes the outline for one heading container (a chapter or sectN element)
# to io, descending into nested sections.
#
# levelAndNext supplies the depth of `h` and the name of the element that
# nests one level deeper. Child elements are then handled in document order:
# 'title' goes to doTitle and 'para' to doPara (both at the depth of `h`),
# the next-deeper section name recurses into doHead, and anything else is
# reported on $stderr via #inspect.
def doHead(h, io)
  depth, child_section = levelAndNext(h)
  h.each_element do |child|
    tag = child.name
    if tag == 'title'
      doTitle(child, depth, io)
    elsif tag == 'para'
      doPara(child, depth, io)
    elsif tag == child_section
      doHead(child, io)
    else
      $stderr.puts(child.inspect)
    end
  end
end
# Expected DocBook structure and the outline line each element maps to:
#
# book/title
# book/bookinfo (title, author, date, corpname)
# book/chapter/title                 Heading 1 {emphasis?}
# | book/chapter/para                Text 1
# book/chapter/sect1/title           Heading 2
# | book/chapter/sect1/para          Text 2
# book/chapter/sect1/sect2/title     Heading 3
# | book/chapter/sect1/sect2/para    Text 3
#
# ... and so on through sect5 (Heading 6)
#
# chapter includes sect1
# sectN includes sectN+1
#
# Script entry point: run `antiword -x db` on the command-line arguments,
# parse its standard output as XML and write the outline of the root's
# 'chapter' element to $stdout.
#
# The command is given to IO.popen as an argument array, so no shell is
# involved and file names containing quotes, '$', backticks or other shell
# metacharacters are passed through to antiword verbatim.
IO.popen(['antiword', '-x', 'db', *ARGV]) do |io|
  xml = REXML::Document.new(io, $context)
  # antiword may produce no XML at all (e.g. on an unreadable input); fail
  # with a clear message rather than a NoMethodError on nil.
  chapter = xml.root && xml.root.elements['chapter']
  abort('doc2otl: no chapter element found in antiword output') unless chapter
  doHead(chapter, $stdout)
end