Skip to content

Commit

Permalink
Merge branch 'master' of github.com:louismullie/treat
Browse files Browse the repository at this point in the history
Conflicts:
	Gemfile
  • Loading branch information
louismullie committed Jun 3, 2013
2 parents 3a367f7 + cf8acd2 commit e483b76
Show file tree
Hide file tree
Showing 7 changed files with 25 additions and 4 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ gemspec

gem 'birch'
gem 'schiphol'
gem 'yomu'

group :test do
gem 'rspec'
Expand Down
2 changes: 2 additions & 0 deletions lib/treat/config/data/languages/german.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#encoding: UTF-8

{
dependencies: [
'punkt-segmenter',
Expand Down
2 changes: 1 addition & 1 deletion lib/treat/workers/formatters/readers/autoselect.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ class Treat::Workers::Formatters::Readers::Autoselect
ExtensionRegexp = /^.*?\.([a-zA-Z0-9]{2,5})$/
ImageExtensions = ['gif', 'jpg', 'jpeg', 'png']
DefaultOptions = {
:default_to => 'txt'
:default_to => 'document'
}

# Choose a reader to use.
Expand Down
17 changes: 17 additions & 0 deletions lib/treat/workers/formatters/readers/document.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
require 'yomu'

# Generic document reader backed by Yomu, a library that
# extracts text and metadata from files and documents by
# way of the Apache Tika content analysis toolkit.
class Treat::Workers::Formatters::Readers::Document
  # Fill the supplied document with the text Yomu extracts
  # from document.file, record the file format on it, and
  # hand the same document back to the caller.
  #
  # The format stored is the first file extension listed
  # for the MIME type that Yomu reports for the file.
  #
  # NOTE(review): this presumes Yomu always resolves a MIME
  # type with at least one extension - confirm what happens
  # for unrecognized content types.
  #
  # Options: none (the options hash is accepted but unused).
  def self.read(document, options = {})
    extractor = Yomu.new(document.file)

    # Text is assigned before the MIME type is looked up.
    document.value = extractor.text

    extension = extractor.mimetype.extensions.first
    document.set(:format, extension)

    document
  end
end
4 changes: 2 additions & 2 deletions spec/entities/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ module Treat::Specs::Entities
it "opens the file and reads its " +
"content into a document" do
f = Treat.paths.spec +
'workers/examples/english/mathematicians/leibniz.txt'
'workers/examples/english/mathematicians/pythagoras.docx'
d = Treat::Entities::Document.build(f)
d.should be_an_instance_of Treat::Entities::Document
d.to_s.index('Gottfried Leibniz').should_not eql nil
d.to_s.index('Pythagoras of Samos').should_not eql nil
end
end

Expand Down
Binary file not shown.
3 changes: 2 additions & 1 deletion treat.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ Gem::Specification.new do |s|
# Runtime dependencies
s.add_runtime_dependency 'schiphol'
s.add_runtime_dependency 'birch'
s.add_runtime_dependency 'yomu'

# Development dependencies
s.add_development_dependency 'rspec'
Expand All @@ -36,4 +37,4 @@ Gem::Specification.new do |s|
To complete the installation, run `require treat` in an IRB
terminal, followed by `Treat::Core::Installer.install`. }

end
end

0 comments on commit e483b76

Please sign in to comment.