Skip to content

Commit

Permalink
Add some preliminary tests for the new ResultIterator functionality.
Browse files Browse the repository at this point in the history
  • Loading branch information
forty2 committed Apr 25, 2017
1 parent 80d8acf commit 14a9dc4
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 0 deletions.
Binary file added src/Tesseract.Tests/Data/Ocr/Fonts.tif
Binary file not shown.
98 changes: 98 additions & 0 deletions src/Tesseract.Tests/ResultIteratorTests/FontAttributesTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace Tesseract.Tests.ResultIteratorTests
{
    /// <summary>
    /// Preliminary tests for the word- and symbol-level attributes exposed by
    /// <see cref="ResultIterator"/>, run against the <c>Ocr\Fonts.tif</c> test image.
    /// </summary>
    [TestFixture]
    public class FontAttributesTests : TesseractTestBase
    {
        /// <summary>Engine created in <see cref="Init"/> and released in <see cref="Dispose"/>.</summary>
        private TesseractEngine Engine { get; set; }

        /// <summary>Test image loaded in <see cref="Init"/> and released in <see cref="Dispose"/>.</summary>
        private Pix TestImage { get; set; }

        /// <summary>
        /// Creates the engine and loads the test image before each test.
        /// </summary>
        [SetUp]
        public void Init()
        {
            Engine = CreateEngine();
            try {
                TestImage = LoadTestPix("Ocr\\Fonts.tif");
            } catch {
                // NUnit does not run [TearDown] when [SetUp] throws, so the engine
                // created above must be released here or it would leak.
                Dispose();
                throw;
            }
        }

        /// <summary>
        /// Releases the test image and the engine after each test. Safe to call
        /// when either of them was never created.
        /// </summary>
        [TearDown]
        public void Dispose()
        {
            if (TestImage != null) {
                TestImage.Dispose();
                TestImage = null;
            }

            if (Engine != null) {
                Engine.Dispose();
                Engine = null;
            }
        }

        /// <summary>
        /// Writes the text of the iterator's current word, followed by the given
        /// font attributes, to the console. Not called by the test in this
        /// fixture; presumably kept for ad-hoc troubleshooting.
        /// </summary>
        /// <param name="iter">Iterator positioned on the word to describe.</param>
        /// <param name="fontAttrs">Font attributes to print.</param>
        private void Debug(ResultIterator iter, FontAttributes fontAttrs)
        {
            Console.WriteLine("Word: {0}", iter.GetText(PageIteratorLevel.Word));
            Console.WriteLine("IsBold: {0}", fontAttrs.FontInfo.IsBold);
            Console.WriteLine("IsItalic: {0}", fontAttrs.FontInfo.IsItalic);
            Console.WriteLine("IsFixedPitch: {0}", fontAttrs.FontInfo.IsFixedPitch);
            Console.WriteLine("IsSerif: {0}", fontAttrs.FontInfo.IsSerif);

            Console.WriteLine("IsUnderlined: {0}", fontAttrs.IsUnderlined);
            Console.WriteLine("IsSmallCaps: {0}", fontAttrs.IsSmallCaps);
            Console.WriteLine("Point size: {0}", fontAttrs.PointSize);
        }

        #region Tests
        /// <summary>
        /// Steps through the recognised page and checks one attribute per stop:
        /// bold, italic, fixed pitch, serif, small caps, numeric, superscript
        /// and finally subscript.
        /// </summary>
        [Test]
        public void GetWordFontAttributesWorks()
        {
            using (var page = Engine.Process(TestImage))
            using (var iter = page.GetIterator()) {
                // font attributes come in this order in the test image:
                // bold, italic, monospace, serif, smallcaps
                //
                // there is no test for underline because in 3.04 IsUnderlined is
                // hard-coded to "false". See: https://github.com/tesseract-ocr/tesseract/blob/3.04/ccmain/ltrresultiterator.cpp#L182
                //
                // NOTE(review): the bool returned by iter.Next(...) is ignored throughout.
                // Presumably a false result leaves the iterator where it was, in which
                // case the following assertion would inspect the previous element -
                // consider asserting on the return value once the image layout is confirmed.

                // First word: bold; also checks its language and dictionary flag.
                var fontAttrs = iter.GetWordFontAttributes();
                Assert.That(fontAttrs.FontInfo.IsBold, Is.True);
                Assert.That(iter.GetWordRecognitionLanguage(), Is.EqualTo("eng"));
                Assert.That(iter.GetWordIsFromDictionary(), Is.True);
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);

                // Italic word.
                fontAttrs = iter.GetWordFontAttributes();
                Assert.That(fontAttrs.FontInfo.IsItalic, Is.True);
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);

                // Fixed-pitch (monospace) word.
                fontAttrs = iter.GetWordFontAttributes();
                Assert.That(fontAttrs.FontInfo.IsFixedPitch, Is.True);
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);

                // Serif word.
                fontAttrs = iter.GetWordFontAttributes();
                Assert.That(fontAttrs.FontInfo.IsSerif, Is.True);
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);

                // Small-caps word.
                fontAttrs = iter.GetWordFontAttributes();
                Assert.That(fontAttrs.IsSmallCaps, Is.True);
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);

                // Numeric word.
                Assert.That(iter.GetWordIsNumeric(), Is.True);

                // Step one word, then one symbol, before checking for superscript.
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);
                iter.Next(PageIteratorLevel.Word, PageIteratorLevel.Symbol);

                Assert.That(iter.GetSymbolIsSuperscript(), Is.True);

                // Same stepping pattern before checking for subscript.
                iter.Next(PageIteratorLevel.TextLine, PageIteratorLevel.Word);
                iter.Next(PageIteratorLevel.Word, PageIteratorLevel.Symbol);

                Assert.That(iter.GetSymbolIsSubscript(), Is.True);
            }
        }
        #endregion Tests
    }
}
4 changes: 4 additions & 0 deletions src/Tesseract.Tests/Tesseract.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
<Compile Include="Leptonica\PixTests\PixDataAccessTests.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="ResultIteratorTests\OfAnEmptyPixTests.cs" />
<Compile Include="ResultIteratorTests\FontAttributesTests.cs" />
<Compile Include="ResultRendererTests.cs" />
<Compile Include="TesseractResultSet.cs" />
<Compile Include="TesseractTestBase.cs" />
Expand Down Expand Up @@ -161,6 +162,9 @@
<Content Include="Data\Ocr\PSM_SingleWord.png">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Data\Ocr\Fonts.tif">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Data\Ocr\blank.tif">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
Expand Down

0 comments on commit 14a9dc4

Please sign in to comment.