Add unit test for convert_tokens
lebebr01 committed Mar 30, 2018
1 parent aee2d3d commit 27ecb5a
Showing 2 changed files with 22 additions and 0 deletions.
8 changes: 8 additions & 0 deletions NEWS.md
@@ -1,3 +1,11 @@
# pdfsearch 0.2.0

* Added `remove_hyphen` argument to remove hyphens from words that wrap across two lines.
* Added `convert_tokens` function that uses the tokenizers R package to convert text to tokens (see the usage sketch below).
* Created vignette with expanded details.
* Created JOSS paper for submission.
* Created code of conduct and contributing policies.

# pdfsearch 0.1.1

* Added additional examples of usage to documentation.
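A short usage sketch of the new `convert_tokens` function, based only on the arguments that appear in the test added in this commit (`x`, `path`, and `token_function`); treat it as a minimal example under those assumptions, not the full documented interface.

```r
library(pdfsearch)

# Path to a pdf bundled with the package.
path <- system.file('pdf', '1610.00147.pdf', package = 'pdfsearch')

# Default behavior: one list element per page, each holding word tokens.
word_tokens <- convert_tokens(x = path, path = TRUE)

# Any tokenizer from the tokenizers package can be swapped in.
line_tokens <- convert_tokens(x = path, path = TRUE,
                              token_function = tokenizers::tokenize_lines)
```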
14 changes: 14 additions & 0 deletions tests/testthat/test_convert_tokens.r
@@ -0,0 +1,14 @@
context("Test token conversion")

test_that('convert_tokens splits a pdf into tokens by page', {
  path <- system.file('pdf', '1610.00147.pdf', package = 'pdfsearch')

  # The example pdf has 31 pages, so the result should be a list of 31.
  expect_output(str(convert_tokens(x = path, path = TRUE)),
                "List of 31")

  page_one_words <- length(convert_tokens(x = path, path = TRUE)[[1]][[1]])

  # Tokenizing by lines should yield fewer tokens on page one than
  # tokenizing by words.
  expect_lt(length(convert_tokens(x = path, path = TRUE,
                                  token_function = tokenizers::tokenize_lines)[[1]][[1]]),
            page_one_words)
})
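The `expect_lt` check rests on a page having fewer lines than words. A standalone sketch using only the two tokenizers functions referenced in the test (not part of this commit) illustrates the relationship:

```r
library(tokenizers)

txt <- "pdf text usually has\nmany words per line"

# Word tokens outnumber line tokens for the same text.
length(tokenize_words(txt)[[1]])  # 8 word tokens
length(tokenize_lines(txt)[[1]])  # 2 line tokens
```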
