From eb1ebee23b26116b899a436880aac9c0210d7fb9 Mon Sep 17 00:00:00 2001 From: Eric Schmidt Date: Fri, 20 Mar 2020 12:38:19 -0700 Subject: [PATCH] feat: adds beta samples * feat: adds remaining samples (parse form synchronous, parse table synchronous, parse from with NL model, set endpoint) * fix: adds AutoML NL model to tests * fix: removes forEach() constructions --- document-ai/batch_parse_form.js | 140 +++++++++++++++++ document-ai/batch_parse_table.js | 148 ++++++++++++++++++ document-ai/parse_form.js | 102 ++++++++++++ document-ai/parse_table.js | 112 +++++++++++++ document-ai/parse_with_model.js | 76 +++++++++ document-ai/set_endpoint.js | 86 ++++++++++ ...eForm.test.js => batch_parse_form.test.js} | 7 +- ...able.test.js => batch_parse_table.test.js} | 9 +- document-ai/test/parse_form.test.js | 35 +++++ document-ai/test/parse_table.test.js | 35 +++++ document-ai/test/parse_with_model.test.js | 41 +++++ document-ai/test/set_endpoint.test.js | 35 +++++ 12 files changed, 819 insertions(+), 7 deletions(-) create mode 100644 document-ai/batch_parse_form.js create mode 100644 document-ai/batch_parse_table.js create mode 100644 document-ai/parse_form.js create mode 100644 document-ai/parse_table.js create mode 100644 document-ai/parse_with_model.js create mode 100644 document-ai/set_endpoint.js rename document-ai/test/{parseForm.test.js => batch_parse_form.test.js} (87%) rename document-ai/test/{parseTable.test.js => batch_parse_table.test.js} (84%) create mode 100644 document-ai/test/parse_form.test.js create mode 100644 document-ai/test/parse_table.test.js create mode 100644 document-ai/test/parse_with_model.test.js create mode 100644 document-ai/test/set_endpoint.test.js diff --git a/document-ai/batch_parse_form.js b/document-ai/batch_parse_form.js new file mode 100644 index 0000000000..d0214fcb45 --- /dev/null +++ b/document-ai/batch_parse_form.js @@ -0,0 +1,140 @@ +/** + * Copyright 2020 Google LLC + * Licensed under the Apache License, Version 2.0 (the 
"License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +const uuid = require('uuid'); + +async function main( + projectId = 'YOUR_PROJECT_ID', + location = 'YOUR_PROJECT_LOCATION', + gcsOutputUri = 'output-bucket', + gcsOutputUriPrefix = uuid.v4(), + gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf' +) { + // [START document_parse_form] + /** + * TODO(developer): Uncomment these variables before running the sample. + */ + // const projectId = 'YOUR_PROJECT_ID'; + // const location = 'YOUR_PROJECT_LOCATION'; + // const gcsOutputUri = 'YOUR_STORAGE_BUCKET'; + // const gcsOutputUriPrefix = 'YOUR_STORAGE_PREFIX'; + // const gcsInputUri = 'GCS URI of the PDF to process'; + + // Imports the Google Cloud client library + const { + DocumentUnderstandingServiceClient, + } = require('@google-cloud/documentai'); + const {Storage} = require('@google-cloud/storage'); + + const client = new DocumentUnderstandingServiceClient(); + const storage = new Storage(); + + async function parseFormGCS(inputUri, outputUri, outputUriPrefix) { + const parent = `projects/${projectId}/locations/${location}`; + + // Configure the batch process request. 
+ const request = { + inputConfig: { + gcsSource: { + uri: inputUri, + }, + mimeType: 'application/pdf', + }, + outputConfig: { + gcsDestination: { + uri: `${outputUri}/${outputUriPrefix}/`, + }, + pagesPerShard: 1, + }, + formExtractionParams: { + enabled: true, + keyValuePairHints: [ + { + key: 'Phone', + valueTypes: ['PHONE_NUMBER'], + }, + { + key: 'Contact', + valueTypes: ['EMAIL', 'NAME'], + }, + ], + }, + }; + + // Configure the request for batch process + const requests = { + parent, + requests: [request], + }; + + // Batch process document using a long-running operation. + // You can wait for now, or get results later. + const [operation] = await client.batchProcessDocuments(requests); + + // Wait for operation to complete. + await operation.promise(); + + console.log('Document processing complete.'); + + // Query Storage bucket for the results file(s). + const query = { + prefix: outputUriPrefix, + }; + + console.log('Fetching results ...'); + + // List all of the files in the Storage bucket + const [files] = await storage.bucket(gcsOutputUri).getFiles(query); + + files.forEach(async (fileInfo, index) => { + // Get the file as a buffer + const [file] = await fileInfo.download(); + + console.log(`Fetched file #${index + 1}:`); + + // Read the results + const results = JSON.parse(file.toString()); + + // Get all of the document text as one big string. + const {text} = results; + + // Utility to extract text anchors from text field. 
+ const getText = textAnchor => { + const startIndex = textAnchor.textSegments[0].startIndex || 0; + const endIndex = textAnchor.textSegments[0].endIndex; + + return `\t${text.substring(startIndex, endIndex)}`; + }; + + // Process the output + const [page1] = results.pages; + const formFields = page1.formFields; + + for (const field of formFields) { + const fieldName = getText(field.fieldName.textAnchor); + const fieldValue = getText(field.fieldValue.textAnchor); + + console.log('Extracted key value pair:'); + console.log(`\t(${fieldName}, ${fieldValue})`); + } + }); + } + // [END document_parse_form] + + parseFormGCS(gcsInputUri, gcsOutputUri, gcsOutputUriPrefix); +} +main(...process.argv.slice(2)); diff --git a/document-ai/batch_parse_table.js b/document-ai/batch_parse_table.js new file mode 100644 index 0000000000..25ddb5cfe8 --- /dev/null +++ b/document-ai/batch_parse_table.js @@ -0,0 +1,148 @@ +/** + * Copyright 2020 Google LLC + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +const uuid = require('uuid'); + +async function main( + projectId = 'YOUR_PROJECT_ID', + location = 'YOUR_PROJECT_LOCATION', + gcsOutputUri = 'output-bucket', + gcsOutputUriPrefix = uuid.v4(), + gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf' +) { + // [START document_parse_table] + /** + * TODO(developer): Uncomment these variables before running the sample. 
+ */ + // const projectId = 'YOUR_PROJECT_ID'; + // const location = 'YOUR_PROJECT_LOCATION'; + // const gcsOutputUri = 'YOUR_STORAGE_BUCKET'; + // const gcsOutputUriPrefix = 'YOUR_STORAGE_PREFIX'; + // const gcsInputUri = 'YOUR_SOURCE_PDF'; + + // Imports the Google Cloud client library + const { + DocumentUnderstandingServiceClient, + } = require('@google-cloud/documentai'); + const {Storage} = require('@google-cloud/storage'); + + const client = new DocumentUnderstandingServiceClient(); + const storage = new Storage(); + + async function parseTableGCS(inputUri, outputUri, outputUriPrefix) { + const parent = `projects/${projectId}/locations/${location}`; + + // Configure the batch process request. + const request = { + //parent, + inputConfig: { + gcsSource: { + uri: inputUri, + }, + mimeType: 'application/pdf', + }, + outputConfig: { + gcsDestination: { + uri: `${outputUri}/${outputUriPrefix}/`, + }, + pagesPerShard: 1, + }, + tableExtractionParams: { + enabled: true, + tableBoundHints: [ + { + boundingBox: { + normalizedVertices: [ + {x: 0, y: 0}, + {x: 1, y: 0}, + {x: 1, y: 1}, + {x: 0, y: 1}, + ], + }, + }, + ], + }, + }; + + // Configure the request for batch process + const requests = { + parent, + requests: [request], + }; + + // Batch process document using a long-running operation. + // You can wait for now, or get results later. + // Note: first request to the service takes longer than subsequent + // requests. + const [operation] = await client.batchProcessDocuments(requests); + + // Wait for operation to complete. + await operation.promise(); + + console.log('Document processing complete.'); + + // Query Storage bucket for the results file(s). 
+ const query = { + prefix: outputUriPrefix, + }; + + console.log('Fetching results ...'); + + // List all of the files in the Storage bucket + const [files] = await storage.bucket(gcsOutputUri).getFiles(query); + + files.forEach(async (fileInfo, index) => { + // Get the file as a buffer + const [file] = await fileInfo.download(); + + console.log(`Fetched file #${index + 1}:`); + + // Read the results + const results = JSON.parse(file.toString()); + + // Get all of the document text as one big string + const text = results.text; + + // Get the first table in the document + const [page1] = results.pages; + const [table] = page1.tables; + const [headerRow] = table.headerRows; + + console.log('Results from first table processed:'); + console.log( + `First detected language: ${page1.detectedLanguages[0].languageCode}` + ); + + console.log('Header row:'); + for (const tableCell of headerRow.cells) { + if (tableCell.layout.textAnchor.textSegments) { + // Extract shards from the text field + // First shard in document doesn't have startIndex property + const startIndex = + tableCell.layout.textAnchor.textSegments[0].startIndex || 0; + const endIndex = tableCell.layout.textAnchor.textSegments[0].endIndex; + + console.log(`\t${text.substring(startIndex, endIndex)}`); + } + } + }); + } + // [END document_parse_table] + + parseTableGCS(gcsInputUri, gcsOutputUri, gcsOutputUriPrefix); +} +main(...process.argv.slice(2)); diff --git a/document-ai/parse_form.js b/document-ai/parse_form.js new file mode 100644 index 0000000000..0f26044859 --- /dev/null +++ b/document-ai/parse_form.js @@ -0,0 +1,102 @@ +/** + * Copyright 2020, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +/** + * Process a single PDF as a form. + * @param {string} projectId your Google Cloud project ID + * @param {string} location region to use for this operation + * @param {string} gcsInputUri Cloud Storage URI of the PDF document to parse + */ +async function main( + projectId, + location, + gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf' +) { + // [START document_quickstart] + /** + * TODO(developer): Uncomment these variables before running the sample. + */ + // const projectId = 'YOUR_PROJECT_ID'; + // const location = 'YOUR_PROJECT_LOCATION'; + // const gcsInputUri = 'YOUR_SOURCE_PDF'; + + const { + DocumentUnderstandingServiceClient, + } = require('@google-cloud/documentai'); + const client = new DocumentUnderstandingServiceClient(); + + async function parseForm() { + // Configure the request for processing the PDF + const parent = `projects/${projectId}/locations/${location}`; + const request = { + parent, + inputConfig: { + gcsSource: { + uri: gcsInputUri, + }, + mimeType: 'application/pdf', + }, + formExtractionParams: { + enabled: true, + keyValuePairHints: [ + { + key: 'Phone', + valueTypes: ['PHONE_NUMBER'], + }, + { + key: 'Contact', + valueTypes: ['EMAIL', 'NAME'], + }, + ], + }, + }; + + // Recognizes text entities in the PDF document + const [result] = await client.processDocument(request); + + // Get all of the document text as one big string + const {text} = result; + + // Extract shards from the text field + const getText = textAnchor => { + // First shard in document doesn't have 
startIndex property + const startIndex = textAnchor.textSegments[0].startIndex || 0; + const endIndex = textAnchor.textSegments[0].endIndex; + + return text.substring(startIndex, endIndex); + }; + + // Process the output + const [page1] = result.pages; + const {formFields} = page1; + + for (const field of formFields) { + const fieldName = getText(field.fieldName.textAnchor); + const fieldValue = getText(field.fieldValue.textAnchor); + + console.log('Extracted key value pair:'); + console.log(`\t(${fieldName}, ${fieldValue})`); + } + } + // [END document_quickstart] + await parseForm(); +} + +main(...process.argv.slice(2)).catch(err => { + console.error(err); + process.exitCode = 1; +}); diff --git a/document-ai/parse_table.js b/document-ai/parse_table.js new file mode 100644 index 0000000000..ef77dfa9af --- /dev/null +++ b/document-ai/parse_table.js @@ -0,0 +1,112 @@ +/** + * Copyright 2020, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +/** + * Process a single PDF. + * @param {string} projectId your Google Cloud project ID + * @param {string} location region to use for this operation + * @param {string} gcsInputUri Cloud Storage URI of the PDF document to parse + */ +async function main( + projectId, + location, + gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf' +) { + // [START document_quickstart] + /** + * TODO(developer): Uncomment these variables before running the sample. 
+ */ + // const projectId = 'YOUR_PROJECT_ID'; + // const location = 'YOUR_PROJECT_LOCATION'; + // const gcsInputUri = 'YOUR_SOURCE_PDF'; + + const { + DocumentUnderstandingServiceClient, + } = require('@google-cloud/documentai'); + const client = new DocumentUnderstandingServiceClient(); + + async function parseTable() { + // Configure the request for processing the PDF + const parent = `projects/${projectId}/locations/${location}`; + const request = { + parent, + inputConfig: { + gcsSource: { + uri: gcsInputUri, + }, + mimeType: 'application/pdf', + }, + tableExtractionParams: { + enabled: true, + tableBoundHints: [ + { + boundingBox: { + normalizedVertices: [ + {x: 0, y: 0}, + {x: 1, y: 0}, + {x: 1, y: 1}, + {x: 0, y: 1}, + ], + }, + }, + ], + }, + }; + + // Recognizes text entities in the PDF document + const [result] = await client.processDocument(request); + + // Get all of the document text as one big string + const {text} = result; + + // Extract shards from the text field + function getText(textAnchor) { + // Text anchor has no text segments if cell is empty + if (textAnchor.textSegments.length > 0) { + // First shard in document doesn't have startIndex property + const startIndex = textAnchor.textSegments[0].startIndex || 0; + const endIndex = textAnchor.textSegments[0].endIndex; + + return text.substring(startIndex, endIndex); + } + return '[NO TEXT]'; + } + + // Get the first table in the document + const [page1] = result.pages; + const [table] = page1.tables; + const [headerRow] = table.headerRows; + + console.log('Header row:'); + for (const tableCell of headerRow.cells) { + if (tableCell.layout.textAnchor.textSegments) { + // Cell has a textSegments list: print the text it points to, + // resolved against the full document text by getText() + const textAnchor = tableCell.layout.textAnchor; + + console.log(`\t${getText(textAnchor)}`); + } + } + } + // [END document_quickstart] + await parseTable(); +} + +main(...process.argv.slice(2)).catch(err => { + console.error(err); 
+ process.exitCode = 1; +}); diff --git a/document-ai/parse_with_model.js b/document-ai/parse_with_model.js new file mode 100644 index 0000000000..d849a55f27 --- /dev/null +++ b/document-ai/parse_with_model.js @@ -0,0 +1,76 @@ +/** + * Copyright 2020, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +/** + * Process a single PDF. + * @param {string} projectId your Google Cloud project ID + * @param {string} location region to use for this operation + * @param {string} autoMLModel AutoML Natural Language model to use + * @param {string} gcsInputUri Cloud Storage URI of the PDF document to parse + */ +async function main( + projectId, + location, + autoMLModel, + gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf' +) { + // [START document_quickstart] + /** + * TODO(developer): Uncomment these variables before running the sample. 
+ */ + // const projectId = 'YOUR_PROJECT_ID'; + // const location = 'YOUR_PROJECT_LOCATION'; + // const autoMLModel = 'Full resource name of AutoML Natural Language model'; + // const gcsInputUri = 'YOUR_SOURCE_PDF'; + + const { + DocumentUnderstandingServiceClient, + } = require('@google-cloud/documentai'); + const client = new DocumentUnderstandingServiceClient(); + + async function parseWithModel() { + // Configure the request for processing the PDF + const parent = `projects/${projectId}/locations/${location}`; + const request = { + parent, + inputConfig: { + gcsSource: { + uri: gcsInputUri, + }, + mimeType: 'application/pdf', + }, + automlParams: { + model: autoMLModel, + }, + }; + + // Processes the PDF with the AutoML model; the result carries labels + const [result] = await client.processDocument(request); + + for (const label of result.labels) { + console.log(`Label detected: ${label.name}`); + console.log(`Confidence: ${label.confidence}`); + } + } + // [END document_quickstart] + await parseWithModel(); +} + +main(...process.argv.slice(2)).catch(err => { + console.error(err); + process.exitCode = 1; +}); diff --git a/document-ai/set_endpoint.js b/document-ai/set_endpoint.js new file mode 100644 index 0000000000..883f3f5999 --- /dev/null +++ b/document-ai/set_endpoint.js @@ -0,0 +1,86 @@ +/** + * Copyright 2020, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +'use strict'; + +/** + * Process a single PDF. 
+ * @param {string} projectId your Google Cloud project ID + * @param {string} location region to use for this operation + * @param {string} gcsInputUri Cloud Storage URI of the PDF document to parse + */ +async function main( + projectId, + location = 'europe-west2', + gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf' +) { + // [START document_quickstart] + /** + * TODO(developer): Uncomment these variables before running the sample. + */ + // const projectId = 'YOUR_PROJECT_ID'; + // const location = 'Location for operation--must match region of endpoint'; + // const gcsInputUri = 'YOUR_SOURCE_PDF'; + + const { + DocumentUnderstandingServiceClient, + } = require('@google-cloud/documentai'); + + // Specifies the location of the api endpoint + const clientOptions = {apiEndpoint: 'eu-documentai.googleapis.com'}; + const client = new DocumentUnderstandingServiceClient(clientOptions); + + async function setEndpoint() { + // Configure the request for processing the PDF + const parent = `projects/${projectId}/locations/${location}`; + const request = { + parent, + inputConfig: { + gcsSource: { + uri: gcsInputUri, + }, + mimeType: 'application/pdf', + }, + }; + + // Recognizes text entities in the PDF document + const [result] = await client.processDocument(request); + + // Get all of the document text as one big string + const {text} = result; + + // Extract shards from the text field + function extractText(textAnchor) { + // First shard in document doesn't have startIndex property + const startIndex = textAnchor.textSegments[0].startIndex || 0; + const endIndex = textAnchor.textSegments[0].endIndex; + + return text.substring(startIndex, endIndex); + } + + for (const entity of result.entities) { + console.log(`\nEntity text: ${extractText(entity.textAnchor)}`); + console.log(`Entity type: ${entity.type}`); + console.log(`Entity mention text: ${entity.mentionText}`); + } + } + // [END document_quickstart] + await setEndpoint(); +} + 
+main(...process.argv.slice(2)).catch(err => { + console.error(err); + process.exitCode = 1; +}); diff --git a/document-ai/test/parseForm.test.js b/document-ai/test/batch_parse_form.test.js similarity index 87% rename from document-ai/test/parseForm.test.js rename to document-ai/test/batch_parse_form.test.js index fd07c27dc8..4d603ead7d 100644 --- a/document-ai/test/parseForm.test.js +++ b/document-ai/test/batch_parse_form.test.js @@ -24,14 +24,15 @@ const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); const storage = new Storage(); const bucketName = `nodejs-docs-samples-test-${uuid.v4()}`; -const cmd = `node parseForm.js`; +const cmd = `node batch_parse_form.js`; const testParseForm = { projectId: process.env.GCLOUD_PROJECT, + location: 'us-central1', gcsOutputUriPrefix: uuid.v4(), }; -describe(`Document AI parse form`, () => { +describe(`Document AI batch parse form`, () => { before(async () => { await storage.createBucket(bucketName); }); @@ -44,7 +45,7 @@ describe(`Document AI parse form`, () => { it(`should parse the GCS invoice example as a form`, async () => { const output = execSync( - `${cmd} ${testParseForm.projectId} gs://${bucketName}/` + `${cmd} ${testParseForm.projectId} ${testParseForm.location} gs://${bucketName}` ); assert.match(output, /Extracted key value pair:/); }); diff --git a/document-ai/test/parseTable.test.js b/document-ai/test/batch_parse_table.test.js similarity index 84% rename from document-ai/test/parseTable.test.js rename to document-ai/test/batch_parse_table.test.js index 2ff0a6a466..87e35d2c4d 100644 --- a/document-ai/test/parseTable.test.js +++ b/document-ai/test/batch_parse_table.test.js @@ -24,14 +24,15 @@ const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); const storage = new Storage(); const bucketName = `nodejs-docs-samples-test-${uuid.v4()}`; -const cmd = `node parseTable.js`; +const cmd = `node batch_parse_table.js`; const testParseTable = { projectId: process.env.GCLOUD_PROJECT, + location: 
'us-central1', gcsOutputUriPrefix: uuid.v4(), }; -describe(`Document AI parse table`, () => { +describe(`Document AI batch parse table`, () => { before(async () => { await storage.createBucket(bucketName); }); @@ -44,8 +45,8 @@ describe(`Document AI parse table`, () => { it(`should parse the GCS invoice example as as table`, async () => { const output = execSync( - `${cmd} ${testParseTable.projectId} gs://${bucketName}/` + `${cmd} ${testParseTable.projectId} ${testParseTable.location} gs://${bucketName}` ); - assert.match(output, /First detected language: en/); + assert.match(output, /First detected language:/); }); }); diff --git a/document-ai/test/parse_form.test.js b/document-ai/test/parse_form.test.js new file mode 100644 index 0000000000..1b17e6242d --- /dev/null +++ b/document-ai/test/parse_form.test.js @@ -0,0 +1,35 @@ +/** + * Copyright 2019, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +'use strict'; + +const path = require('path'); +const {assert} = require('chai'); +const cp = require('child_process'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const cwd = path.join(__dirname, '..'); +const projectId = process.env.GCLOUD_PROJECT; +const LOCATION = 'us-central1'; + +describe('Document AI parse form', () => { + it('should parse the GCS invoice example as a form', async () => { + const stdout = execSync(`node ./parse_form.js ${projectId} ${LOCATION}`, { + cwd, + }); + assert.match(stdout, /Extracted key value pair:/); + }); +}); diff --git a/document-ai/test/parse_table.test.js b/document-ai/test/parse_table.test.js new file mode 100644 index 0000000000..a2cc481b63 --- /dev/null +++ b/document-ai/test/parse_table.test.js @@ -0,0 +1,35 @@ +/** + * Copyright 2019, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +'use strict'; + +const path = require('path'); +const {assert} = require('chai'); +const cp = require('child_process'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const cwd = path.join(__dirname, '..'); +const projectId = process.env.GCLOUD_PROJECT; +const LOCATION = 'us-central1'; + +describe('Document AI parse table', () => { + it('should parse the GCS invoice example as as table', async () => { + const stdout = execSync(`node ./parse_table.js ${projectId} ${LOCATION}`, { + cwd, + }); + assert.match(stdout, /Header row/); + }); +}); diff --git a/document-ai/test/parse_with_model.test.js b/document-ai/test/parse_with_model.test.js new file mode 100644 index 0000000000..7857863362 --- /dev/null +++ b/document-ai/test/parse_with_model.test.js @@ -0,0 +1,41 @@ +/** + * Copyright 2019, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +'use strict'; + +const path = require('path'); +const {assert} = require('chai'); +const cp = require('child_process'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const cwd = path.join(__dirname, '..'); +const projectId = process.env.GCLOUD_PROJECT; +const LOCATION = 'us-central1'; +const MODEL_NAME = + process.env.MODEL_NAME || + 'projects/1046198160504/locations/us-central1/models/TCN7483069430457434112'; + +describe('Document AI parse with AutoML model', () => { + it('should run use an AutoML model to parse a PDF', async () => { + const stdout = execSync( + `node ./parse_with_model.js ${projectId} ${LOCATION} ${MODEL_NAME}`, + { + cwd, + } + ); + assert.match(stdout, /Label/); + }); +}); diff --git a/document-ai/test/set_endpoint.test.js b/document-ai/test/set_endpoint.test.js new file mode 100644 index 0000000000..279186dc30 --- /dev/null +++ b/document-ai/test/set_endpoint.test.js @@ -0,0 +1,35 @@ +/** + * Copyright 2019, Google, Inc. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +'use strict'; + +const path = require('path'); +const {assert} = require('chai'); +const cp = require('child_process'); + +const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'}); + +const cwd = path.join(__dirname, '..'); +const projectId = process.env.GCLOUD_PROJECT; +const LOCATION = 'europe-west2'; + +describe('Document AI set endpoint', () => { + it('should process a PDF in another region', async () => { + const stdout = execSync(`node ./set_endpoint.js ${projectId} ${LOCATION}`, { + cwd, + }); + assert.match(stdout, /Entity/); + }); +});