Skip to content

Commit

Permalink
feat: updated library from protos (#9)
Browse files Browse the repository at this point in the history
  • Loading branch information
telpirion authored and Ace Nassri committed Nov 14, 2022
1 parent 7d64ced commit 2c0f5a3
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 20 deletions.
4 changes: 2 additions & 2 deletions document-ai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@
"test": "mocha test/*.js --timeout 600000"
},
"dependencies": {
"@google-cloud/documentai": "^0.0.1",
"@google-cloud/documentai": "^0.1.0",
"@google-cloud/storage": "^4.2.0"
},
"devDependencies": {
"chai": "^4.2.0",
"mocha": "^6.2.0"
}
}
}
2 changes: 1 addition & 1 deletion document-ai/parseForm.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ async function main(

// Configure the request for batch process
const requests = {
parent: `projects/${projectId}`,
parent: `projects/${projectId}/locations/us-central1`,
requests: [request],
};

Expand Down
2 changes: 1 addition & 1 deletion document-ai/parseTable.js
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ async function main(

// Configure the request for batch process
const requests = {
parent: `projects/${projectId}`,
parent: `projects/${projectId}/locations/us-central1`,
requests: [request],
};

Expand Down
72 changes: 58 additions & 14 deletions document-ai/quickstart.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright 2019, Google, Inc.
* Copyright 2020, Google, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
Expand All @@ -16,21 +16,65 @@
'use strict';

/**
* DESCRIBE WHAT THIS SAMPLE DOES.
* @param {string} LIST EXPECTED ARGUMENTS.
* Process a single PDF.
* @param {string} projectId your Google Cloud project ID
* @param {string} location region to use for this operation
* @param {string} gcsInputUri Cloud Storage URI of the PDF document to parse
*/
async function main() {
// [START LIBRARY_NAME_quickstart]
async function batchProcessDocument() {
const {
DocumentUnderstandingServiceClient,
} = require('@google-cloud/documentai');
const client = new DocumentUnderstandingServiceClient();
// TODO: write sample here that demonstrates batch processing of documents.
console.info(client);
async function main(
projectId,
location,
gcsInputUri = 'gs://cloud-samples-data/documentai/invoice.pdf'
) {
// [START document_quickstart]
/**
* TODO(developer): Uncomment these variables before running the sample.
*/
// const projectId = 'YOUR_PROJECT_ID';
// const location = 'YOUR_PROJECT_LOCATION';
// const gcsInputUri = 'YOUR_SOURCE_PDF';

const {
DocumentUnderstandingServiceClient,
} = require('@google-cloud/documentai');
const client = new DocumentUnderstandingServiceClient();

/**
 * Sends the PDF at `gcsInputUri` to the Document AI client for processing
 * and logs the text, type and mention text of every entity in the result.
 *
 * Reads `projectId`, `location`, `gcsInputUri` and `client` from the
 * enclosing scope.
 *
 * @returns {Promise<void>} resolves once every entity has been logged;
 *   rejects if the `processDocument` call rejects.
 */
async function quickstart() {
  // Configure the request for processing the PDF
  const parent = `projects/${projectId}/locations/${location}`;
  const request = {
    parent,
    inputConfig: {
      gcsSource: {
        uri: gcsInputUri,
      },
      mimeType: 'application/pdf',
    },
  };

  // Recognizes text entities in the PDF document
  const [result] = await client.processDocument(request);

  // Get all of the document text as one big string
  const {text} = result;

  /**
   * Resolves a text anchor to the slice(s) of the document text it covers.
   *
   * @param {object} textAnchor anchor whose `textSegments` hold
   *   `startIndex`/`endIndex` offsets into the document text
   * @returns {string} the text of all segments joined in order, or an empty
   *   string when the anchor has no segments
   */
  function extractText(textAnchor) {
    // An anchor without segments points at no text; reading
    // textSegments[0] here would throw instead of yielding ''
    const segments = (textAnchor && textAnchor.textSegments) || [];

    // An anchor can be split over several segments, so resolve each one
    // rather than only the first
    return segments
      .map(segment => {
        // First shard in document doesn't have startIndex property
        const startIndex = segment.startIndex || 0;
        return text.substring(startIndex, segment.endIndex);
      })
      .join('');
  }

  for (const entity of result.entities) {
    console.log(`\nEntity text: ${extractText(entity.textAnchor)}`);
    console.log(`Entity type: ${entity.type}`);
    console.log(`Entity mention text: ${entity.mentionText}`);
  }
}
// [END LIBRARY_NAME_quickstart]
await batchProcessDocument();
// [END document_quickstart]
await quickstart();
}

main(...process.argv.slice(2)).catch(err => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ const cp = require('child_process');
const execSync = cmd => cp.execSync(cmd, {encoding: 'utf-8'});

const cwd = path.join(__dirname, '..');
const projectId = process.env.GCLOUD_PROJECT;
const LOCATION = 'us-central1';

describe('Quickstart', () => {
it('should run quickstart', async () => {
const stdout = execSync(`node ./quickstart.js`, {cwd});
assert.ok(stdout);
const stdout = execSync(`node ./quickstart.js ${projectId} ${LOCATION}`, {
cwd,
});
assert.match(stdout, /Entity/);
});
});

0 comments on commit 2c0f5a3

Please sign in to comment.