diff --git a/document-ai/quickstart.js b/document-ai/quickstart.js deleted file mode 100644 index a1fe28e1ad..0000000000 --- a/document-ai/quickstart.js +++ /dev/null @@ -1,94 +0,0 @@ -/** - * Copyright 2020, Google, Inc. - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -'use strict'; - -async function main(projectId, location, processorId, filePath) { - // [START documentai_quickstart] - /** - * TODO(developer): Uncomment these variables before running the sample. - */ - // const projectId = 'YOUR_PROJECT_ID'; - // const location = 'YOUR_PROJECT_LOCATION'; // Format is 'us' or 'eu' - // const processorId = 'YOUR_PROCESSOR_ID'; // Create processor in Cloud Console - // const filePath = '/path/to/local/pdf'; - - const {DocumentProcessorServiceClient} = - require('@google-cloud/documentai').v1; - - // Instantiates a client - // apiEndpoint regions available: eu-documentai.googleapis.com, us-documentai.googleapis.com (Required if using eu based processor) - // const client = new DocumentProcessorServiceClient({apiEndpoint: 'eu-documentai.googleapis.com'}); - const client = new DocumentProcessorServiceClient(); - - async function quickstart() { - // The full resource name of the processor, e.g.: - // projects/project-id/locations/location/processor/processor-id - // You must create new processors in the Cloud Console first - const name = `projects/${projectId}/locations/${location}/processors/${processorId}`; - - // Read the file into memory. - const fs = require('fs').promises; - const imageFile = await fs.readFile(filePath); - - // Convert the image data to a Buffer and base64 encode it. - const encodedImage = Buffer.from(imageFile).toString('base64'); - - const request = { - name, - rawDocument: { - content: encodedImage, - mimeType: 'application/pdf', - }, - }; - - // Recognizes text entities in the PDF document - const [result] = await client.processDocument(request); - const {document} = result; - - // Get all of the document text as one big string - const {text} = document; - - // Extract shards from the text field - const getText = textAnchor => { - if (!textAnchor.textSegments || textAnchor.textSegments.length === 0) { - return ''; - } - - // First shard in document doesn't have startIndex property - const startIndex = textAnchor.textSegments[0].startIndex || 0; - const endIndex = textAnchor.textSegments[0].endIndex; - - return text.substring(startIndex, endIndex); - }; - - // Read the text recognition output from the processor - console.log('The document contains the following paragraphs:'); - const [page1] = document.pages; - const {paragraphs} = page1; - - for (const paragraph of paragraphs) { - const paragraphText = getText(paragraph.layout.textAnchor); - console.log(`Paragraph text:\n${paragraphText}`); - } - } - // [END documentai_quickstart] - await quickstart(); -} - -main(...process.argv.slice(2)).catch(err => { - console.error(err); - process.exitCode = 1; -});