const pdfExtract = require('pdf-extract')
const fs = require('fs')

// Alternative input document:
// const path = '/home/tomi/Downloads/2230G-900-01A_Jun_2018_User.pdf'
const path = '/home/tomi/Downloads/MDO4000-B-MSO-DPO4000B-and-MDO3000-Oscilloscope-Programmer-Manual-Rev-A.pdf'

// Destination of the raw extracted text (all pages joined by newlines).
const outputPath = '/home/tomi/Downloads/MDO4000.txt'

const options = {
  type: 'text' // or 'ocr'
}

/**
 * Pattern for SCPI-looking commands inside a line of text.
 *
 * Capture group 1 is the command header: either a `*`-prefixed word
 * (e.g. `*IDN?`) or a colon-separated mnemonic path whose nodes may be
 * wrapped in square brackets (e.g. `ACQuire:MODe?`, `[SOURce]:VOLTage`),
 * followed by an optional trailing `?`.
 *
 * The optional tail matches whitespace followed by one or more arguments
 * separated by `,` or `|`.
 *
 * NOTE(review): the argument groups in the previous pattern were empty `(?)`
 * groups, which is not valid regular-expression syntax, so the intended
 * argument token could not be recovered. Angle-bracketed placeholders such as
 * `<NR1>` are an assumption - confirm against the notation used in the manual.
 */
const scpi = /((?:\*\w+|(?:\[?\w+\]?)(?=:\w+)\]?)(?:\[?:\w+\]?)*\??)(?:\s+(?:<\w+>)(?:[,|]\s*(?:<\w+>))*)?/g

/**
 * Scans every line of every page for SCPI-looking commands, logs each hit,
 * and writes the raw page text to `outputPath`.
 *
 * The file write is asynchronous; this function returns before it completes.
 *
 * @param {string[]} pages - Text of each page, one string per page.
 * @returns {{pageIndex: number, lineIndex: number, matches: string[]}[]}
 *   One entry per line that contained at least one match. Indices are
 *   zero-based; `matches` holds the full matched substrings.
 */
function getSCPICommands(pages) {
  const scpiLines = []

  pages.forEach((page, pageIndex) => {
    page.split('\n').forEach((line, lineIndex) => {
      // With the `g` flag, String.prototype.match returns every full match
      // on the line (or null), not the capture groups.
      const matches = line.match(scpi)
      if (matches) {
        console.log(pageIndex, lineIndex, matches)
        scpiLines.push({ pageIndex, lineIndex, matches })
      }
    })
  })

  fs.writeFile(outputPath, pages.join('\n'), error => {
    if (error) {
      // Do not report success when the write failed.
      console.error(error)
      return
    }
    console.log('file was saved.')
  })

  return scpiLines
}

const processor = pdfExtract(path, options, error => {
  // Surface start-up failures instead of silently discarding them.
  if (error) console.error(error)
})

processor.on('complete', data => {
  console.log(data.text_pages)
  getSCPICommands(data.text_pages)
})

processor.on('error', error => {
  console.error(error)
})