// pdfScan.js

  1. const pdfExtract = require('pdf-extract')
  2. const fs = require('fs')
  // Alternative input document, kept for reference:
  // const path = '/home/tomi/Downloads/2230G-900-01A_Jun_2018_User.pdf'

  // Absolute path of the PDF that is handed to pdfExtract below.
  const path = '/home/tomi/Downloads/MDO4000-B-MSO-DPO4000B-and-MDO3000-Oscilloscope-Programmer-Manual-Rev-A.pdf'

  // Extraction settings handed to pdfExtract together with `path`.
  const options = {
    type: 'text' // or 'ocr'
  }
  /**
   * Scan page text for SCPI-style command strings and dump the raw text to disk.
   *
   * Each page is split on newlines and every line is matched against the SCPI
   * pattern. A line with at least one hit is logged as
   * `pageIndex lineIndex matches` and recorded in the returned list. After the
   * scan, all pages joined with newlines are written asynchronously to
   * `outputPath`; the write result is only reported on the console.
   *
   * @param {string[]} pages - Text of each page, in document order.
   * @param {string} [outputPath='/home/tomi/Downloads/MDO4000.txt'] - File that
   *   receives the joined page text.
   * @returns {{pageIndex: number, lineIndex: number, matches: string[]}[]}
   *   One entry per line that contained at least one match, in scan order.
   */
  function getSCPICommands(pages, outputPath = '/home/tomi/Downloads/MDO4000.txt') {
    // Matches either a `*`-prefixed word or a colon-separated header (with
    // optional square brackets), an optional trailing `?`, and optionally
    // whitespace followed by `,`/`|`-separated arguments such as `<name>`.
    const scpi = /((?:\*\w+|(?:\[?\w+\]?)(?=:\w+)\]?)(?:\[?:\w+\]?)*\??)(?:\s+(<?\w+>?)(?:[,|]\s*(<?\w+>?))*)?/g
    const scpiLines = []

    pages.forEach((page, pageIndex) => {
      page.split('\n').forEach((line, lineIndex) => {
        // With the `g` flag, match() returns every full match or null.
        const matches = line.match(scpi)
        if (!matches) return
        console.log(pageIndex, lineIndex, matches)
        scpiLines.push({ pageIndex, lineIndex, matches })
      })
    })

    fs.writeFile(outputPath, pages.join('\n'), error => {
      if (error) {
        // Do not claim success when the write failed.
        console.error(error)
        return
      }
      console.log('file was saved.')
    })

    return scpiLines
  }
  // Start extraction of `path` with the configured options and keep the
  // returned processor so event handlers can be attached to it.
  const processor = pdfExtract(path, options, error => {
    // Report the failure; the original returned the error object from this
    // callback, and nothing in this file consumed that value.
    if (error) console.error(error)
  })

  // When extraction finishes, print the extracted pages and scan them for
  // SCPI commands.
  processor.on('complete', data => {
    console.log(data.text_pages)
    getSCPICommands(data.text_pages)
  })

  // Surface errors emitted by the processor.
  processor.on('error', error => {
    console.error(error)
  })