// pdfScan.js — extracts the text of a PDF and scans it for SCPI command patterns.

  1. const pdfExtract = require('pdf-extract')
  2. const fs = require('fs')
  3. // const path = '/home/tomi/Downloads/2230G-900-01A_Jun_2018_User.pdf'
  4. const path = '/home/tomi/Downloads/MDO4000-B-MSO-DPO4000B-and-MDO3000-Oscilloscope-Programmer-Manual-Rev-A.pdf'
  5. const options = {
  6. type: 'text' // or 'ocr'
  7. }
  8. function getSCPICommands (pages) {
  9. const scpiCommon = /(\*\w+\??)/g
  10. // const header = ``
  11. const scpi = /((?:\*\w+|(?:\[?\w+\]?)(?=:\w+)\]?)(?:\[?:\w+\]?)*\??)(?:\s+(<?\w+>?)(?:[,|]\s*(<?\w+>?))*)?/g
  12. const scpiLines = []
  13. pages.map((page, pageIndex) => {
  14. const lines = page.split('\n')
  15. lines.map((line, lineIndex) => {
  16. const matches = line.match(scpi)
  17. if (matches) {
  18. console.log(pageIndex, lineIndex, matches)
  19. return matches
  20. }
  21. })
  22. })
  23. fs.writeFile('/home/tomi/Downloads/MDO4000.txt', pages.join('\n'), error => {
  24. if (error) console.log(error)
  25. console.log('file was saved.')
  26. })
  27. }
  28. const processor = pdfExtract(path, options, error => {
  29. if (error) return error
  30. })
  31. processor.on('complete', data => {
  32. console.log(data.text_pages)
  33. getSCPICommands(data.text_pages)
  34. })
  35. processor.on('error', error => {
  36. console.error(error)
  37. })