pdfScan.js 810 B

1234567891011121314151617181920212223242526272829
  1. const pdfExtract = require('pdf-extract')
  // PDF to scan. The first command-line argument, when given, overrides the
  // built-in default so the script is not tied to a single machine's layout.
  const path = process.argv[2] || '/home/tomi/Downloads/2230G-900-01A_Jun_2018_User.pdf'

  // Extraction options handed to pdfExtract.
  const options = {
    type: 'text' // or 'ocr'
  }
  /**
   * Scans page texts line by line for SCPI-looking command strings.
   *
   * Every line that contains at least one match is logged to the console as
   * `pageIndex, lineIndex, matches` and is also collected into the returned list.
   *
   * @param {string[]} pages - Text of each page; each entry is split on '\n'.
   * @returns {{pageIndex: number, lineIndex: number, matches: string[]}[]}
   *   One entry per matching line, in page/line order. Empty when `pages` is
   *   not an array or nothing matched.
   */
  function getSCPICommands (pages) {
    // Matches either a `*`-prefixed word, or colon-separated word segments
    // (each optionally wrapped in square brackets), with an optional trailing
    // `?`, optionally followed by whitespace and a `,`/`|`-separated argument
    // list whose items may be wrapped in angle brackets.
    const scpi = /((?:\*\w+|(?:\[?\w+\]?)(?=:\w+)\]?)(?:\[?:\w+\]?)*\??)(?:\s+(<?\w+>?)(?:[,|]\s*(<?\w+>?))*)?/g
    const found = []

    // Nothing to scan (e.g. the extractor produced no page list).
    if (!Array.isArray(pages)) return found

    pages.forEach((page, pageIndex) => {
      page.split('\n').forEach((line, lineIndex) => {
        // With the `g` flag, match() returns the full matched strings or null.
        const matches = line.match(scpi)
        if (!matches) return
        console.log(pageIndex, lineIndex, matches)
        found.push({ pageIndex, lineIndex, matches })
      })
    })

    return found
  }
  // Start extraction of the configured PDF. The returned processor reports its
  // outcome through the 'complete' and 'error' events handled below.
  const processor = pdfExtract(path, options, error => {
    // A value returned from this callback goes nowhere, so report the error
    // here instead of silently dropping it.
    // NOTE(review): presumably this fires for start-up failures such as a
    // missing file - confirm against the pdf-extract documentation.
    if (error) console.error(error)
  })

  // On success, dump the raw page texts and then scan them for SCPI commands.
  processor.on('complete', data => {
    console.log(data.text_pages)
    getSCPICommands(data.text_pages)
  })

  // Report failures emitted by the processor.
  processor.on('error', error => {
    console.error(error)
  })