Sfoglia il codice sorgente

working on scraper

Tomi Cvetic 7 anni fa
parent
commit
783ffc3474
3 ha cambiato i file con 300 aggiunte e 17 eliminazioni
  1. 74 3
      src/scraper/components/ScraperInterface.js
  2. 140 14
      src/scraper/state.js
  3. 86 0
      src/scraper/test.js

+ 74 - 3
src/scraper/components/ScraperInterface.js

@@ -1,13 +1,84 @@
 import React from 'react'
+import { FormGroup, ControlLabel, FormControl, Button } from 'react-bootstrap'
+
+/**
+ * Login form of the scraper.
+ *
+ * Reads `state.loginState` and `actions.loginStart` from its props. Pressing
+ * the button dispatches `actions.loginStart` with the values of the two
+ * inputs, and the alert below the button reflects the current login state.
+ */
+class ScraperLogin extends React.Component {
+  constructor (props) {
+    // Forward props so that `this.props` is also defined inside the constructor.
+    super(props)
+    this.loginStart = this.loginStart.bind(this)
+  }
+
+  /**
+   * Click handler of the submit button.
+   * Suppresses the native form submission (which would reload the page) and
+   * hands the entered id and password to the `loginStart` action.
+   */
+  loginStart (event) {
+    event.preventDefault()
+    const { actions } = this.props
+    actions.loginStart({id: this.formId.value, pwd: this.formPwd.value})
+  }
+
+  render () {
+    const { loginState } = this.props.state
+
+    return (
+      <form>
+        <FormGroup controlId='id'>
+          <ControlLabel>Turniernummer</ControlLabel>
+          <FormControl inputRef={input => { this.formId = input }} type='text' placeholder='Turniernummer eingeben' />
+        </FormGroup>
+        <FormGroup controlId='pwd'>
+          <ControlLabel>Passwort</ControlLabel>
+          <FormControl inputRef={input => { this.formPwd = input }} type='password' />
+        </FormGroup>
+        <Button type='submit' onClick={this.loginStart}>
+          Login
+        </Button>
+        {(loginState === 'not_logged_in')
+          ? <div className='alert alert-warning'>Nicht eingeloggt.</div>
+          : (loginState === 'logging_in')
+          ? <div className='alert alert-info'>Login läuft...</div>
+          : (loginState === 'login_failure')
+          ? <div className='alert alert-danger'>Login fehlgeschlagen.</div>
+          : <div className='alert alert-info'>Erfolgreich eingeloggt.</div>
+        }
+      </form>
+    )
+  }
+}
+
+/**
+ * Static tournament picker: a select box with two placeholder options and a
+ * submit button. It reads no props and attaches no handlers.
+ */
+class ScraperTournamentSelection extends React.Component {
+  render () {
+    const tournamentSelect = (
+      <FormGroup controlId='formControlsSelect'>
+        <ControlLabel>Turnier</ControlLabel>
+        <FormControl componentClass='select' placeholder='Turnier auswählen'>
+          <option value='select'>select</option>
+          <option value='other'>...</option>
+        </FormControl>
+      </FormGroup>
+    )
+
+    return (
+      <form>
+        {tournamentSelect}
+        <Button type='submit'>
+          Turnier laden
+        </Button>
+      </form>
+    )
+  }
+}
+
+/** Placeholder component: currently renders nothing but an empty div. */
+class ScraperProgress extends React.Component {
+  render () {
+    return <div />
+  }
+}
 
 class ScraperInterface extends React.Component {
   render () {
-    const { state, actions } = this.props
+    const { actions } = this.props
 
     return (
       <div>
-        <button onClick={actions.startScraping}>Start</button>
-
+        <ScraperLogin {...this.props} />
+        <ScraperTournamentSelection />
+        <button onClick={actions.startScraping}>Turnier laden</button>
+        <ScraperProgress />
       </div>
     )
   }

+ 140 - 14
src/scraper/state.js

@@ -1,43 +1,169 @@
 /** @module scraper/state */
-
+import rp from 'request-promise'
+import { takeLatest, all, call, put } from 'redux-saga/effects'
 /**
  * state.js
  *
  * Collection of everything which has to do with state changes.
  **/
 
-const SCRAPE_FILES = [
-  'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/ProtectedDisplayTournament?Lang=D&tournament=Id104840',
-  'https://comp.swisstennis.ch/advantage/servlet/Calendar?tournament=Id104840&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/Calendar.xls?Lang=D&tournament=Id104840&Type=Match&Inp_DateRangeFilter.fromDate=05.07.2017&Inp_DateRangeFilter.toDate=16.07.2017',
-  'https://comp.swisstennis.ch/advantage/servlet/PlayerList?tournament=Id104840&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/PlayerList.xls?tournament=Id104840&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/DisplayEvent?eventId=425828&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/ModifyDraw?eventId=425828&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/DisplayDraw.xls?eventId=425828&lang=D'
-]
+const SCRAPE_FILES = { // Swisstennis servlet URLs keyed by purpose; tournament Id104840, event 425828 and the date range are hard-coded in the URLs
+  login: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+  tournament: 'https://comp.swisstennis.ch/advantage/servlet/ProtectedDisplayTournament?Lang=D&tournament=Id104840',
+  calendar: 'https://comp.swisstennis.ch/advantage/servlet/Calendar?tournament=Id104840&lang=D',
+  calendarXls: 'https://comp.swisstennis.ch/advantage/servlet/Calendar.xls?Lang=D&tournament=Id104840&Type=Match&Inp_DateRangeFilter.fromDate=05.07.2017&Inp_DateRangeFilter.toDate=16.07.2017',
+  playerList: 'https://comp.swisstennis.ch/advantage/servlet/PlayerList?tournament=Id104840&lang=D',
+  playerListXls: 'https://comp.swisstennis.ch/advantage/servlet/PlayerList.xls?tournament=Id104840&lang=D',
+  event: 'https://comp.swisstennis.ch/advantage/servlet/DisplayEvent?eventId=425828&lang=D',
+  draw: 'https://comp.swisstennis.ch/advantage/servlet/ModifyDraw?eventId=425828&lang=D',
+  drawXls: 'https://comp.swisstennis.ch/advantage/servlet/DisplayDraw.xls?eventId=425828&lang=D'
+}
 
 const DRAW_STATE = /Matches bereit zum spielen/
 
 /** actionTypes define what actions are handled by the reducer. */
 export const actions = {
-
+  // Starts a login attempt; `form` carries the entered id and pwd.
+  loginStart: form => ({ type: 'SCRAPE_LOGIN_START', form }),
+  // Login finished; `data` carries the cookie jar and the fetched tournament page.
+  loginSuccess: data => ({ type: 'SCRAPE_LOGIN_SUCCESS', data }),
+  // Login aborted; `error` describes the step that failed.
+  loginFailure: error => ({ type: 'SCRAPE_LOGIN_FAILURE', error })
 }
 console.log('State actions', actions)
 
 /** state definition */
 export const state = {
+  loginState: 'not_logged_in', // the reducer moves this to 'logging_in', 'logged_in' or 'login_failure'
+  jar: null, // replaced by action.data.jar on SCRAPE_LOGIN_SUCCESS
+  pages: {}, // SCRAPE_LOGIN_SUCCESS targets the key `myTournaments` in here
+  files: {}, // not written by any reducer case visible in this diff
+  tournaments: {}, // not written by any reducer case visible in this diff
+  selectedTournament: {}, // not written by any reducer case visible in this diff
+  categories: {} // not written by any reducer case visible in this diff
 }
 console.log('State state', state)
 
 /** reducer is called by the redux dispatcher and handles all component actions */
 export function reducer (state = [], action) {
+  // Returns the next scraper state for `action`; unknown action types leave
+  // the state object untouched. The state argument is never mutated.
   switch (action.type) {
+    case 'SCRAPE_LOGIN_START':
+      return { ...state, loginState: 'logging_in' }
+    case 'SCRAPE_LOGIN_SUCCESS':
+      // Build a fresh `pages` object. The previous code destructured a
+      // `newPages` key out of the copy, which is always undefined, and then
+      // threw a TypeError when assigning to it.
+      return {
+        ...state,
+        loginState: 'logged_in',
+        jar: action.data.jar,
+        pages: { ...state.pages, myTournaments: action.data.myTournamentsPage }
+      }
+    case 'SCRAPE_LOGIN_FAILURE':
+      return { ...state, loginState: 'login_failure' }
     default:
       return state
   }
 }
 
+/**
+ * Saga worker for SCRAPE_LOGIN_START: logs in to comp.swisstennis.ch.
+ *
+ * All three requests share one cookie jar:
+ *   1. GET the tournament list page,
+ *   2. POST the credentials from `action.form` to the Login servlet,
+ *   3. GET the tournament list page again and dispatch `loginSuccess` with
+ *      the page body and the jar.
+ * As soon as a step fails, `loginFailure` is dispatched and the worker stops.
+ *
+ * @param {{form: {id: string, pwd: string}}} action - the SCRAPE_LOGIN_START action
+ */
+function * swisstennisLogin (action) {
+  // Log only the action type: the action itself carries the password.
+  console.log('swisstennisLogin', action.type)
+  const jar = rp.jar()
+
+  const requestOptions = {
+    uri: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+    // method: 'GET',
+    jar,
+    headers: {
+      Host: 'comp.swisstennis.ch',
+      'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+      Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+      'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+      Connection: 'keep-alive',
+      'Upgrade-Insecure-Requests': '1',
+      'Cache-Control': 'max-age=0'
+    },
+    resolveWithFullResponse: true
+  }
+  const loginOptions = {
+    uri: 'https://comp.swisstennis.ch/advantage/servlet/Login',
+    // method: 'POST',
+    jar,
+    form: {
+      Lang: 'D',
+      id: action.form.id,
+      pwd: action.form.pwd,
+      Tournament: ''
+    },
+    headers: {
+      Host: 'comp.swisstennis.ch',
+      'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+      Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+      'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+      Referer: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+      'Upgrade-Insecure-Requests': '1',
+      Connection: 'keep-alive'
+    }
+  }
+  const downloadOptions = {
+    uri: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+    // method: 'GET',
+    jar,
+    headers: {
+      Host: 'comp.swisstennis.ch',
+      'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+      Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+      'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+      Referer: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+      'Upgrade-Insecure-Requests': '1',
+      Connection: 'keep-alive'
+    }
+  }
+  console.log(jar)
+
+  // Step 1: initial page request with the empty jar.
+  // NOTE(review): presumably this is what obtains the session cookie - confirm.
+  try {
+    console.log('attempting to fetch login page.')
+    const reqPage = yield call(rp.get, requestOptions)
+    console.log('successfully fetched login page.', reqPage, jar)
+  } catch (error) {
+    console.log('Error fetching login page.', jar)
+    yield put(actions.loginFailure(error))
+    return
+  }
+
+  // Step 2: post the credentials. A resolved request means the server answered
+  // with a page body, which this flow treats as a failed login.
+  try {
+    // The options are not logged any more: they contain the password.
+    console.log('attempting to login.')
+    const loginPage = yield call(rp.post, loginOptions)
+    console.log('received a page.', loginPage, jar)
+    if (loginPage.includes('Zugriff verweigert')) {
+      yield put(actions.loginFailure(Error('Login rejected')))
+    } else {
+      yield put(actions.loginFailure(Error('Other login problem')))
+    }
+    return
+  } catch (error) {
+    // A rejected request is treated as the successful login.
+    // NOTE(review): presumably the rejection is the non-2xx redirect answer of
+    // the servlet - confirm. The `return` that used to be here made step 3
+    // unreachable, so `loginSuccess` was never dispatched.
+    console.log('successfully logged in.', error)
+  }
+
+  // Step 3: fetch the tournament list with the jar used for the login.
+  try {
+    console.log('attempting to fetch my tournaments.')
+    const myTournamentsPage = yield call(rp.get, downloadOptions)
+    const match = myTournamentsPage.match(/<a href=".*tournament=Id(\d+)">([^<]+)<\/a>/gm)
+    console.log('tournament page', match)
+    yield put(actions.loginSuccess({ myTournamentsPage, jar }))
+  } catch (error) {
+    console.log('Error fetching tournaments.')
+    yield put(actions.loginFailure(Error('Error fetching tournaments')))
+  }
+}
+
 /** sagas are asynchronous workers (JS generators) to handle the state. */
-export function * saga () {}
+export function * saga () {
+  // One watcher so far: every SCRAPE_LOGIN_START is handed to swisstennisLogin.
+  const watchers = [takeLatest('SCRAPE_LOGIN_START', swisstennisLogin)]
+  yield all(watchers)
+}

+ 86 - 0
src/scraper/test.js

@@ -0,0 +1,86 @@
+/**
+ * Manual smoke test of the Swisstennis login flow.
+ *
+ * Requests the tournament list page, posts the login form and requests the
+ * tournament list again, logging every response and the cookie jar.
+ *
+ * Usage: SWISSTENNIS_ID=... SWISSTENNIS_PWD=... node src/scraper/test.js
+ */
+const rp = require('request-promise')
+const jar = rp.jar()
+
+// Credentials are read from the environment so that no account data is
+// committed to the repository.
+const { SWISSTENNIS_ID, SWISSTENNIS_PWD } = process.env
+if (!SWISSTENNIS_ID || !SWISSTENNIS_PWD) {
+  throw new Error('Set SWISSTENNIS_ID and SWISSTENNIS_PWD before running this script.')
+}
+
+const requestOptions = {
+  uri: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+  jar,
+  headers: {
+    Host: 'comp.swisstennis.ch',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+    Connection: 'keep-alive',
+    'Upgrade-Insecure-Requests': '1',
+    'Cache-Control': 'max-age=0'
+  },
+  resolveWithFullResponse: true
+}
+
+const loginOptions = {
+  uri: 'https://comp.swisstennis.ch/advantage/servlet/Login',
+  jar,
+  form: {
+    Lang: 'D',
+    id: SWISSTENNIS_ID,
+    pwd: SWISSTENNIS_PWD,
+    Tournament: ''
+  },
+  headers: {
+    Host: 'comp.swisstennis.ch',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+    Referer: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+    'Upgrade-Insecure-Requests': '1',
+    Connection: 'keep-alive'
+  }
+}
+
+const downloadOptions = {
+  uri: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+  jar,
+  headers: {
+    Host: 'comp.swisstennis.ch',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+    Referer: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+    Connection: 'keep-alive'
+  }
+}
+
+/** Logs a response (or rejection) of the login request together with the jar. */
+const logLoginOutcome = label => result => {
+  console.log(label)
+  console.log(result)
+  console.log(jar)
+}
+
+/** Requests the tournament list with the shared jar and logs the answer. */
+const fetchTournamentList = () => {
+  console.log('Requesting tournament list.')
+  return rp.get(downloadOptions).then(body => {
+    console.log(body)
+    console.log(jar)
+  })
+}
+
+console.log('Requesting login page.')
+rp.get(requestOptions)
+  .then(body => {
+    console.log(body)
+    console.log(jar)
+    console.log('Requesting login.')
+    // Both outcomes of the login request continue with the tournament list,
+    // so the two cases can be compared in the log.
+    return rp.post(loginOptions).then(logLoginOutcome('in then'), logLoginOutcome('in catch'))
+  })
+  .then(fetchTournamentList)
+  .catch(error => {
+    // Failures of the first or the last request used to be unhandled rejections.
+    console.error('Scrape test failed.', error)
+    process.exitCode = 1
+  })