Kaynağa Gözat

working on scraper

Tomi Cvetic 7 yıl önce
ebeveyn
işleme
83eefb5037

+ 2 - 0
package.json

@@ -14,6 +14,8 @@
     "react-redux": "^5.0.5",
     "redux": "^3.6.0",
     "redux-saga": "^0.15.4",
+    "request": "^2.81.0",
+    "request-promise": "^4.2.1",
     "xlsx": "^0.10.4"
   },
   "devDependencies": {

+ 5 - 0
src/index.js

@@ -20,6 +20,7 @@ import Main from './Main'
 import playerList from './playerList'
 import calendar from './calendar'
 import layout from './layout'
+import scraper from './scraper'
 import alerts from './alerts'
 
 /**
@@ -31,6 +32,7 @@ const rootReducer = combineReducers({
   playerList: playerList.reducer,
   calendar: calendar.reducer,
   layout: layout.reducer,
+  scraper: scraper.reducer,
   alerts: alerts.reducer
 })
 console.log('Root reducer:', rootReducer)
@@ -40,6 +42,7 @@ const defaultState = {
   playerList: playerList.state,
   calendar: calendar.state,
   layout: layout.state,
+  scraper: scraper.state,
   alerts: alerts.state
 }
 console.log('Default state:', defaultState)
@@ -51,6 +54,7 @@ function * rootSaga () {
     playerList.saga(),
     calendar.saga(),
     layout.saga(),
+    scraper.saga(),
     alerts.saga()
   ])
 }
@@ -115,6 +119,7 @@ const actionCreators = {
   playerList: playerList.actions,
   calendar: calendar.actions,
   layout: layout.actions,
+  scraper: scraper.actions,
   alerts: alerts.actions
 }
 

+ 74 - 3
src/scraper/components/ScraperInterface.js

@@ -1,13 +1,84 @@
 import React from 'react'
+import { FormGroup, ControlLabel, FormControl, Button } from 'react-bootstrap'
+
+/**
+ * Login form for the scraper.
+ *
+ * Props:
+ *   state   - scraper state; `state.loginState` selects the status message.
+ *   actions - scraper action creators; `actions.loginStart` is called on submit.
+ */
+class ScraperLogin extends React.Component {
+  constructor (props) {
+    // Forward props to React.Component so `this.props` is set during construction.
+    super(props)
+    this.loginStart = this.loginStart.bind(this)
+  }
+
+  /** Dispatches the login action with the current values of both inputs. */
+  loginStart (event) {
+    // Suppress the browser's default handling of the submit button.
+    event.preventDefault()
+    const { actions } = this.props
+    actions.loginStart({id: this.formId.value, pwd: this.formPwd.value})
+  }
+
+  /** Returns the alert element matching the given login state. */
+  renderStatus (loginState) {
+    switch (loginState) {
+      case 'not_logged_in':
+        return <div className='alert alert-warning'>Nicht eingeloggt.</div>
+      case 'logging_in':
+        return <div className='alert alert-info'>Login läuft...</div>
+      case 'login_failure':
+        return <div className='alert alert-danger'>Login fehlgeschlagen.</div>
+      default:
+        // Any other state is shown as a successful login.
+        return <div className='alert alert-info'>Erfolgreich eingeloggt.</div>
+    }
+  }
+
+  render () {
+    const { loginState } = this.props.state
+
+    return (
+      <form>
+        <FormGroup controlId='id'>
+          <ControlLabel>Turniernummer</ControlLabel>
+          <FormControl inputRef={input => { this.formId = input }} type='text' placeholder='Turniernummer eingeben' />
+        </FormGroup>
+        <FormGroup controlId='pwd'>
+          <ControlLabel>Passwort</ControlLabel>
+          <FormControl inputRef={input => { this.formPwd = input }} type='password' />
+        </FormGroup>
+        <Button type='submit' onClick={this.loginStart}>
+          Login
+        </Button>
+        {this.renderStatus(loginState)}
+      </form>
+    )
+  }
+}
+
+/** Static form with a tournament drop-down and a submit button; reads no props and attaches no handlers. */
+class ScraperTournamentSelection extends React.Component {
+  render () {
+    const tournamentSelect = (
+      <FormControl componentClass='select' placeholder='Turnier auswählen'>
+        <option value='select'>select</option>
+        <option value='other'>...</option>
+      </FormControl>
+    )
+    const loadButton = <Button type='submit'>Turnier laden</Button>
+
+    return (
+      <form>
+        <FormGroup controlId='formControlsSelect'>
+          <ControlLabel>Turnier</ControlLabel>
+          {tournamentSelect}
+        </FormGroup>
+        {loadButton}
+      </form>
+    )
+  }
+}
+
+/** Placeholder component: currently renders an empty div. */
+class ScraperProgress extends React.Component {
+  render () {
+    return <div />
+  }
+}
 
 class ScraperInterface extends React.Component {
   render () {
-    const { state, actions } = this.props
+    const { actions } = this.props
 
     return (
       <div>
-        <button onClick={actions.startScraping}>Start</button>
-
+        <ScraperLogin {...this.props} />
+        <ScraperTournamentSelection />
+        <button onClick={actions.startScraping}>Turnier laden</button>
+        <ScraperProgress />
       </div>
     )
   }

+ 140 - 14
src/scraper/state.js

@@ -1,43 +1,169 @@
 /** @module scraper/state */
-
+import rp from 'request-promise'
+import { takeLatest, all, call, put } from 'redux-saga/effects'
 /**
  * state.js
  *
  * Collection of everything which has to do with state changes.
  **/
 
-const SCRAPE_FILES = [
-  'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/ProtectedDisplayTournament?Lang=D&tournament=Id104840',
-  'https://comp.swisstennis.ch/advantage/servlet/Calendar?tournament=Id104840&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/Calendar.xls?Lang=D&tournament=Id104840&Type=Match&Inp_DateRangeFilter.fromDate=05.07.2017&Inp_DateRangeFilter.toDate=16.07.2017',
-  'https://comp.swisstennis.ch/advantage/servlet/PlayerList?tournament=Id104840&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/PlayerList.xls?tournament=Id104840&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/DisplayEvent?eventId=425828&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/ModifyDraw?eventId=425828&lang=D',
-  'https://comp.swisstennis.ch/advantage/servlet/DisplayDraw.xls?eventId=425828&lang=D'
-]
+// Swisstennis servlet URLs, keyed by the page or spreadsheet they point to.
+// NOTE(review): the tournament id (Id104840), the event id (425828) and the
+// calendar date range are hard-coded in the query strings — presumably
+// placeholders until a tournament can be selected; confirm before relying on them.
+const SCRAPE_FILES = {
+  login: 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D',
+  tournament: 'https://comp.swisstennis.ch/advantage/servlet/ProtectedDisplayTournament?Lang=D&tournament=Id104840',
+  calendar: 'https://comp.swisstennis.ch/advantage/servlet/Calendar?tournament=Id104840&lang=D',
+  calendarXls: 'https://comp.swisstennis.ch/advantage/servlet/Calendar.xls?Lang=D&tournament=Id104840&Type=Match&Inp_DateRangeFilter.fromDate=05.07.2017&Inp_DateRangeFilter.toDate=16.07.2017',
+  playerList: 'https://comp.swisstennis.ch/advantage/servlet/PlayerList?tournament=Id104840&lang=D',
+  playerListXls: 'https://comp.swisstennis.ch/advantage/servlet/PlayerList.xls?tournament=Id104840&lang=D',
+  event: 'https://comp.swisstennis.ch/advantage/servlet/DisplayEvent?eventId=425828&lang=D',
+  draw: 'https://comp.swisstennis.ch/advantage/servlet/ModifyDraw?eventId=425828&lang=D',
+  drawXls: 'https://comp.swisstennis.ch/advantage/servlet/DisplayDraw.xls?eventId=425828&lang=D'
+}
 
 const DRAW_STATE = /Matches bereit zum spielen/
 
 /** Action creators: each one builds an action object that is handled by the reducer. */
 export const actions = {
-
+  // Login requested; `form` carries the values entered in the login form.
+  loginStart: form => ({ type: 'SCRAPE_LOGIN_START', form }),
+  // Login finished; `data` is the payload the reducer stores.
+  loginSuccess: data => ({ type: 'SCRAPE_LOGIN_SUCCESS', data }),
+  // Login failed; `error` describes what went wrong.
+  loginFailure: error => ({ type: 'SCRAPE_LOGIN_FAILURE', error })
 }
 console.log('State actions', actions)
 
 /** state definition */
 export const state = {
+  // Login status; the reducer switches it between 'not_logged_in' (initial),
+  // 'logging_in', 'logged_in' and 'login_failure'.
+  loginState: 'not_logged_in',
+  // Cookie jar delivered with SCRAPE_LOGIN_SUCCESS; null until then.
+  jar: null,
+  // Downloaded pages; SCRAPE_LOGIN_SUCCESS stores the tournament list under `myTournaments`.
+  pages: {},
+  // NOTE(review): the fields below are not written by any visible code yet —
+  // presumably reserved for later scraping steps.
+  files: {},
+  tournaments: {},
+  selectedTournament: {},
+  categories: {}
 }
 console.log('State state', state)
 
 /** reducer is called by the redux dispatcher and handles all component actions */
 export function reducer (state = [], action) {
   switch (action.type) {
+    case 'SCRAPE_LOGIN_START':
+      return { ...state, loginState: 'logging_in' }
+    case 'SCRAPE_LOGIN_SUCCESS': {
+      // Build a fresh `pages` object (copy plus the new entry) so the previous
+      // state is never mutated. The braces scope the const to this case.
+      const newPages = { ...state.pages, myTournaments: action.data.myTournamentsPage }
+      return { ...state, loginState: 'logged_in', jar: action.data.jar, pages: newPages }
+    }
+    case 'SCRAPE_LOGIN_FAILURE':
+      return { ...state, loginState: 'login_failure' }
     default:
       return state
   }
 }
 
+/**
+ * Saga worker for SCRAPE_LOGIN_START.
+ *
+ * Runs three requests that share one cookie jar:
+ *   1. GET the tournament list page,
+ *   2. POST the credentials from `action.form` to the Login servlet,
+ *   3. GET the tournament list page again.
+ * Dispatches `loginSuccess({ myTournamentsPage, jar })` when step 3 succeeds,
+ * otherwise `loginFailure(error)`.
+ *
+ * @param {{form: {id: string, pwd: string}}} action - the SCRAPE_LOGIN_START action
+ */
+function * swisstennisLogin (action) {
+  // Log only the action type: the action itself carries the password.
+  console.log('swisstennisLogin', action.type)
+  const jar = rp.jar()
+
+  const tournamentListUri = 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D'
+  // Browser-like headers sent with all three requests.
+  const commonHeaders = {
+    Host: 'comp.swisstennis.ch',
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+    'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+    Connection: 'keep-alive',
+    'Upgrade-Insecure-Requests': '1'
+  }
+
+  const requestOptions = {
+    uri: tournamentListUri,
+    jar,
+    headers: { ...commonHeaders, 'Cache-Control': 'max-age=0' },
+    resolveWithFullResponse: true
+  }
+  const loginOptions = {
+    uri: 'https://comp.swisstennis.ch/advantage/servlet/Login',
+    jar,
+    form: {
+      Lang: 'D',
+      id: action.form.id,
+      pwd: action.form.pwd,
+      Tournament: ''
+    },
+    headers: { ...commonHeaders, Referer: tournamentListUri }
+  }
+  const downloadOptions = {
+    uri: tournamentListUri,
+    jar,
+    headers: { ...commonHeaders, Referer: tournamentListUri }
+  }
+  console.log(jar)
+
+  // Step 1: fetch the page once so the jar is passed along on the first request.
+  try {
+    console.log('attempting to fetch login page.')
+    const reqPage = yield call(rp.get, requestOptions)
+    console.log('successfully fetched login page.', reqPage, jar)
+  } catch (error) {
+    console.log('Error fetching login page.', jar)
+    yield put(actions.loginFailure(error))
+    return
+  }
+
+  // Step 2: post the credentials. The options are not logged because they
+  // contain the password.
+  try {
+    console.log('attempting to login.')
+    const loginPage = yield call(rp.post, loginOptions)
+    // A resolved POST means a page came back, which is treated as a failed login.
+    console.log('received a page.', loginPage, jar)
+    if (loginPage.includes('Zugriff verweigert')) {
+      yield put(actions.loginFailure(Error('Login rejected')))
+    } else {
+      yield put(actions.loginFailure(Error('Other login problem')))
+    }
+    return
+  } catch (error) {
+    // A rejected POST is treated as a successful login, so execution must
+    // continue with step 3 instead of returning here.
+    // NOTE(review): presumably the servlet answers a good login with a redirect,
+    // which request-promise rejects as a non-2xx status — confirm, and consider
+    // checking error.statusCode so transport errors are not taken for success.
+    console.log('successfully logged in.', error)
+  }
+
+  // Step 3: download the tournament list with the session cookies in the jar.
+  try {
+    console.log('attempting to fetch my tournaments.')
+    const myTournamentsPage = yield call(rp.get, downloadOptions)
+    const match = myTournamentsPage.match(/<a href=".*tournament=Id(\d+)">([^<]+)<\/a>/gm)
+    console.log('tournament page', match)
+    yield put(actions.loginSuccess({ myTournamentsPage, jar }))
+  } catch (error) {
+    console.log('Error fetching tournaments.')
+    yield put(actions.loginFailure(Error('Error fetching tournaments')))
+  }
+}
+
 /** sagas are asynchronous workers (JS generators) to handle the state. */
-export function * saga () {}
+export function * saga () {
+  // Every SCRAPE_LOGIN_START is handed to the login worker via takeLatest.
+  const loginWatcher = takeLatest('SCRAPE_LOGIN_START', swisstennisLogin)
+  yield all([loginWatcher])
+}

+ 86 - 0
src/scraper/test.js

@@ -0,0 +1,86 @@
+/**
+ * Manual smoke script for the Swisstennis login flow.
+ *
+ * Fetches the tournament list page, posts the login form and then fetches the
+ * tournament list again, logging every response and the shared cookie jar.
+ * Credentials are read from the SWISSTENNIS_ID and SWISSTENNIS_PWD environment
+ * variables so that no login data is stored in the repository.
+ */
+const rp = require('request-promise')
+const jar = rp.jar()
+
+const { SWISSTENNIS_ID: id = '', SWISSTENNIS_PWD: pwd = '' } = process.env
+
+const tournamentListUri = 'https://comp.swisstennis.ch/advantage/servlet/MyTournamentList?Lang=D'
+
+// Browser-like headers sent with every request.
+const commonHeaders = {
+  Host: 'comp.swisstennis.ch',
+  'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
+  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+  'Accept-Language': 'de-CH,de;q=0.8,en-US;q=0.5,en;q=0.3',
+  Connection: 'keep-alive'
+}
+
+const requestOptions = {
+  uri: tournamentListUri,
+  jar,
+  headers: {
+    ...commonHeaders,
+    'Upgrade-Insecure-Requests': '1',
+    'Cache-Control': 'max-age=0'
+  },
+  resolveWithFullResponse: true
+}
+
+const loginOptions = {
+  uri: 'https://comp.swisstennis.ch/advantage/servlet/Login',
+  jar,
+  form: {
+    Lang: 'D',
+    id,
+    pwd,
+    Tournament: ''
+  },
+  headers: {
+    ...commonHeaders,
+    Referer: tournamentListUri,
+    'Upgrade-Insecure-Requests': '1'
+  }
+}
+
+const downloadOptions = {
+  uri: tournamentListUri,
+  jar,
+  headers: {
+    ...commonHeaders,
+    Referer: tournamentListUri
+  }
+}
+
+console.log('Requesting login page.')
+rp.get(requestOptions)
+  .then(body => {
+    console.log(body)
+    console.log(jar)
+    console.log('Requesting login.')
+    // Whether the POST resolves or rejects, the tournament list is requested
+    // next, so both outcomes are folded into one value.
+    return rp.post(loginOptions).then(
+      page => {
+        console.log('in then')
+        return page
+      },
+      error => {
+        console.log('in catch')
+        return error
+      }
+    )
+  })
+  .then(loginResult => {
+    console.log(loginResult)
+    console.log(jar)
+    console.log('Requesting tournament list.')
+    return rp.get(downloadOptions)
+  })
+  .then(body => {
+    console.log(body)
+    console.log(jar)
+  })
+  .catch(error => {
+    // Report failures of the first or last request instead of leaving the
+    // rejection unhandled.
+    console.error('Scraper test run failed.', error)
+    process.exitCode = 1
+  })

+ 2 - 0
src/startPage/components/StartPage.js

@@ -1,6 +1,7 @@
 import React from 'react'
 import { PlayerForm } from '../../playerList/components'
 import { MatchForm } from '../../calendar/components'
+import { ScraperInterface } from '../../scraper/components'
 
 class StartPage extends React.Component {
   render () {
@@ -10,6 +11,7 @@ class StartPage extends React.Component {
         <p>Willkommen beim SZTM Planungshelfer</p>
         <PlayerForm state={this.props.playerList} actions={this.props.playerListActions} />
         <MatchForm state={this.props.calendar} actions={this.props.calendarActions} />
+        <ScraperInterface state={this.props.scraper} actions={this.props.scraperActions} />
       </div>
     )
   }

+ 23 - 8
yarn.lock

@@ -988,7 +988,7 @@ block-stream@*:
   dependencies:
     inherits "~2.0.0"
 
-bluebird@^3.4.7:
+bluebird@^3.4.7, bluebird@^3.5.0:
   version "3.5.0"
   resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.5.0.tgz#791420d7f551eea2897453a8a77653f96606d67c"
 
@@ -3818,7 +3818,7 @@ lodash.uniq@^4.5.0:
   version "4.5.0"
   resolved "https://registry.yarnpkg.com/lodash.uniq/-/lodash.uniq-4.5.0.tgz#d0225373aeb652adc1bc82e4945339a842754773"
 
-"lodash@>=3.5 <5", lodash@^4.0.0, lodash@^4.14.0, lodash@^4.15.0, lodash@^4.17.2, lodash@^4.17.3, lodash@^4.2.0, lodash@^4.2.1, lodash@^4.3.0:
+"lodash@>=3.5 <5", lodash@^4.0.0, lodash@^4.13.1, lodash@^4.14.0, lodash@^4.15.0, lodash@^4.17.2, lodash@^4.17.3, lodash@^4.2.0, lodash@^4.2.1, lodash@^4.3.0:
   version "4.17.4"
   resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.4.tgz#78203a4d1c328ae1d86dca6460e369b57f4055ae"
 
@@ -3935,11 +3935,7 @@ miller-rabin@^4.0.0:
     bn.js "^4.0.0"
     brorand "^1.0.1"
 
-"mime-db@>= 1.27.0 < 2":
-  version "1.28.0"
-  resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.28.0.tgz#fedd349be06d2865b7fc57d837c6de4f17d7ac3c"
-
-mime-db@~1.27.0:
+"mime-db@>= 1.27.0 < 2", mime-db@~1.27.0:
   version "1.27.0"
   resolved "https://registry.yarnpkg.com/mime-db/-/mime-db-1.27.0.tgz#820f572296bbd20ec25ed55e5b5de869e5436eb1"
 
@@ -5241,6 +5237,21 @@ repeating@^2.0.0:
   dependencies:
     is-finite "^1.0.0"
 
+request-promise-core@1.1.1:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/request-promise-core/-/request-promise-core-1.1.1.tgz#3eee00b2c5aa83239cfb04c5700da36f81cd08b6"
+  dependencies:
+    lodash "^4.13.1"
+
+request-promise@^4.2.1:
+  version "4.2.1"
+  resolved "https://registry.yarnpkg.com/request-promise/-/request-promise-4.2.1.tgz#7eec56c89317a822cbfea99b039ce543c2e15f67"
+  dependencies:
+    bluebird "^3.5.0"
+    request-promise-core "1.1.1"
+    stealthy-require "^1.1.0"
+    tough-cookie ">=2.3.0"
+
 request@^2.79.0, request@^2.81.0:
   version "2.81.0"
   resolved "https://registry.yarnpkg.com/request/-/request-2.81.0.tgz#c6928946a0e06c5f8d6f8a9333469ffda46298a0"
@@ -5652,6 +5663,10 @@ sshpk@^1.7.0:
   version "1.3.1"
   resolved "https://registry.yarnpkg.com/statuses/-/statuses-1.3.1.tgz#faf51b9eb74aaef3b3acf4ad5f61abf24cb7b93e"
 
+stealthy-require@^1.1.0:
+  version "1.1.1"
+  resolved "https://registry.yarnpkg.com/stealthy-require/-/stealthy-require-1.1.1.tgz#35b09875b4ff49f26a777e509b3090a3226bf24b"
+
 stream-browserify@^2.0.1:
   version "2.0.1"
   resolved "https://registry.yarnpkg.com/stream-browserify/-/stream-browserify-2.0.1.tgz#66266ee5f9bdb9940a4e4514cafb43bb71e5c9db"
@@ -5924,7 +5939,7 @@ toposort@^1.0.0:
   version "1.0.3"
   resolved "https://registry.yarnpkg.com/toposort/-/toposort-1.0.3.tgz#f02cd8a74bd8be2fc0e98611c3bacb95a171869c"
 
-tough-cookie@^2.3.2, tough-cookie@~2.3.0:
+tough-cookie@>=2.3.0, tough-cookie@^2.3.2, tough-cookie@~2.3.0:
   version "2.3.2"
   resolved "https://registry.yarnpkg.com/tough-cookie/-/tough-cookie-2.3.2.tgz#f081f76e4c85720e6c37a5faced737150d84072a"
   dependencies: