diff options
Diffstat (limited to 'management/parsers')
| -rw-r--r-- | management/parsers/abstractparser.py | 5 | ||||
| -rw-r--r-- | management/parsers/ups2018.py | 67 | 
2 files changed, 42 insertions, 30 deletions
diff --git a/management/parsers/abstractparser.py b/management/parsers/abstractparser.py
index 8d55b6d..3164082 100644
--- a/management/parsers/abstractparser.py
+++ b/management/parsers/abstractparser.py
@@ -50,3 +50,8 @@ class AbstractParser(metaclass=abc.ABCMeta):
 
     def get_source(self):
         return self._make_request(self.source.url)
+
+
+class ParserError(Exception):
+    def __init__(self, message):
+        super(Exception, self).__init__(message)
diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py
index 522a26a..f1da5bf 100644
--- a/management/parsers/ups2018.py
+++ b/management/parsers/ups2018.py
@@ -28,20 +28,22 @@ import requests
 
 from ...models import Course, Group, Room
 from ...utils import get_current_week, get_week
-from .abstractparser import AbstractParser
+from .abstractparser import AbstractParser, ParserError
 
 VARNAME = "v.events.list = "
 
 
 def find_events_list(soup):
     res = []
+    found = False
     for script in soup.xpath("//script/text()"):
         if VARNAME in script:
             for var in script.split('\n'):
                 if var.startswith(VARNAME):
                     res = json.loads(var[len(VARNAME):-2])
+                    found = True
 
-    return res
+    return res, found
 
 
 def get_next_month(dt):
@@ -52,21 +54,40 @@ def get_next_month(dt):
 class Parser(AbstractParser):
     def __init__(self, source):
         super(Parser, self).__init__(source)
+        self.events = [self._make_request(source.url)]
+        self.source = source
+
+    def _make_request(self, url, date=None):
+        events, found = [], False
+        attempts = 0
+        params = {}
+
+        if date is not None:
+            params["Date"] = date
+
+        while not found:
+            if attempts == 3:
+                raise ParserError("Failed to retrieve {0}".format(url))
+            attempts += 1
+
+            # En-tête tiré de mon Firefox…
+            req = super(Parser, self)._make_request(
+                url, params=params,
+                headers={"Accept-Language": "en-US,en;q=0.5"},
+            )
+            req.raise_for_status()
 
-        # En-tête tiré de mon Firefox…
-        base_req = self._make_request(
-            source.url, headers={"Accept-Language": "en-US,en;q=0.5"}
-        )
+            parser = lxml.html.HTMLParser(encoding="utf8")
+            soup = lxml.html.document_fromstring(req.content, parser=parser)
+            events, found = find_events_list(soup)
 
-        parser = lxml.html.HTMLParser(encoding="utf-8")
-        self.soup = lxml.html.document_fromstring(
-            base_req.content, parser=parser
-        )
+        if date is None:
+            self.months = []
+            for option in soup.xpath("//option"):
+                if option.get("selected") is not None or len(self.months) > 0:
+                    self.months.append(option.text)
 
-        self.months = []
-        for option in self.soup.xpath("//option"):
-            if option.get("selected") is not None or len(self.months) > 0:
-                self.months.append(option.text)
+        return events
 
     def __get_event(self, event, today,
                     beginning_of_month, end_of_month,
@@ -179,19 +200,7 @@ class Parser(AbstractParser):
         )
         month_str = month.replace(day=first_monday).strftime("%Y%m%d")
 
-        req = self._make_request(
-            self.source.url,
-            headers={
-                "Accept-Language": "en-US,en;q=0.5",
-            },
-            params={"Date": month_str},
-        )
-        req.raise_for_status()
-
-        parser = lxml.html.HTMLParser(encoding="utf8")
-        soup = lxml.html.document_fromstring(req.content, parser=parser)
-
-        return find_events_list(soup)
+        return self._make_request(self.source.url, month_str)
 
     @asyncio.coroutine
     def get_months_async(self):
@@ -217,7 +226,5 @@ class Parser(AbstractParser):
         return events
 
     def get_source(self):
-        self.events = [
-            find_events_list(self.soup)
-        ] + self.get_source_from_months()
+        self.events += self.get_source_from_months()
         return self.events
