diff options
author | Alban Gruin | 2018-10-09 20:34:46 +0200 |
---|---|---|
committer | Alban Gruin | 2018-10-09 20:34:46 +0200 |
commit | b2bd76229ca012024f6cbea29af25d0ed170d213 (patch) | |
tree | ba5a58e6c8779dfa347fd26fe322fb3bc729f746 | |
parent | c95740a4d97fc579f4f12a6abf01eac446fde1cc (diff) | |
parent | 11f340b4c3adb4200ff41e7e4587392b10b13e47 (diff) |
Merge branch 'ag/ups2018-correctifs-apres-tests'
Signed-off-by: Alban Gruin <alban at pa1ch dot fr>
-rw-r--r-- | management/parsers/abstractparser.py | 5 | ||||
-rw-r--r-- | management/parsers/ups2018.py | 67 |
2 files changed, 42 insertions, 30 deletions
```diff
diff --git a/management/parsers/abstractparser.py b/management/parsers/abstractparser.py
index 8d55b6d..3164082 100644
--- a/management/parsers/abstractparser.py
+++ b/management/parsers/abstractparser.py
@@ -50,3 +50,8 @@ class AbstractParser(metaclass=abc.ABCMeta):
 
     def get_source(self):
         return self._make_request(self.source.url)
+
+
+class ParserError(Exception):
+    def __init__(self, message):
+        super(Exception, self).__init__(message)
diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py
index 522a26a..f1da5bf 100644
--- a/management/parsers/ups2018.py
+++ b/management/parsers/ups2018.py
@@ -28,20 +28,22 @@ import requests
 
 from ...models import Course, Group, Room
 from ...utils import get_current_week, get_week
-from .abstractparser import AbstractParser
+from .abstractparser import AbstractParser, ParserError
 
 VARNAME = "v.events.list = "
 
 
 def find_events_list(soup):
     res = []
+    found = False
     for script in soup.xpath("//script/text()"):
         if VARNAME in script:
             for var in script.split('\n'):
                 if var.startswith(VARNAME):
                     res = json.loads(var[len(VARNAME):-2])
+                    found = True
 
-    return res
+    return res, found
 
 
 def get_next_month(dt):
@@ -52,21 +54,40 @@ def get_next_month(dt):
 class Parser(AbstractParser):
     def __init__(self, source):
         super(Parser, self).__init__(source)
+        self.events = [self._make_request(source.url)]
+        self.source = source
+
+    def _make_request(self, url, date=None):
+        events, found = [], False
+        attempts = 0
+        params = {}
+
+        if date is not None:
+            params["Date"] = date
+
+        while not found:
+            if attempts == 3:
+                raise ParserError("Failed to retrieve {0}".format(url))
+            attempts += 1
+
+            # En-tête tiré de mon Firefox…
+            req = super(Parser, self)._make_request(
+                url, params=params,
+                headers={"Accept-Language": "en-US,en;q=0.5"},
+            )
+            req.raise_for_status()
 
-        # En-tête tiré de mon Firefox…
-        base_req = self._make_request(
-            source.url, headers={"Accept-Language": "en-US,en;q=0.5"}
-        )
+            parser = lxml.html.HTMLParser(encoding="utf8")
+            soup = lxml.html.document_fromstring(req.content, parser=parser)
+            events, found = find_events_list(soup)
 
-        parser = lxml.html.HTMLParser(encoding="utf-8")
-        self.soup = lxml.html.document_fromstring(
-            base_req.content, parser=parser
-        )
+        if date is None:
+            self.months = []
+            for option in soup.xpath("//option"):
+                if option.get("selected") is not None or len(self.months) > 0:
+                    self.months.append(option.text)
 
-        self.months = []
-        for option in self.soup.xpath("//option"):
-            if option.get("selected") is not None or len(self.months) > 0:
-                self.months.append(option.text)
+        return events
 
 
     def __get_event(self, event, today, beginning_of_month, end_of_month,
@@ -179,19 +200,7 @@ class Parser(AbstractParser):
         )
         month_str = month.replace(day=first_monday).strftime("%Y%m%d")
 
-        req = self._make_request(
-            self.source.url,
-            headers={
-                "Accept-Language": "en-US,en;q=0.5",
-            },
-            params={"Date": month_str},
-        )
-        req.raise_for_status()
-
-        parser = lxml.html.HTMLParser(encoding="utf8")
-        soup = lxml.html.document_fromstring(req.content, parser=parser)
-
-        return find_events_list(soup)
+        return self._make_request(self.source.url, month_str)
 
     @asyncio.coroutine
     def get_months_async(self):
@@ -217,7 +226,5 @@ class Parser(AbstractParser):
         return events
 
     def get_source(self):
-        self.events = [
-            find_events_list(self.soup)
-        ] + self.get_source_from_months()
+        self.events += self.get_source_from_months()
         return self.events
```