From 536061a041b5ea0ea0b64a8f4e5fa3b99a49861a Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Tue, 9 Oct 2018 20:29:56 +0200 Subject: parsers: ajout d’une exception pour les parseurs Signed-off-by: Alban Gruin --- management/parsers/abstractparser.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/management/parsers/abstractparser.py b/management/parsers/abstractparser.py index 8d55b6d..3164082 100644 --- a/management/parsers/abstractparser.py +++ b/management/parsers/abstractparser.py @@ -50,3 +50,8 @@ class AbstractParser(metaclass=abc.ABCMeta): def get_source(self): return self._make_request(self.source.url) + + +class ParserError(Exception): + def __init__(self, message): + super(Exception, self).__init__(message) -- cgit v1.2.1 From 11f340b4c3adb4200ff41e7e4587392b10b13e47 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Tue, 9 Oct 2018 20:30:07 +0200 Subject: ups2018: vérification de la présence des événements dans la page Il arrive que les pages retournées par la source soient incomplètes et ne contiennent pas d’événement, et parfois des mois entiers sont vides. On tente donc de récupérer une page trois fois au maximum, et, si cela échoue toujours, on abandonne en renvoyant une exception. 
Signed-off-by: Alban Gruin --- management/parsers/ups2018.py | 67 ++++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index 522a26a..f1da5bf 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -28,20 +28,22 @@ import requests from ...models import Course, Group, Room from ...utils import get_current_week, get_week -from .abstractparser import AbstractParser +from .abstractparser import AbstractParser, ParserError VARNAME = "v.events.list = " def find_events_list(soup): res = [] + found = False for script in soup.xpath("//script/text()"): if VARNAME in script: for var in script.split('\n'): if var.startswith(VARNAME): res = json.loads(var[len(VARNAME):-2]) + found = True - return res + return res, found def get_next_month(dt): @@ -52,21 +54,40 @@ def get_next_month(dt): class Parser(AbstractParser): def __init__(self, source): super(Parser, self).__init__(source) + self.events = [self._make_request(source.url)] + self.source = source + + def _make_request(self, url, date=None): + events, found = [], False + attempts = 0 + params = {} + + if date is not None: + params["Date"] = date + + while not found: + if attempts == 3: + raise ParserError("Failed to retrieve {0}".format(url)) + attempts += 1 + + # En-tête tiré de mon Firefox… + req = super(Parser, self)._make_request( + url, params=params, + headers={"Accept-Language": "en-US,en;q=0.5"}, + ) + req.raise_for_status() - # En-tête tiré de mon Firefox… - base_req = self._make_request( - source.url, headers={"Accept-Language": "en-US,en;q=0.5"} - ) + parser = lxml.html.HTMLParser(encoding="utf8") + soup = lxml.html.document_fromstring(req.content, parser=parser) + events, found = find_events_list(soup) - parser = lxml.html.HTMLParser(encoding="utf-8") - self.soup = lxml.html.document_fromstring( - base_req.content, parser=parser - ) + if date is None: + self.months = [] + 
for option in soup.xpath("//option"): + if option.get("selected") is not None or len(self.months) > 0: + self.months.append(option.text) - self.months = [] - for option in self.soup.xpath("//option"): - if option.get("selected") is not None or len(self.months) > 0: - self.months.append(option.text) + return events def __get_event(self, event, today, beginning_of_month, end_of_month, @@ -179,19 +200,7 @@ class Parser(AbstractParser): ) month_str = month.replace(day=first_monday).strftime("%Y%m%d") - req = self._make_request( - self.source.url, - headers={ - "Accept-Language": "en-US,en;q=0.5", - }, - params={"Date": month_str}, - ) - req.raise_for_status() - - parser = lxml.html.HTMLParser(encoding="utf8") - soup = lxml.html.document_fromstring(req.content, parser=parser) - - return find_events_list(soup) + return self._make_request(self.source.url, month_str) @asyncio.coroutine def get_months_async(self): @@ -217,7 +226,5 @@ class Parser(AbstractParser): return events def get_source(self): - self.events = [ - find_events_list(self.soup) - ] + self.get_source_from_months() + self.events += self.get_source_from_months() return self.events -- cgit v1.2.1 From eb6d8ce7241717e621089a7b790dbafbcf9eed69 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Tue, 9 Oct 2018 20:40:44 +0200 Subject: doc: mise à jour de la documentation Signed-off-by: Alban Gruin --- Documentation/usage/versions.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/usage/versions.rst b/Documentation/usage/versions.rst index b0a1fb4..71f122d 100644 --- a/Documentation/usage/versions.rst +++ b/Documentation/usage/versions.rst @@ -105,3 +105,11 @@ Version 0.14.2 utilisée jusqu’alors faisait que l’ordre des noms n’était pas forcément identique d’une mise à jour à une autre. Cette technique a été changée par une autre permettant de conserver cet ordre. 
+ +Version 0.14.3 +-------------- + - Il arrive que la source renvoie des pages incomplètes ne contenant + aucun cours, ce qui peut donner des mois complètement vides. Ajout + d’une vérification lors de la récupération des pages ; si une page + est invalide, elle est re-demandée tant qu’elle est incomplète, et + ce trois fois au maximum. -- cgit v1.2.1 From 171472d7dc42e2d3b390ad8b052c7e88fca21722 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Tue, 9 Oct 2018 20:40:55 +0200 Subject: Version 0.14.3 Signed-off-by: Alban Gruin --- Documentation/conf.py | 2 +- __init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index ad8660d..c0ce370 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -15,7 +15,7 @@ copyright = u'%d, Alban Gruin' % year author = u'Alban Gruin' version = u'0.14' -release = u'0.14.2' +release = u'0.14.3' language = 'fr' diff --git a/__init__.py b/__init__.py index a67d67f..5140a14 100644 --- a/__init__.py +++ b/__init__.py @@ -13,7 +13,7 @@ # You should have received a copy of the GNU Affero General Public License # along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>. -VERSION = "0.14.2" +VERSION = "0.14.3" __version__ = VERSION default_app_config = "edt.apps.EdtConfig" -- cgit v1.2.1