about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alban Gruin 2018-10-09 20:43:01 +0200
committer Alban Gruin 2018-10-09 20:43:01 +0200
commit 709919377593c60b9f03796433bd5e58ad553d67 (patch)
tree 20e0d7cbca5ab42e52a316973c32fd2de3d67488
parent fc1fc472931cae0ba3d0a0103c328f85b7b52586 (diff)
parent 171472d7dc42e2d3b390ad8b052c7e88fca21722 (diff)
Merge branch 'stable/0.14.z' into prod/pa1ch/0.14.z (tag: v0.14.3-pa1ch)
Signed-off-by: Alban Gruin <alban at pa1ch dot fr>
-rw-r--r-- Documentation/conf.py 2
-rw-r--r-- Documentation/usage/versions.rst 8
-rw-r--r-- __init__.py 2
-rw-r--r-- management/parsers/abstractparser.py 5
-rw-r--r-- management/parsers/ups2018.py 67
5 files changed, 52 insertions, 32 deletions
diff --git a/Documentation/conf.py b/Documentation/conf.py
index ad8660d..c0ce370 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -15,7 +15,7 @@ copyright = u'%d, Alban Gruin' % year
author = u'Alban Gruin'
version = u'0.14'
-release = u'0.14.2'
+release = u'0.14.3'
language = 'fr'
diff --git a/Documentation/usage/versions.rst b/Documentation/usage/versions.rst
index b0a1fb4..71f122d 100644
--- a/Documentation/usage/versions.rst
+++ b/Documentation/usage/versions.rst
@@ -105,3 +105,11 @@ Version 0.14.2
utilisée jusqu’alors faisait que l’ordre des noms n’était pas
forcément identique d’une mise à jour à une autre. Cette technique
a été changée par une autre permettant de conserver cet ordre.
+
+Version 0.14.3
+--------------
+ - Il arrive que la source renvoie des pages incomplètes ne contenant
+ aucun cours, ce qui peut donner des mois complètement vides. Ajout
+ d’une vérification lors de la récupération des pages ; si une page
+ est invalide, elle est re-demandée tant qu’elle est incomplète, et
+ ce trois fois au maximum.
diff --git a/__init__.py b/__init__.py
index a67d67f..5140a14 100644
--- a/__init__.py
+++ b/__init__.py
@@ -13,7 +13,7 @@
# You should have received a copy of the GNU Affero General Public License
# along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>.
-VERSION = "0.14.2"
+VERSION = "0.14.3"
__version__ = VERSION
default_app_config = "edt.apps.EdtConfig"
diff --git a/management/parsers/abstractparser.py b/management/parsers/abstractparser.py
index 8d55b6d..3164082 100644
--- a/management/parsers/abstractparser.py
+++ b/management/parsers/abstractparser.py
@@ -50,3 +50,8 @@ class AbstractParser(metaclass=abc.ABCMeta):
def get_source(self):
return self._make_request(self.source.url)
+
+
+class ParserError(Exception):
+ def __init__(self, message):
+ super(Exception, self).__init__(message)
diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py
index 522a26a..f1da5bf 100644
--- a/management/parsers/ups2018.py
+++ b/management/parsers/ups2018.py
@@ -28,20 +28,22 @@ import requests
from ...models import Course, Group, Room
from ...utils import get_current_week, get_week
-from .abstractparser import AbstractParser
+from .abstractparser import AbstractParser, ParserError
VARNAME = "v.events.list = "
def find_events_list(soup):
res = []
+ found = False
for script in soup.xpath("//script/text()"):
if VARNAME in script:
for var in script.split('\n'):
if var.startswith(VARNAME):
res = json.loads(var[len(VARNAME):-2])
+ found = True
- return res
+ return res, found
def get_next_month(dt):
@@ -52,21 +54,40 @@ def get_next_month(dt):
class Parser(AbstractParser):
def __init__(self, source):
super(Parser, self).__init__(source)
+ self.events = [self._make_request(source.url)]
+ self.source = source
+
+ def _make_request(self, url, date=None):
+ events, found = [], False
+ attempts = 0
+ params = {}
+
+ if date is not None:
+ params["Date"] = date
+
+ while not found:
+ if attempts == 3:
+ raise ParserError("Failed to retrieve {0}".format(url))
+ attempts += 1
+
+ # En-tête tiré de mon Firefox…
+ req = super(Parser, self)._make_request(
+ url, params=params,
+ headers={"Accept-Language": "en-US,en;q=0.5"},
+ )
+ req.raise_for_status()
- # En-tête tiré de mon Firefox…
- base_req = self._make_request(
- source.url, headers={"Accept-Language": "en-US,en;q=0.5"}
- )
+ parser = lxml.html.HTMLParser(encoding="utf8")
+ soup = lxml.html.document_fromstring(req.content, parser=parser)
+ events, found = find_events_list(soup)
- parser = lxml.html.HTMLParser(encoding="utf-8")
- self.soup = lxml.html.document_fromstring(
- base_req.content, parser=parser
- )
+ if date is None:
+ self.months = []
+ for option in soup.xpath("//option"):
+ if option.get("selected") is not None or len(self.months) > 0:
+ self.months.append(option.text)
- self.months = []
- for option in self.soup.xpath("//option"):
- if option.get("selected") is not None or len(self.months) > 0:
- self.months.append(option.text)
+ return events
def __get_event(self, event, today,
beginning_of_month, end_of_month,
@@ -179,19 +200,7 @@ class Parser(AbstractParser):
)
month_str = month.replace(day=first_monday).strftime("%Y%m%d")
- req = self._make_request(
- self.source.url,
- headers={
- "Accept-Language": "en-US,en;q=0.5",
- },
- params={"Date": month_str},
- )
- req.raise_for_status()
-
- parser = lxml.html.HTMLParser(encoding="utf8")
- soup = lxml.html.document_fromstring(req.content, parser=parser)
-
- return find_events_list(soup)
+ return self._make_request(self.source.url, month_str)
@asyncio.coroutine
def get_months_async(self):
@@ -217,7 +226,5 @@ class Parser(AbstractParser):
return events
def get_source(self):
- self.events = [
- find_events_list(self.soup)
- ] + self.get_source_from_months()
+ self.events += self.get_source_from_months()
return self.events