5 Commits

Author SHA1 Message Date
  Alban Gruin 171472d7dc Version 0.14.3 10 months ago
  Alban Gruin eb6d8ce724 doc: mise à jour de la documentation 10 months ago
  Alban Gruin b2bd76229c Merge branch 'ag/ups2018-correctifs-apres-tests' 10 months ago
  Alban Gruin 11f340b4c3 ups2018: vérification de la présence des événements dans la page 10 months ago
  Alban Gruin 536061a041 parsers: ajout d’une exception pour les parseurs 10 months ago

+ 1
- 1
Documentation/conf.py View File

@@ -15,7 +15,7 @@ copyright = u'%d, Alban Gruin' % year
15 15
 author = u'Alban Gruin'
16 16
 
17 17
 version = u'0.14'
18
-release = u'0.14.2'
18
+release = u'0.14.3'
19 19
 
20 20
 language = 'fr'
21 21
 

+ 8
- 0
Documentation/usage/versions.rst View File

@@ -105,3 +105,11 @@ Version 0.14.2
105 105
    utilisée jusqu’alors faisait que l’ordre des noms n’était pas
106 106
    forcément identique d’une mise à jour à une autre. Cette technique
107 107
    a été changée par une autre permettant de conserver cet ordre.
108
+
109
+Version 0.14.3
110
+--------------
111
+ - Il arrive que la source renvoie des pages incomplètes ne contenant
112
+   aucun cours, ce qui peut donner des mois complètement vides. Ajout
113
+   d’une vérification lors de la récupération des pages ; si une page
114
+   est invalide, elle est redemandée tant qu’elle est incomplète, et
115
+   ce trois fois au maximum.

+ 1
- 1
__init__.py View File

@@ -13,7 +13,7 @@
13 13
 #    You should have received a copy of the GNU Affero General Public License
14 14
 #    along with celcatsanitizer.  If not, see <http://www.gnu.org/licenses/>.
15 15
 
16
-VERSION = "0.14.2"
16
+VERSION = "0.14.3"
17 17
 __version__ = VERSION
18 18
 
19 19
 default_app_config = "edt.apps.EdtConfig"

+ 5
- 0
management/parsers/abstractparser.py View File

@@ -50,3 +50,8 @@ class AbstractParser(metaclass=abc.ABCMeta):
50 50
 
51 51
     def get_source(self):
52 52
         return self._make_request(self.source.url)
53
+
54
+
55
+class ParserError(Exception):
56
+    def __init__(self, message):
57
+        super(Exception, self).__init__(message)

+ 37
- 30
management/parsers/ups2018.py View File

@@ -28,20 +28,22 @@ import requests
28 28
 
29 29
 from ...models import Course, Group, Room
30 30
 from ...utils import get_current_week, get_week
31
-from .abstractparser import AbstractParser
31
+from .abstractparser import AbstractParser, ParserError
32 32
 
33 33
 VARNAME = "v.events.list = "
34 34
 
35 35
 
36 36
 def find_events_list(soup):
37 37
     res = []
38
+    found = False
38 39
     for script in soup.xpath("//script/text()"):
39 40
         if VARNAME in script:
40 41
             for var in script.split('\n'):
41 42
                 if var.startswith(VARNAME):
42 43
                     res = json.loads(var[len(VARNAME):-2])
44
+                    found = True
43 45
 
44
-    return res
46
+    return res, found
45 47
 
46 48
 
47 49
 def get_next_month(dt):
@@ -52,21 +54,40 @@ def get_next_month(dt):
52 54
 class Parser(AbstractParser):
53 55
     def __init__(self, source):
54 56
         super(Parser, self).__init__(source)
57
+        self.events = [self._make_request(source.url)]
58
+        self.source = source
59
+
60
+    def _make_request(self, url, date=None):
61
+        events, found = [], False
62
+        attempts = 0
63
+        params = {}
64
+
65
+        if date is not None:
66
+            params["Date"] = date
67
+
68
+        while not found:
69
+            if attempts == 3:
70
+                raise ParserError("Failed to retrieve {0}".format(url))
71
+            attempts += 1
72
+
73
+            # En-tête tiré de mon Firefox…
74
+            req = super(Parser, self)._make_request(
75
+                url, params=params,
76
+                headers={"Accept-Language": "en-US,en;q=0.5"},
77
+            )
78
+            req.raise_for_status()
55 79
 
56
-        # En-tête tiré de mon Firefox…
57
-        base_req = self._make_request(
58
-            source.url, headers={"Accept-Language": "en-US,en;q=0.5"}
59
-        )
80
+            parser = lxml.html.HTMLParser(encoding="utf8")
81
+            soup = lxml.html.document_fromstring(req.content, parser=parser)
82
+            events, found = find_events_list(soup)
60 83
 
61
-        parser = lxml.html.HTMLParser(encoding="utf-8")
62
-        self.soup = lxml.html.document_fromstring(
63
-            base_req.content, parser=parser
64
-        )
84
+        if date is None:
85
+            self.months = []
86
+            for option in soup.xpath("//option"):
87
+                if option.get("selected") is not None or len(self.months) > 0:
88
+                    self.months.append(option.text)
65 89
 
66
-        self.months = []
67
-        for option in self.soup.xpath("//option"):
68
-            if option.get("selected") is not None or len(self.months) > 0:
69
-                self.months.append(option.text)
90
+        return events
70 91
 
71 92
     def __get_event(self, event, today,
72 93
                     beginning_of_month, end_of_month,
@@ -179,19 +200,7 @@ class Parser(AbstractParser):
179 200
         )
180 201
         month_str = month.replace(day=first_monday).strftime("%Y%m%d")
181 202
 
182
-        req = self._make_request(
183
-            self.source.url,
184
-            headers={
185
-                "Accept-Language": "en-US,en;q=0.5",
186
-            },
187
-            params={"Date": month_str},
188
-        )
189
-        req.raise_for_status()
190
-
191
-        parser = lxml.html.HTMLParser(encoding="utf8")
192
-        soup = lxml.html.document_fromstring(req.content, parser=parser)
193
-
194
-        return find_events_list(soup)
203
+        return self._make_request(self.source.url, month_str)
195 204
 
196 205
     @asyncio.coroutine
197 206
     def get_months_async(self):
@@ -217,7 +226,5 @@ class Parser(AbstractParser):
217 226
         return events
218 227
 
219 228
     def get_source(self):
220
-        self.events = [
221
-            find_events_list(self.soup)
222
-        ] + self.get_source_from_months()
229
+        self.events += self.get_source_from_months()
223 230
         return self.events