From 3786f8ac9be60d6f05a8281564270225b03f5326 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Wed, 13 Feb 2019 19:59:07 +0100 Subject: commands: ajout d’un modèle abstrait de commande accédant au parseur Pour les besoins de l’ajout de la notion de module, un nouvel outil permettant de lister les attributs d’un cours sera rajouté. À l’instar de timetables, il fera aussi appel au parseur. Pour éviter de dupliquer du code, la partie accès au parseur est déplacée dans une autre classe. Signed-off-by: Alban Gruin --- management/commands/__parsercommand.py | 26 ++++++++++++++++++++++++++ management/commands/timetables.py | 16 ++++------------ 2 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 management/commands/__parsercommand.py (limited to 'management') diff --git a/management/commands/__parsercommand.py b/management/commands/__parsercommand.py new file mode 100644 index 0000000..99480cc --- /dev/null +++ b/management/commands/__parsercommand.py @@ -0,0 +1,26 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see . 
+ +from importlib import import_module + +from django.conf import settings + +DEFAULT_PARSER = "edt.management.parsers.ups2017" + + +class ParserCommand: + def get_parser(self): + parser_module = getattr(settings, "CS_PARSER", DEFAULT_PARSER) + return getattr(import_module(parser_module), "Parser") diff --git a/management/commands/timetables.py b/management/commands/timetables.py index ee33f7e..f71accf 100644 --- a/management/commands/timetables.py +++ b/management/commands/timetables.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Alban Gruin +# Copyright (C) 2017-2019 Alban Gruin # # celcatsanitizer is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published @@ -13,20 +13,16 @@ # You should have received a copy of the GNU Affero General Public License # along with celcatsanitizer. If not, see . -from importlib import import_module - import datetime import traceback -from django.conf import settings from django.core.management.base import BaseCommand from django.db import transaction from django.db.models import Min from ...models import Course, Source from ...utils import get_week, tz_now - -DEFAULT_PARSER = "edt.management.parsers.ups2017" +from .__parsercommand import ParserCommand def delete_courses_in_week(source, year, week, today): @@ -122,7 +118,7 @@ def process_timetable(source, force, parser_cls, year=None, weeks=None): process_timetable_week(source, force, parser) -class Command(BaseCommand): +class Command(BaseCommand, ParserCommand): help = "Fetches registered celcat timetables" def add_arguments(self, parser): @@ -134,14 +130,10 @@ class Command(BaseCommand): nargs="+") parser.add_argument("--year", type=int, nargs=1) - def __get_parser(self): - parser_module = getattr(settings, "CS_PARSER", DEFAULT_PARSER) - return getattr(import_module(parser_module), "Parser") - def handle(self, *args, **options): year = None errcount = 0 - parser = self.__get_parser() + parser = 
self.get_parser() if options["all"]: weeks = None -- cgit v1.2.1 From d4060a8336554b6f7e154785a1f51f802ee90492 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Wed, 13 Feb 2019 20:04:03 +0100 Subject: commands: ajout d’une commande pour lister les propriétés des cours Pour pouvoir analyser plus facilement les attributs d’un cours, un nouvel outil est rajouté pour lister tous les attributs d’un ou plusieurs cours. Il fait appel au parseur et permet de sélectionner une source, et de limiter le nombre de cours affichés. Signed-off-by: Alban Gruin --- management/commands/printvalues.py | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 management/commands/printvalues.py (limited to 'management') diff --git a/management/commands/printvalues.py b/management/commands/printvalues.py new file mode 100644 index 0000000..91dd18b --- /dev/null +++ b/management/commands/printvalues.py @@ -0,0 +1,45 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see . 
+ +from django.core.management.base import BaseCommand + +from ...models import Source +from .__parsercommand import ParserCommand + +import json + + +class Command(BaseCommand, ParserCommand): + help = "List values from courses from a source" + + def add_arguments(self, parser): + parser.add_argument("--source", type=int, nargs=1, required=True) + parser.add_argument("--limit", type=int, nargs=1) + + def handle(self, *args, **options): + source = Source.objects.get(pk=options["source"][0]) + parser = self.get_parser()(source) + events = [event for month in parser.get_source() for event in month] + + i = 0 + limit = len(events) + if options["limit"] is not None: + limit = min(options["limit"][0], limit) + + while i < limit: + self.stdout.write(json.dumps(events[i], indent=4, sort_keys=True)) + i += 1 + + self.stdout.write(self.style.SUCCESS("Done.")) -- cgit v1.2.1 From b2eaa3be85b30732c4e6c083c6f0413c02dbaec4 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Thu, 3 Jan 2019 01:11:23 +0100 Subject: UPS2018: ajout du champ celcat_id Les cours dans celcat ont un champ id. Ce changement permet de le stocker dans la base de données (sous la forme d’un entier) et de l’afficher dans l’interface d’administration. Pour l’instant, on ne sait pas si cette valeur est unique ou non. Il n’y a donc pas de contraintes sur ce champ pour le moment. 
Signed-off-by: Alban Gruin --- management/parsers/ups2018.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'management') diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index f1da5bf..e3afbe5 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Alban Gruin +# Copyright (C) 2018-2019 Alban Gruin # # celcatsanitizer is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published @@ -114,7 +114,8 @@ class Parser(AbstractParser): return course = Course.objects.create( - source=self.source, begin=begin, end=end + source=self.source, begin=begin, end=end, + celcat_id=int(event["id"]) ) min_i = 0 -- cgit v1.2.1 From 297632390e6ec051e315e6d9545d0110a41a8880 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Wed, 13 Feb 2019 22:02:09 +0100 Subject: UPS2018: récupération du module (UE) correspondant à un cours Il peut être intéressant de lister les cours par module (UE). Une table est donc rajoutée pour stocker cette information et permettre d’effectuer des recherches et des tris. Signed-off-by: Alban Gruin --- management/parsers/ups2018.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'management') diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index e3afbe5..afbfc4b 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -26,7 +26,7 @@ from django.utils import timezone import lxml.html import requests -from ...models import Course, Group, Room +from ...models import Course, Group, Module, Room from ...utils import get_current_week, get_week from .abstractparser import AbstractParser, ParserError @@ -137,6 +137,13 @@ class Parser(AbstractParser): # par un dictionnaire classique. 
names = OrderedDict.fromkeys(data[i - 1].split(';')) course.name = ", ".join(names.keys()) + + module_names = [t for t in event["tag"] + if len(t) > 0 and + any(n.startswith(t) for n in names.keys())] + if len(module_names) > 0: + module, _ = Module.objects.get_or_create(name=module_names[0]) + course.module = module else: course.name = "Sans nom" if i - 2 >= min_i: -- cgit v1.2.1 From a235752368c6eff21400f6f8089ee3bf781cf36e Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Fri, 30 Aug 2019 12:27:00 +0200 Subject: ups2018: déplacement des préfixes de cours dans une constante Le parseur UPS2019 va se servir de cette liste aussi, elle est donc déplacée dans sa propre constante. Signed-off-by: Alban Gruin --- management/parsers/ups2018.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'management') diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index afbfc4b..0d6d798 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -32,6 +32,10 @@ from .abstractparser import AbstractParser, ParserError VARNAME = "v.events.list = " +GROUP_PREFIXES = ("L1 ", "L2 ", "L3 ", "L3P ", "M1 ", "M2 ", "DEUST ", "MAG1 ", + "1ERE ANNEE ", "2EME ANNEE ", "3EME ANNEE ", + "MAT-Agreg Interne ") + def find_events_list(soup): res = [] @@ -123,11 +127,7 @@ class Parser(AbstractParser): min_i = 1 i = min_i - while i < len(data) and not data[i].startswith( - ("L1 ", "L2 ", "L3 ", "L3P ", "M1 ", "M2 ", "DEUST ", "MAG1 ", - "1ERE ANNEE ", "2EME ANNEE ", "3EME ANNEE ", - "MAT-Agreg Interne ") - ): + while i < len(data) and not data[i].startswith(GROUP_PREFIXES): i += 1 groups = data[i] -- cgit v1.2.1 From 0717c8ccd6ac10989d86593ff73a86a0c4398408 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Thu, 29 Aug 2019 12:53:05 +0200 Subject: ups2019: nouveau parseur pour le format UPS2019 Signed-off-by: Alban Gruin --- management/parsers/ups2019.py | 128 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 
insertions(+) create mode 100644 management/parsers/ups2019.py (limited to 'management') diff --git a/management/parsers/ups2019.py b/management/parsers/ups2019.py new file mode 100644 index 0000000..c7ab7c9 --- /dev/null +++ b/management/parsers/ups2019.py @@ -0,0 +1,128 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see . + +from datetime import date, datetime, timedelta +from html import unescape + +from django.utils import timezone + +import requests + +from ...models import Course, Group, Module, Room +from ...utils import get_current_week, get_week +from .abstractparser import AbstractParser +from .ups2018 import GROUP_PREFIXES + + +class Parser(AbstractParser): + def __get_name(self, raw_name): + return raw_name.split('[')[1][:-1] + + def __get_event(self, event, year, week): + if event["allDay"]: + return + + begin = timezone.make_aware( + datetime.strptime(event["start"], "%Y-%m-%dT%H:%M:%S") + ) + end = timezone.make_aware( + datetime.strptime(event["end"], "%Y-%m-%dT%H:%M:%S") + ) + + if year is not None and week is not None: + event_year, event_week, _ = begin.isocalendar() + if event_year != year or event_week != week: + return + + data = [unescape(st.strip()) + for st in event["description"].split("
")] + groups = [] + rooms = [] + + course = Course.objects.create( + source=self.source, begin=begin, end=end, + celcat_id=event["id"] + ) + + i = 0 + if event.get("eventCategory") is not None and \ + len(event.get("eventCategory", "")) > 0: + course.type = event["eventCategory"] + i = 1 + + if event.get("module", "") is not None and \ + len(event.get("module", "")) > 0: + module, _ = Module.objects.get_or_create(name=event["module"]) + course.module = module + + if '[' in data[i]: + course.name = self.__get_name(data[i]) + i += 1 + + while '[' in data[i]: + course.name += ", " + self.__get_name(data[i]) + i += 1 + + while i < len(data) and not data[i].startswith(GROUP_PREFIXES): + rooms.append(data[i]) + i += 1 + course.rooms.add(*Room.objects.filter(name__in=rooms)) + + while i < len(data) and data[i].startswith(GROUP_PREFIXES): + groups.append(Group.objects.get_or_create( + source=self.source, celcat_name=data[i] + )[0]) + i += 1 + course.groups.add(*groups) + + if i < len(data): + course.notes = "\n".join(data[i:]).strip() + if "other" in course.notes: + print("Warning: 'other' in course.notes") + + return course + + def get_events(self, today, year=None, week=None): + for event in self.events: + course = self.__get_event(event, year, week) + if course is not None: + yield course + + def get_update_date(self): + return + + def get_weeks(self): + # FIXME: détection automatique à partir des événements présents + beginning, _ = get_week(*get_current_week()) + self.weeks = {"1": beginning} + + return self.weeks + + def get_source(self): + start = date.today() + end = start + timedelta(days=365) + + req = requests.post(self.source.url, + headers={"User-Agent": self.user_agent}, + data={"calView": "month", + "resType": 103, + "federationIds[]": self.source.metadata, + "start": start.strftime("%Y-%m-%d"), + "end": end.strftime("%Y-%m-%d")}) + req.encoding = "uft8" + req.raise_for_status() + + self.events = req.json() + return self.events -- cgit v1.2.1 From 
a85f2fb91d1a4a0e31c41c392e404d7ddbf21109 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Sun, 1 Sep 2019 12:41:19 +0200 Subject: ups2019: réadaptation du parseur L’emplacement des différents éléments a changé, mdr. Signed-off-by: Alban Gruin --- management/parsers/ups2019.py | 42 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 22 deletions(-) (limited to 'management') diff --git a/management/parsers/ups2019.py b/management/parsers/ups2019.py index c7ab7c9..c6bd7e3 100644 --- a/management/parsers/ups2019.py +++ b/management/parsers/ups2019.py @@ -27,9 +27,6 @@ from .ups2018 import GROUP_PREFIXES class Parser(AbstractParser): - def __get_name(self, raw_name): - return raw_name.split('[')[1][:-1] - def __get_event(self, event, year, week): if event["allDay"]: return @@ -56,41 +53,42 @@ class Parser(AbstractParser): celcat_id=event["id"] ) - i = 0 + max_i = len(data) + if event.get("eventCategory") is not None and \ len(event.get("eventCategory", "")) > 0: course.type = event["eventCategory"] - i = 1 + max_i -= 1 if event.get("module", "") is not None and \ len(event.get("module", "")) > 0: module, _ = Module.objects.get_or_create(name=event["module"]) course.module = module - if '[' in data[i]: - course.name = self.__get_name(data[i]) - i += 1 - - while '[' in data[i]: - course.name += ", " + self.__get_name(data[i]) - i += 1 - - while i < len(data) and not data[i].startswith(GROUP_PREFIXES): + i = 0 + while i < max_i and not data[i].startswith(GROUP_PREFIXES): rooms.append(data[i]) i += 1 course.rooms.add(*Room.objects.filter(name__in=rooms)) - while i < len(data) and data[i].startswith(GROUP_PREFIXES): - groups.append(Group.objects.get_or_create( - source=self.source, celcat_name=data[i] - )[0]) + if len(rooms) != course.rooms.count(): + print(rooms, course.rooms) + + while i < max_i and data[i].startswith(GROUP_PREFIXES): + group, _ = Group.objects.get_or_create(source=self.source, + celcat_name=data[i]) + groups.append(group) i += 1 
course.groups.add(*groups) - if i < len(data): - course.notes = "\n".join(data[i:]).strip() - if "other" in course.notes: - print("Warning: 'other' in course.notes") + if i < max_i and course.module is not None and \ + data[i].startswith(course.module.name): + course.name = data[i] + i += 1 + + course.notes = "\n".join(data[i:max_i]).strip() + if "other" in data[i]: + print("Warning: \"other\" in notes") return course -- cgit v1.2.1 From d1369ea3654b56e6a91335bd108035cd5eecbc76 Mon Sep 17 00:00:00 2001 From: Alban Gruin Date: Fri, 30 Aug 2019 14:00:11 +0200 Subject: ups2018: correction d’une erreur de syntaxe avec Python 3.7 `async' est devenu un mot-clef avec Python 3.7. Or, un paramètre est appelé de cette manière dans le parseur UPS2018. Ceci le renomme en `asynchronous' pour corriger ce problème. Signed-off-by: Alban Gruin --- management/parsers/ups2018.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'management') diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index 0d6d798..ad8322c 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -221,10 +221,10 @@ class Parser(AbstractParser): responses = yield from asyncio.gather(*futures) return responses - def get_source_from_months(self, async=True): + def get_source_from_months(self, asynchronous=True): events = [] - if async: + if asynchronous: loop = asyncio.get_event_loop() events = loop.run_until_complete(self.get_months_async()) else: -- cgit v1.2.1