diff options
author | Alban Gruin | 2019-09-01 20:47:49 +0200 |
---|---|---|
committer | Alban Gruin | 2019-09-01 20:47:49 +0200 |
commit | 00d70c4d65967987c254c72f8bef5fda991f859b (patch) | |
tree | 670b2a78bfeb151096cfa844d6b48559dfc3641b /management | |
parent | fe83f55800f78ff6ced9a13cf5a9c22fde0ead12 (diff) | |
parent | a6eaecf48f63c37cf851386a45cd4b25ac3ed1e0 (diff) |
Merge branch 'futur'
Diffstat (limited to 'management')
-rw-r--r-- | management/commands/__parsercommand.py | 26 | ||||
-rw-r--r-- | management/commands/printvalues.py | 45 | ||||
-rw-r--r-- | management/commands/timetables.py | 16 | ||||
-rw-r--r-- | management/parsers/ups2018.py | 28 | ||||
-rw-r--r-- | management/parsers/ups2019.py | 126 |
5 files changed, 219 insertions, 22 deletions
diff --git a/management/commands/__parsercommand.py b/management/commands/__parsercommand.py new file mode 100644 index 0000000..99480cc --- /dev/null +++ b/management/commands/__parsercommand.py @@ -0,0 +1,26 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>. + +from importlib import import_module + +from django.conf import settings + +DEFAULT_PARSER = "edt.management.parsers.ups2017" + + +class ParserCommand: + def get_parser(self): + parser_module = getattr(settings, "CS_PARSER", DEFAULT_PARSER) + return getattr(import_module(parser_module), "Parser") diff --git a/management/commands/printvalues.py b/management/commands/printvalues.py new file mode 100644 index 0000000..91dd18b --- /dev/null +++ b/management/commands/printvalues.py @@ -0,0 +1,45 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>. + +from django.core.management.base import BaseCommand + +from ...models import Source +from .__parsercommand import ParserCommand + +import json + + +class Command(BaseCommand, ParserCommand): + help = "List values from courses from a source" + + def add_arguments(self, parser): + parser.add_argument("--source", type=int, nargs=1, required=True) + parser.add_argument("--limit", type=int, nargs=1) + + def handle(self, *args, **options): + source = Source.objects.get(pk=options["source"][0]) + parser = self.get_parser()(source) + events = [event for month in parser.get_source() for event in month] + + i = 0 + limit = len(events) + if options["limit"] is not None: + limit = min(options["limit"][0], limit) + + while i < limit: + self.stdout.write(json.dumps(events[i], indent=4, sort_keys=True)) + i += 1 + + self.stdout.write(self.style.SUCCESS("Done.")) diff --git a/management/commands/timetables.py b/management/commands/timetables.py index ee33f7e..f71accf 100644 --- a/management/commands/timetables.py +++ b/management/commands/timetables.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 Alban Gruin +# Copyright (C) 2017-2019 Alban Gruin # # celcatsanitizer is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published @@ -13,20 +13,16 @@ # You should have received a copy of the GNU Affero General Public License # along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>. -from importlib import import_module - import datetime import traceback -from django.conf import settings from django.core.management.base import BaseCommand from django.db import transaction from django.db.models import Min from ...models import Course, Source from ...utils import get_week, tz_now - -DEFAULT_PARSER = "edt.management.parsers.ups2017" +from .__parsercommand import ParserCommand def delete_courses_in_week(source, year, week, today): @@ -122,7 +118,7 @@ def process_timetable(source, force, parser_cls, year=None, weeks=None): process_timetable_week(source, force, parser) -class Command(BaseCommand): +class Command(BaseCommand, ParserCommand): help = "Fetches registered celcat timetables" def add_arguments(self, parser): @@ -134,14 +130,10 @@ class Command(BaseCommand): nargs="+") parser.add_argument("--year", type=int, nargs=1) - def __get_parser(self): - parser_module = getattr(settings, "CS_PARSER", DEFAULT_PARSER) - return getattr(import_module(parser_module), "Parser") - def handle(self, *args, **options): year = None errcount = 0 - parser = self.__get_parser() + parser = self.get_parser() if options["all"]: weeks = None diff --git a/management/parsers/ups2018.py b/management/parsers/ups2018.py index f1da5bf..ad8322c 100644 --- a/management/parsers/ups2018.py +++ b/management/parsers/ups2018.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018 Alban Gruin +# Copyright (C) 2018-2019 Alban Gruin # # celcatsanitizer is free software: you can redistribute it and/or modify # it under the terms of the GNU Affero General Public License as published @@ -26,12 +26,16 @@ from django.utils import timezone import lxml.html import requests -from ...models import Course, Group, Room +from ...models import Course, Group, Module, Room from ...utils import get_current_week, get_week from .abstractparser import AbstractParser, ParserError VARNAME = "v.events.list = " +GROUP_PREFIXES = ("L1 ", "L2 ", "L3 ", "L3P ", "M1 ", "M2 ", "DEUST ", "MAG1 ", + "1ERE ANNEE ", "2EME ANNEE ", "3EME ANNEE ", + "MAT-Agreg Interne ") + def find_events_list(soup): res = [] @@ -114,7 +118,8 @@ class Parser(AbstractParser): return course = Course.objects.create( - source=self.source, begin=begin, end=end + source=self.source, begin=begin, end=end, + celcat_id=int(event["id"]) ) min_i = 0 @@ -122,11 +127,7 @@ class Parser(AbstractParser): min_i = 1 i = min_i - while i < len(data) and not data[i].startswith( - ("L1 ", "L2 ", "L3 ", "L3P ", "M1 ", "M2 ", "DEUST ", "MAG1 ", - "1ERE ANNEE ", "2EME ANNEE ", "3EME ANNEE ", - "MAT-Agreg Interne ") - ): + while i < len(data) and not data[i].startswith(GROUP_PREFIXES): i += 1 groups = data[i] @@ -136,6 +137,13 @@ class Parser(AbstractParser): # par un dictionnaire classique. names = OrderedDict.fromkeys(data[i - 1].split(';')) course.name = ", ".join(names.keys()) + + module_names = [t for t in event["tag"] + if len(t) > 0 and + any(n.startswith(t) for n in names.keys())] + if len(module_names) > 0: + module, _ = Module.objects.get_or_create(name=module_names[0]) + course.module = module else: course.name = "Sans nom" if i - 2 >= min_i: @@ -213,10 +221,10 @@ class Parser(AbstractParser): responses = yield from asyncio.gather(*futures) return responses - def get_source_from_months(self, async=True): + def get_source_from_months(self, asynchronous=True): events = [] - if async: + if asynchronous: loop = asyncio.get_event_loop() events = loop.run_until_complete(self.get_months_async()) else: diff --git a/management/parsers/ups2019.py b/management/parsers/ups2019.py new file mode 100644 index 0000000..c6bd7e3 --- /dev/null +++ b/management/parsers/ups2019.py @@ -0,0 +1,126 @@ +# Copyright (C) 2019 Alban Gruin +# +# celcatsanitizer is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# celcatsanitizer is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>. + +from datetime import date, datetime, timedelta +from html import unescape + +from django.utils import timezone + +import requests + +from ...models import Course, Group, Module, Room +from ...utils import get_current_week, get_week +from .abstractparser import AbstractParser +from .ups2018 import GROUP_PREFIXES + + +class Parser(AbstractParser): + def __get_event(self, event, year, week): + if event["allDay"]: + return + + begin = timezone.make_aware( + datetime.strptime(event["start"], "%Y-%m-%dT%H:%M:%S") + ) + end = timezone.make_aware( + datetime.strptime(event["end"], "%Y-%m-%dT%H:%M:%S") + ) + + if year is not None and week is not None: + event_year, event_week, _ = begin.isocalendar() + if event_year != year or event_week != week: + return + + data = [unescape(st.strip()) + for st in event["description"].split("<br />")] + groups = [] + rooms = [] + + course = Course.objects.create( + source=self.source, begin=begin, end=end, + celcat_id=event["id"] + ) + + max_i = len(data) + + if event.get("eventCategory") is not None and \ + len(event.get("eventCategory", "")) > 0: + course.type = event["eventCategory"] + max_i -= 1 + + if event.get("module", "") is not None and \ + len(event.get("module", "")) > 0: + module, _ = Module.objects.get_or_create(name=event["module"]) + course.module = module + + i = 0 + while i < max_i and not data[i].startswith(GROUP_PREFIXES): + rooms.append(data[i]) + i += 1 + course.rooms.add(*Room.objects.filter(name__in=rooms)) + + if len(rooms) != course.rooms.count(): + print(rooms, course.rooms) + + while i < max_i and data[i].startswith(GROUP_PREFIXES): + group, _ = Group.objects.get_or_create(source=self.source, + celcat_name=data[i]) + groups.append(group) + i += 1 + course.groups.add(*groups) + + if i < max_i and course.module is not None and \ + data[i].startswith(course.module.name): + course.name = data[i] + i += 1 + + course.notes = "\n".join(data[i:max_i]).strip() + if "other" in data[i]: + print("Warning: \"other\" in notes") + + return course + + def get_events(self, today, year=None, week=None): + for event in self.events: + course = self.__get_event(event, year, week) + if course is not None: + yield course + + def get_update_date(self): + return + + def get_weeks(self): + # FIXME: détection automatique à partir des événements présents + beginning, _ = get_week(*get_current_week()) + self.weeks = {"1": beginning} + + return self.weeks + + def get_source(self): + start = date.today() + end = start + timedelta(days=365) + + req = requests.post(self.source.url, + headers={"User-Agent": self.user_agent}, + data={"calView": "month", + "resType": 103, + "federationIds[]": self.source.metadata, + "start": start.strftime("%Y-%m-%d"), + "end": end.strftime("%Y-%m-%d")}) + req.encoding = "uft8" + req.raise_for_status() + + self.events = req.json() + return self.events |