diff options
| author | Alban Gruin | 2018-04-23 16:20:38 +0200 | 
|---|---|---|
| committer | Alban Gruin | 2018-04-23 16:20:38 +0200 | 
| commit | 63ceb7ff232cd4015ae148bec31c6d72d9239c19 (patch) | |
| tree | 9aee056480b5fae0c97f5e1c8be4ab1358602926 /management | |
| parent | af055dbf703af0eca03862194de1a0f34ac1fcc5 (diff) | |
| parent | 9853d375d9e482c23f5fdbed9a3cf041029a92cb (diff) | |
Merge branch 'qsjps' into futur
Diffstat (limited to 'management')
| -rw-r--r-- | management/commands/_private.py | 47 | ||||
| -rw-r--r-- | management/commands/cleancourses.py | 12 | ||||
| -rw-r--r-- | management/commands/listtimetables.py | 18 | ||||
| -rw-r--r-- | management/commands/reparse.py | 30 | ||||
| -rw-r--r-- | management/commands/timetables.py | 94 | 
5 files changed, 120 insertions, 81 deletions
| diff --git a/management/commands/_private.py b/management/commands/_private.py index 4dd9262..94c1918 100644 --- a/management/commands/_private.py +++ b/management/commands/_private.py @@ -1,4 +1,4 @@ -#    Copyright (C) 2017  Alban Gruin +#    Copyright (C) 2017-2018  Alban Gruin  #  #    celcatsanitizer is free software: you can redistribute it and/or modify  #    it under the terms of the GNU Affero General Public License as published @@ -19,33 +19,26 @@ import re  from bs4 import BeautifulSoup  from django.utils import timezone -from edt.models import Group, Room, Course -from edt.utils import get_week +from ...models import Course, Group, Room +from ...utils import get_week  import requests  import edt +  def add_time(date, time):      ptime = datetime.datetime.strptime(time, "%H:%M")      delta = datetime.timedelta(hours=ptime.hour, minutes=ptime.minute)      return date + delta -def delete_courses_in_week(timetable, year, week, today): + +def delete_courses_in_week(source, year, week, today):      start, end = get_week(year, week)      Course.objects.filter(begin__gte=max(start, today), begin__lt=end, -                          timetable=timetable).delete() - -def get_from_db_or_create(cls, **kwargs): -    obj = cls.objects.all().filter(**kwargs) - -    obj = obj.first() -    if obj is None: -        obj = cls(**kwargs) -        obj.save() +                          source=source).delete() -    return obj -def get_event(timetable, event, event_week, today): +def get_event(source, event, event_week, today):      """Renvoie une classe Course à partir d’un événement récupéré par BS4"""      # On récupère la date de l’évènement à partir de la semaine      # et de la semaine référencée, puis l’heure de début et de fin @@ -58,17 +51,17 @@ def get_event(timetable, event, event_week, today):          return      # Création de l’objet cours -    course = Course.objects.create(timetable=timetable, begin=begin, end=end) +    course = Course.objects.create(source=source, begin=begin, end=end)      # On récupère les groupes concernés par les cours -    groups = [get_from_db_or_create(Group, timetable=timetable, -                                    celcat_name=item.text) +    groups = [Group.objects.get_or_create(source=source, +                                          celcat_name=item.text)[0]                for item in event.resources.group.find_all("item")]      course.groups.add(*groups)      # On récupère le champ « remarque »      if event.notes is not None: -        course.notes = event.notes.text +        course.notes = "\n".join(event.notes.find_all(text=True))      # On récupère le champ « nom »      if event.resources.module is not None: @@ -90,32 +83,34 @@ def get_event(timetable, event, event_week, today):      # en ait pas… qui sont ils, leurs réseaux, tout ça…), on les insère      # dans la base de données, et on les ajoute dans l’objet cours      if event.resources.room is not None: -        rooms = [get_from_db_or_create(Room, name=item.text) +        rooms = [Room.objects.get_or_create(name=item.text)[0]                   for item in event.resources.room.find_all("item")]          course.rooms.add(*rooms)      return course -def get_events(timetable, soup, weeks_in_soup, today, year=None, week=None): + +def get_events(source, soup, weeks_in_soup, today, year=None, week=None):      """Récupère tous les cours disponibles dans l’emploi du temps Celcat.      Le traîtement se limitera à la semaine indiquée si il y en a une."""      for event in soup.find_all("event"):          event_week = weeks_in_soup[event.rawweeks.text] -        event_week_num = event_week.isocalendar()[1] # Numéro de semaine +        event_week_num = event_week.isocalendar()[1]  # Numéro de semaine          # On passe le traitement si la semaine de l’événement ne correspond pas          # à la semaine passée, ou qu’il ne contient pas de groupe ou n’a pas de          # date de début ou de fin. -        if (event_week_num == week and event_week.year == year or \ +        if (event_week_num == week and event_week.year == year or              year is None or week is None) and \             event.resources.group is not None and \             event.starttime is not None and event.endtime is not None: -            course = get_event(timetable, event, event_week, today) +            course = get_event(source, event, event_week, today)              # On renvoie le cours si il n’est pas nul              if course is not None:                  yield course +  def get_update_date(soup):      # Explication de la regex      # @@ -140,6 +135,7 @@ def get_update_date(soup):      date = datetime.datetime(year, month, day, hour, minute, second)      return timezone.make_aware(date) +  def get_weeks(soup):      # Les semaines sont référencées de manière assez… exotique      # En gros, il y a une liste d’éléments span qui contiennent une sorte d’ID @@ -151,13 +147,14 @@ def get_weeks(soup):      # Un cours contient donc un ID de semaine, puis le nombre de jours après le      # début de cette semaine.      weeks = {} -    for span in soup.find_all("span"): # Liste de toutes les semaines définies +    for span in soup.find_all("span"):  # Liste de toutes les semaines définies          # On parse la date et on la fait correspondre à l’ID          weeks[span.alleventweeks.text] = timezone.make_aware(              datetime.datetime.strptime(span["date"], "%d/%m/%Y"))      return weeks +  def get_xml(url):      user_agent = "celcatsanitizer/" + edt.VERSION      req = requests.get(url, headers={"User-Agent": user_agent}) diff --git a/management/commands/cleancourses.py b/management/commands/cleancourses.py index f6041ef..246cfcc 100644 --- a/management/commands/cleancourses.py +++ b/management/commands/cleancourses.py @@ -15,22 +15,24 @@  from django.core.management.base import BaseCommand  from django.db import transaction -from edt.models import Course, Group + +from ...models import Course, Group  class Command(BaseCommand):      help = "Remove all courses and groups from the database"      def add_arguments(self, parser): -        parser.add_argument("--timetable", type=int, nargs="+") +        parser.add_argument("--source", type=int, nargs="+")      def handle(self, *args, **options):          with transaction.atomic(): -            if options["timetable"] is None: +            if options["source"] is None:                  Course.objects.all().delete()                  Group.objects.all().delete()              else: -                Course.objects.filter(timetable__id__in=options["timetable"]).delete() -                Group.objects.filter(timetable__id__in=options["timetable"]).delete() +                Course.objects.filter(source__id__in=options["source"]) \ +                              .delete() +                Group.objects.filter(source__id__in=options["source"]).delete()          self.stdout.write(self.style.SUCCESS("Done.")) diff --git a/management/commands/listtimetables.py b/management/commands/listtimetables.py index 6df7ba5..bd27e92 100644 --- a/management/commands/listtimetables.py +++ b/management/commands/listtimetables.py @@ -1,4 +1,4 @@ -#    Copyright (C) 2017  Alban Gruin +#    Copyright (C) 2017-2018  Alban Gruin  #  #    celcatsanitizer is free software: you can redistribute it and/or modify  #    it under the terms of the GNU Affero General Public License as published @@ -14,24 +14,18 @@  #    along with celcatsanitizer.  If not, see <http://www.gnu.org/licenses/>.  from django.core.management.base import BaseCommand -from edt.models import Timetable +from ...models import Source  class Command(BaseCommand):      help = "List timetables in the database" -    def add_arguments(self, parser): -        parser.add_argument("--order-by-id", action="store_true") -      def handle(self, *args, **options): -        timetables = Timetable.objects.all() -        if options["order_by_id"]: -            timetables = timetables.order_by("id") -        else: -            timetables = timetables.order_by("year__name", "name") +        sources = Source.objects.all() -        for timetable in timetables: -            self.stdout.write("{0} (id: {1})".format(timetable, timetable.id)) +        for source in sources: +            self.stdout.write("{0}\t: {1} (id: {2})".format( +                source.formatted_timetables, source, source.id))          self.stdout.write("")          self.stdout.write(self.style.SUCCESS("Done.")) diff --git a/management/commands/reparse.py b/management/commands/reparse.py new file mode 100644 index 0000000..20eb1b4 --- /dev/null +++ b/management/commands/reparse.py @@ -0,0 +1,30 @@ +#    Copyright (C) 2018  Alban Gruin +# +#    celcatsanitizer is free software: you can redistribute it and/or modify +#    it under the terms of the GNU Affero General Public License as published +#    by the Free Software Foundation, either version 3 of the License, or +#    (at your option) any later version. +# +#    celcatsanitizer is distributed in the hope that it will be useful, +#    but WITHOUT ANY WARRANTY; without even the implied warranty of +#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the +#    GNU Affero General Public License for more details. +# +#    You should have received a copy of the GNU Affero General Public License +#    along with celcatsanitizer.  If not, see <http://www.gnu.org/licenses/>. + +from django.core.management.base import BaseCommand +from ...models import Group + + +class Command(BaseCommand): +    help = "Reparses all groups in database" + +    def handle(self, *args, **options): +        self.stdout.write("Processing {0} groups…".format( +            Group.objects.count())) + +        for group in Group.objects.all(): +            group.save() + +        self.stdout.write(self.style.SUCCESS("Done.")) diff --git a/management/commands/timetables.py b/management/commands/timetables.py index ff00c8f..f92ad4e 100644 --- a/management/commands/timetables.py +++ b/management/commands/timetables.py @@ -1,4 +1,4 @@ -#    Copyright (C) 2017  Alban Gruin +#    Copyright (C) 2017-2018  Alban Gruin  #  #    celcatsanitizer is free software: you can redistribute it and/or modify  #    it under the terms of the GNU Affero General Public License as published @@ -20,12 +20,16 @@ from django.core.management.base import BaseCommand  from django.db import transaction  from django.db.models import Min -from edt.models import Course, Timetable -from edt.utils import get_week, tz_now -from ._private import delete_courses_in_week, get_events, get_update_date, get_weeks, get_xml +from ...models import Course, Source +from ...utils import get_week, tz_now + +from ._private import delete_courses_in_week, get_events, get_update_date, \ +    get_weeks, get_xml +  @transaction.atomic -def process_timetable_week(timetable, soup, weeks_in_soup, force, year=None, week=None): +def process_timetable_week(source, soup, weeks_in_soup, force, +                           year=None, week=None):      if year is not None and week is not None:          begin, end = get_week(year, week) @@ -40,75 +44,83 @@ def process_timetable_week(timetable, soup, weeks_in_soup, force, year=None, wee      else:          today = tz_now() -    # On récupère la mise à jour la plus ancienne dans les cours de l’emploi du temps -    last_update_date = Course.objects.filter(timetable=timetable) +    # On récupère la mise à jour la plus ancienne dans les cours de +    # l’emploi du temps +    last_update_date = Course.objects.filter(source=source)      if today is not None: -        # Cette date concerne les éléments commençant à partir d’aujourd’hui si la valeur -        # n’est pas nulle. +        # Cette date concerne les éléments commençant à partir +        # d’aujourd’hui si la valeur n’est pas nulle.          last_update_date = last_update_date.filter(begin__gte=today)      if year is not None and week is not None: -        # Si jamais on traite une semaine spécifique, on limite les cours sélectionnés -        # à ceux qui commencent entre le début du traitement et la fin de la semaine +        # Si jamais on traite une semaine spécifique, on limite les +        # cours sélectionnés à ceux qui commencent entre le début du +        # traitement et la fin de la semaine          last_update_date = last_update_date.filter(begin__lt=end) -    last_update_date = last_update_date.aggregate(Min("last_update")) \ -                       ["last_update__min"] +    last_update_date = last_update_date.aggregate( +        Min("last_update"))["last_update__min"]      # Date de mise à jour de Celcat, utilisée à des fins de statistiques      new_update_date = get_update_date(soup) -    # On ne fait pas la mise à jour si jamais la dernière date de MàJ est plus récente -    # que celle indiquée par Celcat. -    # Attention, le champ last_update de la classe Course représente l’heure à laquelle -    # le cours a été inséré dans la base de données, et non pas la date indiquée par -    # Celcat. -    if not force and last_update_date is not None and new_update_date is not None and \ -       last_update_date >= new_update_date: +    # On ne fait pas la mise à jour si jamais la dernière date de MàJ +    # est plus récente que celle indiquée par Celcat.  Attention, le +    # champ last_update de la classe Course représente l’heure à +    # laquelle le cours a été inséré dans la base de données, et non +    # pas la date indiquée par Celcat. +    if not force and last_update_date is not None and \ +       new_update_date is not None and last_update_date >= new_update_date:          return      if year is not None and week is not None:          # On efface la semaine à partir de maintenant si jamais          # on demande le traitement d’une seule semaine -        delete_courses_in_week(timetable, year, week, today) +        delete_courses_in_week(source, year, week, today)      else:          # Sinon, on efface tous les cours à partir de maintenant.          # Précisément, on prend la plus grande valeur entre la première semaine          # présente dans Celcat et maintenant.          delete_from = min(weeks_in_soup.values())          if not force: -            # Si jamais on force la MàJ, on efface tout à partir de la première semaine +            # Si jamais on force la MàJ, on efface tout à partir de la +            # première semaine              delete_from = max(delete_from, today) -        Course.objects.filter(timetable=timetable, begin__gte=delete_from).delete() +        Course.objects.filter(source=source, begin__gte=delete_from).delete()      # Tous les cours commençant sur la période traitée      # sont parsés, puis enregistrés dans la base de données. -    for course in get_events(timetable, soup, weeks_in_soup, today, year, week): +    for course in get_events(source, soup, weeks_in_soup, today, year, week):          course.save()      # On renseigne la date de mise à jour de Celcat, à des fins de statistiques -    timetable.last_update_date = new_update_date -    timetable.save() +    source.last_update_date = new_update_date +    source.save() + -def process_timetable(timetable, force, year=None, weeks=None): -    soup = get_xml(timetable.url) +def process_timetable(source, force, year=None, weeks=None): +    soup = get_xml(source.url)      weeks_in_soup = get_weeks(soup)      if year is not None and weeks is not None:          for week in weeks: -            process_timetable_week(timetable, soup, weeks_in_soup, force, year, week) +            process_timetable_week(source, soup, weeks_in_soup, force, +                                   year, week)      else: -        process_timetable_week(timetable, soup, weeks_in_soup, force) +        process_timetable_week(source, soup, weeks_in_soup, force)  class Command(BaseCommand):      help = "Fetches registered celcat timetables"      def add_arguments(self, parser): -        parser.add_argument("--all", const=True, default=False, action="store_const") -        parser.add_argument("--force", const=True, default=False, action="store_const") -        parser.add_argument("--week", type=int, choices=range(1, 54), nargs="+") +        parser.add_argument("--all", const=True, default=False, +                            action="store_const") +        parser.add_argument("--force", const=True, default=False, +                            action="store_const") +        parser.add_argument("--week", type=int, choices=range(1, 54), +                            nargs="+")          parser.add_argument("--year", type=int, nargs=1)      def handle(self, *args, **options): @@ -120,7 +132,8 @@ class Command(BaseCommand):          elif options["week"] is None:              _, week, day = tz_now().isocalendar()              if day >= 6: -                year, week, _ = (tz_now() + datetime.timedelta(weeks=1)).isocalendar() +                year, week, _ = (tz_now() + datetime.timedelta(weeks=1)) \ +                                                            .isocalendar()              weeks = [week]          else:              weeks = options["week"] @@ -131,16 +144,18 @@ class Command(BaseCommand):              elif year is None:                  year = options["year"][0] -        for timetable in Timetable.objects.all(): -            self.stdout.write("Processing {0}".format(timetable)) +        for source in Source.objects.all(): +            self.stdout.write("Processing {0}".format( +                source.formatted_timetables))              try: -                process_timetable(timetable, options["force"], year, weeks) +                process_timetable(source, options["force"], year, weeks)              except KeyboardInterrupt:                  break              except Exception:                  self.stderr.write( -                    self.style.ERROR("Failed to process {0}:".format(timetable)) +                    self.style.ERROR("Failed to process {0}:".format( +                        source.formatted_timetables))                  )                  self.stderr.write(self.style.ERROR(traceback.format_exc()))                  errcount += 1 @@ -148,4 +163,5 @@ class Command(BaseCommand):          if errcount == 0:              self.stdout.write(self.style.SUCCESS("Done."))          else: -            self.stdout.write(self.style.ERROR("Done with {0} errors.".format(errcount))) +            self.stdout.write(self.style.ERROR("Done with {0} errors.".format( +                errcount))) | 
