# Copyright (C) 2017 Alban Gruin
#
# celcatsanitizer is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# celcatsanitizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with celcatsanitizer. If not, see <http://www.gnu.org/licenses/>.

import datetime
import re

from bs4 import BeautifulSoup
from django.utils import timezone
import requests

from edt.models import Group, Room, Course
from edt.utils import get_week

# Matches the "day/month/year hour:minute:second" timestamp searched for in
# the document footer:
#
#   (\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+)
#   (\d+)  one or more digits
#   /      a slash
#   (\d+)  one or more digits
#   /      a slash
#   (\d+)  one or more digits
#   \s+    one or more whitespace characters
#   (\d+)  one or more digits
#   :      a colon
#   (\d+)  one or more digits
#   :      a colon
#   (\d+)  one or more digits
#
# A raw string is used so that "\d" and "\s" reach the regex engine untouched
# instead of being treated as (invalid) string escape sequences.
_UPDATE_DATE_RE = re.compile(r"(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+)")


class Week:
    """A week of the timetable, identified by its number and start date."""

    def __init__(self, number, start):
        """Store the week number and parse *start* ("dd/mm/YYYY").

        The parsed date is made timezone-aware with Django's
        ``timezone.make_aware``.
        """
        self.number = number
        self.start = timezone.make_aware(
            datetime.datetime.strptime(start, "%d/%m/%Y"))

    # NOTE: ``id`` shadows the builtin, but the parameter name is part of the
    # public signature and is therefore left unchanged.
    def get_day(self, id):
        """Return the start of the week shifted by *id* days."""
        return self.start + datetime.timedelta(id)

    @property
    def year(self):
        """Year of the first day of the week."""
        return self.start.year


def add_time(date, time):
    """Return *date* shifted by the hours and minutes of *time*.

    Only ``time.hour`` and ``time.minute`` are used; any other component of
    *time* is ignored.
    """
    delta = datetime.timedelta(hours=time.hour, minutes=time.minute)
    return date + delta


def consolidate_group(group):
    """Link *group* to its parent and attach its orphan children to it.

    The hierarchy levels are, from the most general to the most specific:
    mention, subgroup, td, tp. Their values are read from
    ``group.group_info[1:]``.
    NOTE(review): this presumably means ``group_info`` is a 5-item sequence
    whose first item is not a hierarchy level -- confirm against the model.
    """
    levels = ("mention", "subgroup", "td", "tp")
    values = group.group_info[1:]

    if group.subgroup is not None:
        # Parent lookup: walking from the most specific level upwards, turn
        # every level into an "is null" constraint, stopping right after the
        # first level that actually holds a value.  What remains describes
        # the group one step above this one.
        lookup = dict(zip(levels, values))
        for i in reversed(range(len(values))):
            key = levels[i]
            del lookup[key]
            lookup[key + "__isnull"] = True
            if values[i] is not None:
                break

        group.parent = Group.objects.filter(timetable=group.timetable,
                                            **lookup).first()
        group.save()

    if group.tp is None:
        # Children lookup: keep the leading levels that hold a value, require
        # the first empty level to be set (``__isnull=False``) and every
        # level after it to be empty (``__isnull=True``).
        lookup = dict(zip(levels, values))
        last_is_none = False

        for i, key in enumerate(levels):
            if values[i] is None or last_is_none:
                del lookup[key]
                lookup[key + "__isnull"] = last_is_none
                last_is_none = True

        # Only groups that have no parent yet are adopted.
        Group.objects.filter(timetable=group.timetable,
                             parent__isnull=True,
                             **lookup).update(parent=group)


def consolidate_groups(groups):
    """Run :func:`consolidate_group` on every group that has no parent."""
    for group in groups:
        if group.parent is None:
            consolidate_group(group)


def delete_courses_in_week(timetable, year, week):
    """Delete the courses of *timetable* beginning in the given week.

    The bounds come from ``get_week(year, week)``; the start is inclusive and
    the end exclusive.
    """
    start, end = get_week(year, week)
    Course.objects.filter(begin__gte=start, begin__lt=end,
                          timetable=timetable).delete()


def get_from_db_or_create(cls, **kwargs):
    """Return the first *cls* row matching *kwargs*, creating it if needed.

    When no row matches, a new instance is built from *kwargs*, saved and
    returned.
    """
    obj = cls.objects.filter(**kwargs).first()
    if obj is None:
        obj = cls(**kwargs)
        obj.save()

    return obj


def get_events(timetable, year, week, soup, weeks_in_soup):
    """Yield the events of *soup* that belong to the requested week.

    An event is skipped unless its week (looked up in *weeks_in_soup* by the
    text of its ``rawweeks`` tag) has the requested number and year, and it
    has a group resource, a start time and an end time.

    Groups and rooms referenced by an event are fetched from the database, or
    created when missing, and the groups are consolidated.

    Yields tuples ``(title, type_, groups, rooms, notes, begin, end)``;
    ``type_``, ``rooms`` and ``notes`` are ``None`` when the event does not
    provide them.
    """
    for event in soup.find_all("event"):
        event_week = weeks_in_soup[event.rawweeks.text]

        if event_week.number != week or event_week.year != year:
            continue
        if event.resources.group is None:
            continue
        if event.starttime is None or event.endtime is None:
            continue

        type_ = None
        rooms = None
        notes = None

        date = event_week.get_day(int(event.day.text))
        begin = add_time(date, datetime.datetime.strptime(
            event.starttime.text, "%H:%M"))
        end = add_time(date, datetime.datetime.strptime(
            event.endtime.text, "%H:%M"))

        groups = [get_from_db_or_create(Group, timetable=timetable,
                                        celcat_name=item.text)
                  for item in event.resources.group.find_all("item")]
        consolidate_groups(groups)

        if event.notes is not None:
            notes = event.notes.text

        # Title: the module name when there is one, otherwise the notes are
        # promoted to title (and no longer reported as notes), otherwise a
        # placeholder.
        if event.resources.module is not None:
            title = event.resources.module.item.text
        elif notes is not None:
            title = notes
            notes = None
        else:
            title = "Aucune information"

        if event.category is not None:
            type_ = event.category.text

        if event.resources.room is not None:
            rooms = [get_from_db_or_create(Room, name=item.text)
                     for item in event.resources.room.find_all("item")]

        yield title, type_, groups, rooms, notes, begin, end


def get_update_date(soup):
    """Return the timestamp found in the footer of *soup*.

    The footer text is searched for a "day/month/year hour:minute:second"
    timestamp. Returns a timezone-aware ``datetime``, or ``None`` when the
    document has no footer or the footer holds no such timestamp.
    """
    footer = soup.footer
    if footer is None:
        return None

    match = _UPDATE_DATE_RE.search(footer.text)
    if match is None:
        return None

    day, month, year, hour, minute, second = (int(v) for v in match.groups())
    date = datetime.datetime(year, month, day, hour, minute, second)
    return timezone.make_aware(date)


def get_weeks(soup):
    """Map the ``alleventweeks`` text of every ``span`` tag to a Week.

    The week number is read from the ``title`` child of the span and the
    start date from its ``date`` attribute.
    """
    weeks = {}
    for span in soup.find_all("span"):
        weeks[span.alleventweeks.text] = Week(int(span.title.text),
                                              span["date"])

    return weeks


def get_xml(url, timeout=None):
    """Download *url* and return its content parsed by BeautifulSoup.

    *timeout* is forwarded to ``requests.get``; the default (``None``) keeps
    the historical behaviour of waiting indefinitely.
    """
    r = requests.get(url, timeout=timeout)
    # NOTE(review): ``encoding`` only influences ``r.text``.  The parser is
    # fed the raw bytes of ``r.content``, so this assignment does not affect
    # how the document is decoded -- confirm whether that is intended.
    r.encoding = "utf8"

    soup = BeautifulSoup(r.content, "html.parser")
    return soup