from bs4 import BeautifulSoup
from django.utils import timezone
from edt.models import Group, Room, Course
import datetime
import requests


class Week:
    """A calendar week as described by the timetable XML feed."""

    def __init__(self, number, start):
        self.number = number
        # The feed gives the week's start date as dd/mm/yyyy; make it
        # timezone-aware so comparisons against stored Course datetimes work.
        self.start = timezone.make_aware(
            datetime.datetime.strptime(start, "%d/%m/%Y"))

    def get_day(self, day_index):
        """Return the date of the given zero-based day within this week."""
        return self.start + datetime.timedelta(days=day_index)


def add_time(date, time):
    """Combine a date with the hour and minute of a parsed time."""
    delta = datetime.timedelta(hours=time.hour, minutes=time.minute)
    return date + delta


def delete_courses_in_week(year, week):
    """Delete every Course whose begin datetime falls in the given week."""
    start = timezone.make_aware(datetime.datetime.strptime(
        "{0}-W{1:02d}-1".format(year, week), "%Y-W%W-%w"))
    end = start + datetime.timedelta(weeks=1)
    Course.objects.filter(begin__gte=start, begin__lt=end).delete()


def get_from_db_or_create(cls, name, timetable=None):
    """Fetch the first `cls` object with this name (and timetable, if given),
    creating and saving it when it does not exist yet."""
    obj = cls.objects.filter(name=name)
    if timetable is not None:
        obj = obj.filter(timetable=timetable)
    obj = obj.first()
    if obj is None:
        obj = cls(name=name)
        obj.timetable = timetable
        obj.save()
    return obj


def get_events(soup, weeks, week, timetable):
    """Yield (title, type, groups, rooms, notes, begin, end) tuples for every
    <event> belonging to the requested week number."""
    for event in soup.find_all("event"):
        title = None
        type_ = None
        groups = None
        rooms = None
        notes = None
        # Skip events that belong to another week of the feed.
        if weeks[event.rawweeks.text].number != week:
            continue
        if event.resources.module is not None:
            title = event.resources.module.item.text
        if event.category is not None:
            type_ = event.category.text
        if event.resources.group is not None:
            groups = [get_from_db_or_create(Group, item.text, timetable)
                      for item in event.resources.group.find_all("item")]
        if event.resources.room is not None:
            rooms = [get_from_db_or_create(Room, item.text)
                     for item in event.resources.room.find_all("item")]
        if event.notes is not None:
            notes = event.notes.text
        date = weeks[event.rawweeks.text].get_day(int(event.day.text))
        begin = add_time(date, datetime.datetime.strptime(event.starttime.text, "%H:%M"))
        end = add_time(date, datetime.datetime.strptime(event.endtime.text, "%H:%M"))
        yield title, type_, groups, rooms, notes, begin, end


def get_weeks(soup):
    """Map each raw week string of the feed to a Week(number, start date)."""
    weeks = {}
    for span in soup.find_all("span"):
        weeks[span.alleventweeks.text] = Week(int(span.title.text), span["date"])
    return weeks


def get_xml(url):
    """Download the timetable feed and parse it. Note that html.parser
    lowercases tag names, which is why the lookups above use lowercase tags."""
    r = requests.get(url)
    r.encoding = "utf-8"
    soup = BeautifulSoup(r.text, "html.parser")
    return soup
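
# Usage sketch (an illustrative addition, not part of the original module):
# how the helpers above compose into a re-import of one week. The function
# name `refresh_week` and the `feed_url` parameter are hypothetical, and the
# Course construction is left to the caller because the edt.models field
# names (beyond `begin`) are not visible from this file.
def refresh_week(timetable, feed_url, year, week_number):
    """Illustrative only: re-import one week of courses from the XML feed."""
    soup = get_xml(feed_url)
    weeks = get_weeks(soup)
    # Clear the week's existing courses first so the import is idempotent.
    delete_courses_in_week(year, week_number)
    for title, type_, groups, rooms, notes, begin, end in get_events(
            soup, weeks, week_number, timetable):
        # A caller would build and save a Course here from the yielded tuple
        # and attach the Group/Room objects; this depends on the schema.
        ...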