"""Helpers for fetching a timetable feed and turning it into event tuples."""

import datetime

import requests
from bs4 import BeautifulSoup
from django.utils import timezone

from edt.models import Group, Room


class Week:
    """A timetable week: its number and the aware datetime of its first day."""

    def __init__(self, number, start):
        """Store the week number and parse its start date.

        Args:
            number: Week number as given by the feed.
            start: Start date as a string in ``"%d/%m/%Y"`` format.

        Raises:
            ValueError: If ``start`` does not match ``"%d/%m/%Y"``.
        """
        self.number = number
        # strptime yields a naive datetime at midnight; make_aware attaches a
        # time zone (Django's current one, since none is passed explicitly).
        naive_start = datetime.datetime.strptime(start, "%d/%m/%Y")
        self.start = timezone.make_aware(naive_start)

    def get_day(self, id):
        """Return the datetime ``id`` days after the start of the week.

        The parameter keeps its original name (``id``) so existing keyword
        callers keep working, even though it shadows the builtin.
        """
        return self.start + datetime.timedelta(days=id)


def add_time(date, time):
    """Return ``date`` shifted forward by the hours and minutes of ``time``.

    Args:
        date: A datetime to offset.
        time: Any object exposing ``hour`` and ``minute`` attributes; only
            those two attributes are read (seconds are ignored).
    """
    delta = datetime.timedelta(hours=time.hour, minutes=time.minute)
    return date + delta


def get_from_db_or_create(cls, name):
    """Return the first ``cls`` row named ``name``, creating it if missing.

    Args:
        cls: A model class exposing an ``objects`` manager and accepting a
            ``name`` keyword argument.
        name: Value of the ``name`` field to look up.

    Returns:
        The existing instance, or a newly saved one.
    """
    # ``.first()`` is kept on purpose (instead of ``get_or_create``) so that
    # pre-existing duplicate names do not raise MultipleObjectsReturned.
    obj = cls.objects.filter(name=name).first()
    if obj is None:
        obj = cls(name=name)
        obj.save()
    return obj


def get_events(soup, weeks, week):
    """Yield the events of week number ``week`` found in ``soup``.

    Each optional field is only filled in when the previous one was found:
    ``type_`` requires ``title``, ``groups`` requires ``type_`` and ``rooms``
    requires ``groups``. Fields that are not filled in stay ``None``.

    Args:
        soup: Parsed document containing ``event`` elements.
        weeks: Mapping from an event's ``rawweeks`` text to a ``Week``
            (as built by ``get_weeks``).
        week: Week number to keep; events of other weeks are skipped.

    Yields:
        Tuples ``(title, type_, groups, rooms, begin, end)`` where ``begin``
        and ``end`` are datetimes derived from the week start, the event's
        day offset and its ``starttime`` / ``endtime`` (``"%H:%M"``).

    Raises:
        KeyError: If an event's ``rawweeks`` text is not a key of ``weeks``.
    """
    for event in soup.find_all("event"):
        # Look the week up once; it is needed both for filtering and dating.
        event_week = weeks[event.rawweeks.text]
        if event_week.number != week:
            continue

        title = None
        type_ = None
        groups = None
        rooms = None

        resources = event.resources
        if resources.module is not None:
            title = resources.module.text
        if event.category is not None and title is not None:
            type_ = event.category.text
        if resources.group is not None and type_ is not None:
            groups = [
                get_from_db_or_create(Group, item.text)
                for item in resources.group.find_all("item")
            ]
        if resources.room is not None and groups is not None:
            rooms = [
                get_from_db_or_create(Room, item.text)
                for item in resources.room.find_all("item")
            ]

        date = event_week.get_day(int(event.day.text))
        begin = add_time(
            date, datetime.datetime.strptime(event.starttime.text, "%H:%M")
        )
        end = add_time(
            date, datetime.datetime.strptime(event.endtime.text, "%H:%M")
        )

        yield (title, type_, groups, rooms, begin, end)


def get_weeks(soup):
    """Build a mapping from week pattern text to ``Week`` objects.

    For every ``span`` element in ``soup``, the text of its ``alleventweeks``
    child is used as the key; the ``Week`` is built from the integer text of
    its ``title`` child and its ``date`` attribute. If two spans share a key,
    the later one wins.
    """
    return {
        span.alleventweeks.text: Week(int(span.title.text), span["date"])
        for span in soup.find_all("span")
    }


def get_xml(url, timeout=None):
    """Download ``url`` and return it parsed as a ``BeautifulSoup`` tree.

    Args:
        url: Address of the document to fetch.
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            The default ``None`` waits indefinitely, matching the previous
            behaviour; callers are encouraged to pass a finite value.

    Returns:
        The response body parsed with the ``"html.parser"`` backend.
    """
    # NOTE(review): the HTTP status is not checked, so an error page is
    # parsed like any other document - confirm this is intended.
    response = requests.get(url, timeout=timeout)
    # Force UTF-8 decoding instead of trusting the guessed encoding.
    response.encoding = "utf8"
    return BeautifulSoup(response.text, "html.parser")