Skip to content
This repository has been archived by the owner on Mar 15, 2019. It is now read-only.

CinemaCity: Olympia #95

Closed
wants to merge 10 commits into from
147 changes: 147 additions & 0 deletions zitkino/scrapers/cinemacity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# -*- coding: utf-8 -*-

"""
Base class with shared implementation for the cinemas
aggregated on the www.cinemacity.cz web page.

Do not use this class directly. It is meant to be subclassed.
The subclass MUST set `location` to its location number as used by the
cinemacity.cz API and `cinema` to an instance of Cinema.

The class scrapes program for the entire week starting with
`datetime.date.today()`.

Example usage:
.. code-block:: python

from zitkino.models import Cinema

from .cinemacity import MultiplexScraper

cinema = Cinema(...)

class OlympiaScraper(MultiplexScraper):

location = '1010103'
cinema = cinema
"""

import datetime

from zitkino import parsers
from zitkino.utils import download
from zitkino.models import Showtime, ScrapedFilm

class MultiplexScraper(object):
    """Shared scraper implementation for cinemas listed on cinemacity.cz.

    The class is abstract. A usable subclass must define two class
    attributes:

    * ``location`` -- value substituted for ``{location}`` in ``url``
    * ``cinema`` -- object attached to every created ``Showtime``

    Calling an instance yields showtimes for seven consecutive days,
    starting with ``datetime.date.today()``.
    """

    url = ('http://www.cinemacity.cz/scheduleInfo?locationId={location}'
           '&date={date_string}')

    # Maps the text of a schedule table cell to the tag stored on a showtime.
    tags_map = {
        u'ČT': 'subtitles',
        u'DAB': 'dubbing',
        u'CZ': 'czech',
        u'EN': 'english',
    }

    def __init__(self):
        """Refuse to instantiate the base class itself.

        Raises:
            Exception: when called on ``MultiplexScraper`` directly.
        """
        # Identity check on the exact type: subclasses must stay
        # instantiable, only the base class is rejected.
        if type(self) is MultiplexScraper:
            raise Exception('Base class can\'t be instantiated.')

    def __call__(self):
        """Yield showtimes for today and the six following days."""
        start = datetime.date.today()
        for offset in range(7):
            day = start + datetime.timedelta(days=offset)
            rows = self._scrape_table(day)
            for showtime in self._parse_table(rows, day):
                yield showtime

    def _scrape_table(self, date=None):
        """Download the schedule page and return its table rows.

        Args:
            date: object with ``strftime`` (e.g. ``datetime.date``) or a
                falsy value; in the latter case the literal string
                ``'null'`` is sent as the date parameter.

        Returns:
            Elements matching the ``.scheduleInfoTable tr`` CSS selector.
        """
        date_string = date.strftime('%d/%m/%Y') if date else 'null'
        url = self._build_url(date_string)
        resp = download(url)
        html = parsers.html(resp.content.decode('utf-8'), base_url=resp.url)
        return html.cssselect('.scheduleInfoTable tr')

    def _parse_table(self, rows, date=None):
        """Turn schedule table rows into a list of ``Showtime`` objects.

        Args:
            rows: sequence of table rows; the first one is treated as
                the header and skipped.
            date: day the rows belong to; defaults to today when falsy.

        Returns:
            List of ``Showtime`` objects, one per screening time found.
        """
        if not date:
            date = datetime.date.today()

        # The date parts are the same for every screening of the day.
        day_and_month = date.strftime('%d. %m.')
        year = date.strftime('%Y')

        showtimes = []

        for row in rows[1:]:  # skip header
            title_main = row[0][0].text_content()
            movie_type = self._get_tags(row[2].text_content())
            dubbed = self._get_tags(row[3].text_content())
            length = row[4].text_content()

            tags = [tag for tag in (movie_type, dubbed) if tag]

            for screening in self._parse_row_remnant(row[5:]):
                starts_at = parsers.date_time_year(
                    day_and_month,
                    screening,
                    year
                )
                showtime = Showtime(
                    cinema=self._get_cinema(),
                    film_scraped=ScrapedFilm(
                        title_main=title_main,
                        titles=[title_main],
                        length=length
                    ),
                    starts_at=starts_at,
                    # Give every showtime its own list so that changing
                    # the tags of one does not affect its siblings.
                    tags=list(tags),
                )
                showtimes.append(showtime)

        return showtimes

    def _parse_row_remnant(self, row):
        """Extract screening times from the rest of the row.

        Some of the cinemas may have other data in the cell with the
        time itself. This method filters them out, leaving the times only.

        Args:
            row: iterable of cells; each must provide ``text`` and
                ``xpath``. Cells with empty ``text`` are ignored.

        Returns:
            List of non-empty, stripped text fragments that are not
            forbidden keywords.
        """
        forbidden_keywords = ('IMAX',)
        times = []
        for cell in row:
            if not cell.text:
                continue
            for fragment in cell.xpath('.//text()'):
                fragment = fragment.strip()
                # Drop whitespace-only fragments and known non-time labels.
                if fragment and fragment not in forbidden_keywords:
                    times.append(fragment)
        # A real list (not a lazy ``filter`` object) keeps the result
        # reusable and indexable on Python 3 as well.
        return times

    def _get_tags(self, tag):
        """Return the tag mapped to the given cell text, or ``None``.

        Surrounding whitespace is ignored so that padded cell text still
        matches the keys of ``tags_map``.
        """
        if tag is None:
            return None
        return self.tags_map.get(tag.strip())

    def _build_url(self, date):
        """Return the schedule URL for the given, already formatted, date.

        Raises:
            NotImplementedError: when the class defines no ``location``.
        """
        cls = self.__class__
        try:
            location = cls.location
        except AttributeError:
            raise NotImplementedError('This method works only in proper subclass.')
        return cls.url.format(location=location, date_string=date)

    def _get_cinema(self):
        """Return the ``cinema`` attribute defined by the subclass.

        Raises:
            NotImplementedError: when no ``cinema`` attribute is defined.
        """
        try:
            return self.cinema
        except AttributeError:
            raise NotImplementedError('This method works only in proper subclass.')
22 changes: 22 additions & 0 deletions zitkino/scrapers/multiplex_olympia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# -*- coding: utf-8 -*-

from zitkino.models import Cinema

from . import scrapers
from .cinemacity import MultiplexScraper


# Cinema record for the Cinema City multiplex "Olympia" in Modřice.
# It is passed to the scraper registration and set on the scraper class below.
# NOTE(review): coords are presumably (latitude, longitude) -- confirm
# against the Cinema model.
cinema = Cinema(
name=u'Multikino Cinema City Olympia',
url='http://www.cinemacity.cz/olympia',
street=u'U Dálnice 777',
town=u'Modřice',
coords=(49.1381280, 16.6330180)
)


@scrapers.register(cinema)
class OlympiaScraper(MultiplexScraper):
    """Scraper for the Olympia multiplex.

    All behaviour is inherited from ``MultiplexScraper``; this class
    only supplies the location number and the cinema record.
    """

    # Location number substituted into the schedule URL.
    location = '1010103'
    # Module-level cinema record registered with this scraper.
    cinema = cinema
2 changes: 1 addition & 1 deletion zitkino/templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ <h2>{{ starts_at_day|date }}</h2>
</a>
</th>
<td>
{% for cinema in showtimes_per_film.list|map(attribute='cinema')|unique(attribute='name')|sort(attribute='starts_at') %}
{% for cinema in showtimes_per_film.list|sort(attribute='starts_at')|map(attribute='cinema')|unique(attribute='name') %}
<a href="{{ cinema.url }}">{{ cinema.name }}</a>
{% endfor %}
</td>
Expand Down