zitkino · apophys · Sep 22, 2013 · Oct 5, 2013 · Oct 5, 2013 · Oct 5, 2013
diff --git a/zitkino/scrapers/cinemacity.py b/zitkino/scrapers/cinemacity.py
@@ -0,0 +1,147 @@
+# -*- coding: utf-8 -*-
+
+"""
+Base class with shared implementation for the cinemas
+aggregated on the www.cinemacity.cz web page.
+
+Do not use this class directly. It is meant to be subclassed.
+The subclass MUST specify its location number as in cinemacity.cz API
+and set an instance of Cinema.
+
+The class scrapes program for the entire week starting with
+`datetime.date.today()`.
+
+Example usage:
+
+.. code-block:: python
+
+    from zitkino.models import Cinema
+
+    from .cinemacity import MultiplexScraper
+
+    cinema = Cinema(...)
+
+    class OlympiaScraper(MultiplexScraper):
+
+        location = '1010103'
+        cinema = cinema
+"""
+
+import datetime
+
+from zitkino import parsers
+from zitkino.utils import download
+from zitkino.models import Showtime, ScrapedFilm
+
+class MultiplexScraper(object):
+    """Scraper base for a single Cinema City multiplex.
+
+    Subclasses set two class attributes: ``location`` (the location id
+    placed into the schedule URL) and ``cinema`` (the object attached to
+    every produced ``Showtime``). Calling an instance yields the showtimes
+    scraped for seven consecutive days, starting with today.
+    """
+
+    url = ('http://www.cinemacity.cz/scheduleInfo?locationId={location}'
+           '&date={date_string}')
+
+    # Maps the labels read from the schedule table to tag names.
+    tags_map = {
+        u'ČT': 'subtitles',
+        u'DAB': 'dubbing',
+        u'CZ': 'czech',
+        u'EN': 'english',
+    }
+
+    def __init__(self):
+        """Refuse direct instantiation of the base class.
+
+        Raises:
+            TypeError: if ``MultiplexScraper`` itself is instantiated.
+        """
+        # Compare the exact type by identity so that subclasses stay usable.
+        if type(self) is MultiplexScraper:
+            raise TypeError("Base class can't be instantiated.")
+
+    def __call__(self):
+        """Yield the showtimes of the next seven days, today included.
+
+        This is a generator; the schedule of each day is downloaded only
+        when the iteration reaches that day.
+        """
+        start = datetime.date.today()
+        for offset in range(7):
+            day = start + datetime.timedelta(days=offset)
+            rows = self._scrape_table(day)
+            for showtime in self._parse_table(rows, day):
+                yield showtime
+
+    def _scrape_table(self, date=None):
+        """Download the schedule page and return the rows of its table.
+
+        Args:
+            date: the ``datetime.date`` to request; when omitted, the
+                literal string ``'null'`` is sent as the date parameter.
+
+        Returns:
+            The elements matched by the ``.scheduleInfoTable tr`` selector.
+        """
+        date_string = date.strftime('%d/%m/%Y') if date else 'null'
+        resp = download(self._build_url(date_string))
+        html = parsers.html(resp.content.decode('utf-8'), base_url=resp.url)
+        return html.cssselect('.scheduleInfoTable tr')
+
+    def _parse_table(self, rows, date=None):
+        """Turn schedule table rows into ``Showtime`` objects.
+
+        Args:
+            rows: table rows as returned by ``_scrape_table``; the first
+                one is treated as the header and skipped.
+            date: the day the rows belong to; defaults to today.
+
+        Returns:
+            A list with one ``Showtime`` per screening time found.
+        """
+        if date is None:
+            date = datetime.date.today()
+
+        # The date part is identical for every screening in the table.
+        day_string = date.strftime('%d. %m.')
+        year_string = date.strftime('%Y')
+
+        showtimes = []
+        for row in rows[1:]:  # skip header
+            title_main = row[0][0].text_content()
+            movie_type = self._get_tags(row[2].text_content())
+            dubbed = self._get_tags(row[3].text_content())
+            length = row[4].text_content()
+
+            # Labels missing from tags_map come back as None; drop them.
+            tags = [tag for tag in (movie_type, dubbed) if tag]
+
+            # Everything after the fifth cell holds the screening times.
+            for screening in self._parse_row_remnant(row[5:]):
+                starts_at = parsers.date_time_year(
+                    day_string, screening, year_string)
+                showtimes.append(Showtime(
+                    cinema=self._get_cinema(),
+                    film_scraped=ScrapedFilm(
+                        title_main=title_main,
+                        titles=[title_main],
+                        length=length,
+                    ),
+                    starts_at=starts_at,
+                    tags=tags,
+                ))
+
+        return showtimes
+
+    def _parse_row_remnant(self, row):
+        """Extract screening times from the rest of the row.
+
+        Some cinemas put other data (such as an ``IMAX`` label) into the
+        cell with the time; those strings are filtered out.
+
+        Args:
+            row: the cells that follow the fixed columns of a table row.
+
+        Returns:
+            A list of the whitespace-stripped, non-empty text fragments
+            of the cells, without the forbidden keywords. It is always
+            a real list, never a lazy ``filter`` object.
+        """
+        forbidden_keywords = ('IMAX',)
+        result = []
+        for cell in row:
+            # Cells without text of their own are skipped entirely.
+            if not cell.text:
+                continue
+            for fragment in cell.xpath('.//text()'):
+                fragment = fragment.strip()
+                if fragment and fragment not in forbidden_keywords:
+                    result.append(fragment)
+        return result
+
+    def _get_tags(self, tag):
+        """Return the tag name for a table label, or None if unknown."""
+        return self.tags_map.get(tag)
+
+    def _build_url(self, date):
+        """Return the schedule URL for the given date string.
+
+        Args:
+            date: the already formatted value of the ``date`` parameter.
+
+        Raises:
+            NotImplementedError: if the class defines no ``location``.
+        """
+        cls = self.__class__
+        # Only the lookup of the subclass-provided attribute is guarded.
+        try:
+            location = cls.location
+        except AttributeError:
+            raise NotImplementedError(
+                'This method works only in proper subclass.')
+        return cls.url.format(location=location, date_string=date)
+
+    def _get_cinema(self):
+        """Return the ``cinema`` object set by the subclass.
+
+        Raises:
+            NotImplementedError: if no ``cinema`` attribute is set.
+        """
+        try:
+            return self.cinema
+        except AttributeError:
+            raise NotImplementedError(
+                'This method works only in proper subclass.')
diff --git a/zitkino/scrapers/multiplex_olympia.py b/zitkino/scrapers/multiplex_olympia.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+from zitkino.models import Cinema
+
+from . import scrapers
+from .cinemacity import MultiplexScraper
+
+
+# Cinema record for the Olympia multiplex. The scraper class below is
+# registered for it and also exposes it as its ``cinema`` attribute.
+cinema = Cinema(
+    name=u'Multikino Cinema City Olympia',
+    url='http://www.cinemacity.cz/olympia',
+    street=u'U Dálnice 777',
+    town=u'Modřice',
+    coords=(49.1381280, 16.6330180)
+)
+
+
+@scrapers.register(cinema)
+class OlympiaScraper(MultiplexScraper):
+    """Scraper of the Cinema City Olympia program.
+
+    The scraping logic is inherited from ``MultiplexScraper``; this class
+    only supplies the two attributes that the base class reads.
+    """
+
+    # Value substituted for ``locationId`` in the schedule URL.
+    location = "1010103"
+    # Cinema object passed to every Showtime the base class creates.
+    cinema = cinema
diff --git a/zitkino/templates/index.html b/zitkino/templates/index.html
@@ -15,7 +15,7 @@ <h2>{{ starts_at_day|date }}</h2>
                     </a>
                 </th>
                 <td>
-                    {% for cinema in showtimes_per_film.list|map(attribute='cinema')|unique(attribute='name')|sort(attribute='starts_at') %}
+                    {% for cinema in showtimes_per_film.list|sort(attribute='starts_at')|map(attribute='cinema')|unique(attribute='name') %}
                         <a href="{{ cinema.url }}">{{ cinema.name }}</a>
                     {% endfor %}
                 </td>