1 package net.pterodactylus.rhynodge.filters.webpages.savoy;
3 import static java.time.format.DateTimeFormatter.ofPattern;
4 import static java.util.Optional.empty;
5 import static java.util.Optional.of;
7 import java.time.LocalDate;
8 import java.time.LocalDateTime;
9 import java.time.LocalTime;
10 import java.time.format.DateTimeFormatter;
11 import java.util.HashSet;
12 import java.util.Optional;
14 import java.util.regex.Matcher;
15 import java.util.regex.Pattern;
17 import org.jsoup.nodes.Document;
18 import org.jsoup.nodes.Element;
21 * Extracts {@link Movie} information from an HTML document.
23 * @author <a href="mailto:bombe@pterodactylus.net">David ‘Bombe’ Roden</a>
25 public class MovieExtractor {
/*
 * Matches text that contains three two-digit numbers separated by dots (e.g. "24.12.15");
 * group 1 captures that part. Because the leading ".*" is greedy, the last such occurrence
 * in the text is the one captured.
 */
27 private static final Pattern datePattern = Pattern.compile(".*([0-9]{2}\\.[0-9]{2}\\.[0-9]{2}).*");
/*
 * Matches text that contains two two-digit numbers separated by a colon (e.g. "20:15");
 * group 1 captures that part (again the last occurrence, due to the greedy prefix).
 */
28 private static final Pattern timePattern = Pattern.compile(".*([0-9]{2}:[0-9]{2}).*");
/* Parses the date strings captured by datePattern: day.month.two-digit-year. */
29 private static final DateTimeFormatter dateFormatter = ofPattern("dd.MM.uu");
/* Parses the time strings captured by timePattern: 24-hour clock, hours:minutes. */
30 private static final DateTimeFormatter timeFormatter = ofPattern("HH:mm");
/**
 * Builds {@link Movie} objects from the given document.
 * <p>
 * Every element matching {@code .tx-spmovies-pi1-listrow} is treated as one movie. Its name
 * is the text of {@code .tx-spmovies-pi1-header h1} inside that element, and every ticket
 * link that {@link #extractTicketLinks(Element)} finds in the element is added to the movie.
 * <p>
 * NOTE(review): the end of this method (closing braces, how {@code movies} is filled and
 * returned) is not visible in this view of the file — confirm against the complete source.
 *
 * @param document
 *            the document to search for movie rows
 * @return presumably the {@code movies} set built here — the return statement is not
 *         visible in this view
 */
32 public Set<Movie> getMovies(Document document) {
33 Set<Movie> movies = new HashSet<>();
// One list row corresponds to one movie.
34 for (Element movieElement : document.select(".tx-spmovies-pi1-listrow")) {
35 String name = movieElement.select(".tx-spmovies-pi1-header h1").text();
36 Movie movie = new Movie(name);
// Attach all show times/ticket links found inside this row to the movie.
37 for (TicketLink ticketLink : extractTicketLinks(movieElement)) {
38 movie.addTicketLink(ticketLink);
/**
 * Collects the ticket links contained in a single movie element.
 * <p>
 * The method walks over the {@code .tx-spmovies-pi1-date-column} cells of the movie element.
 * For each cell a date string is extracted; then the time cells that
 * {@link #getTimeCells(Element, int)} returns for the current {@code dateCellIndex} are
 * visited. For each time cell the text of its {@code a} element supplies the time string
 * and the {@code href} attribute of that element supplies the link. Date and time are
 * combined by {@link #getPresentationTime(Optional, Optional)} and stored in a new
 * {@link TicketLink}.
 * <p>
 * NOTE(review): the bodies of the two {@code isPresent()} guards and the end of the method
 * are not visible in this view. If the date guard skips a cell without reaching the inner
 * loop, {@code dateCellIndex} is not advanced for that cell — confirm that this matches
 * the column layout of the page.
 *
 * @param movieElement
 *            the element of a single movie
 * @return presumably the collected ticket links — the return statement is not visible in
 *         this view
 */
45 private Iterable<? extends TicketLink> extractTicketLinks(Element movieElement) {
46 Set<TicketLink> ticketLinks = new HashSet<>();
// Index passed to getTimeCells(); starts at 1 and is post-incremented on every lookup below.
47 int dateCellIndex = 1;
48 for (Element dateCell : movieElement.select(".tx-spmovies-pi1-date-column")) {
49 Optional<String> dateString = extractDateString(dateCell);
// Guard for date cells whose text contains no recognizable date.
50 if (!dateString.isPresent()) {
// The index is incremented here, i.e. only when the time cells are actually looked up.
53 for (Element timeCell : getTimeCells(movieElement, dateCellIndex++)) {
54 Optional<String> timeString = extractTimeString(timeCell.select("a").text());
// Guard for time cells whose link text contains no recognizable time.
55 if (!timeString.isPresent()) {
58 LocalDateTime localDateTime = getPresentationTime(dateString, timeString);
59 String link = timeCell.select("a").attr("href");
60 TicketLink ticketLink = new TicketLink(localDateTime, link);
61 ticketLinks.add(ticketLink);
67 private LocalDateTime getPresentationTime(Optional<String> dateString, Optional<String> timeString) {
68 LocalDate date = LocalDate.parse(dateString.get(), dateFormatter);
69 LocalTime localTime = LocalTime.parse(timeString.get(), timeFormatter);
70 return date.atTime(localTime);
73 private Optional<String> extractTimeString(String cellContent) {
74 Matcher timeMatcher = timePattern.matcher(cellContent);
75 if (!timeMatcher.matches() || timeMatcher.groupCount() < 1) {
78 return of(timeMatcher.group(1));
81 private Iterable<? extends Element> getTimeCells(Element movieElement, int dateCellIndex) {
82 return movieElement.select(".tx-spmovies-pi1-shows:eq(" + dateCellIndex + ") div.time");
85 private Optional<String> extractDateString(Element dateCell) {
86 Matcher matcher = datePattern.matcher(dateCell.text());
87 if (!matcher.matches() || matcher.groupCount() < 1) {
90 return of(matcher.group(1));