1 package net.pterodactylus.arachne.core;
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
7 import java.net.URLConnection;
8 import java.util.logging.Level;
9 import java.util.logging.Logger;
11 import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
12 import net.pterodactylus.arachne.parser.ParserListener;
13 import de.ina.util.validation.Validation;
16 * Fetches URLs, parses the received content (if it is HTML) and adds all
17 * resulting links to the queue in the core.
19 * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
21 class URLFetcher implements Runnable, ParserListener {
24 private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
27 private final Core core;
29 /** The URL to fetch. */
30 private final URL url;
/**
 * Creates a new fetcher that is bound to a single URL.
 * <p>
 * Both arguments are run through a {@link Validation} not-null check before
 * they are stored. NOTE(review): what {@code check()} throws on a failed
 * validation is defined by the validation library and is not visible here.
 *
 * @param core
 *            The core whose page queue receives the links found by this
 *            fetcher
 * @param url
 *            The URL to fetch
 */
public URLFetcher(Core core, URL url) {
	Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
	this.url = url;
	this.core = core;
}
49 * @see java.lang.Runnable#run()
52 logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”.");
54 URLConnection urlConnection = url.openConnection();
55 long contentLength = urlConnection.getContentLength();
56 String contentType = urlConnection.getContentType();
57 logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
58 HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
59 htmlEditorKitParser.parse(this, urlConnection.getInputStream(), "UTF-8");
60 } catch (IOException ioe1) {
61 logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);
66 // INTERFACE ParserListener
72 public void parsedLink(InputStream inputStream, String linkTarget, String linkTitle, String linkText) {
75 newLink = new URL(url, linkTarget);
76 core.addPage(newLink);
77 } catch (MalformedURLException mue1) {
78 logger.log(Level.WARNING, "Could not create URL from “" + url + "” and “" + linkTarget + "”.", mue1);
79 } catch (IllegalArgumentException iae1) {
80 logger.log(Level.WARNING, "Could not add “" + newLink + "” to core queue.", iae1);
87 public void parsedTitle(InputStream inputStream, String title) {