1 package net.pterodactylus.arachne.core;
3 import java.io.IOException;
4 import java.io.InputStream;
6 import java.net.URLConnection;
7 import java.util.logging.Level;
8 import java.util.logging.Logger;
10 import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
11 import net.pterodactylus.arachne.parser.ParserListener;
12 import de.ina.util.validation.Validation;
15 * Fetches URLs, parses the received content (if it is HTML) and adds all
16 * resulting links to the queue in the core.
18 * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
20 class URLFetcher implements Runnable, ParserListener {
23 private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
26 private final Core core;
28 /** The URL to fetch. */
29 private final URL url;
32 * Creates a new fetcher for the given URL.
39 public URLFetcher(Core core, URL url) {
40 Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
48 * @see java.lang.Runnable#run()
51 logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”.");
53 URLConnection urlConnection = url.openConnection();
54 long contentLength = urlConnection.getContentLength();
55 String contentType = urlConnection.getContentType();
56 logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
57 HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
58 htmlEditorKitParser.parse(this, urlConnection.getInputStream(), "UTF-8");
59 } catch (IOException ioe1) {
60 logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);
65 // INTERFACE ParserListener
71 public void parsedLink(InputStream inputStream, String linkTarget, String linkTitle, String linkText) {
72 System.out.println("Found link to “" + linkTarget + "” named “" + linkText + "” or “" + linkTitle + "”.");
78 public void parsedTitle(InputStream inputStream, String title) {
79 System.out.println("Found title “" + title + "”.");