Add new links to core.
[arachne.git] / src / net / pterodactylus / arachne / core / URLFetcher.java
1 package net.pterodactylus.arachne.core;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.net.URLConnection;
8 import java.util.logging.Level;
9 import java.util.logging.Logger;
10
11 import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
12 import net.pterodactylus.arachne.parser.ParserListener;
13 import de.ina.util.validation.Validation;
14
15 /**
16  * Fetches URLs, parses the received content (if it is HTML) and adds all
17  * resulting links to the queue in the core.
18  *
19  * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
20  */
21 class URLFetcher implements Runnable, ParserListener {
22
23         /** The logger. */
24         private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
25
26         /** The core. */
27         private final Core core;
28
29         /** The URL to fetch. */
30         private final URL url;
31
32         /**
33          * Creates a new fetcher for the given URL.
34          *
35          * @param url
36          *            The URL to fetch
37          * @param core
38          *            TODO
39          */
40         public URLFetcher(Core core, URL url) {
41                 Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
42                 this.core = core;
43                 this.url = url;
44         }
45
46         /**
47          * {@inheritdoc}
48          *
49          * @see java.lang.Runnable#run()
50          */
51         public void run() {
52                 logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”.");
53                 try {
54                         URLConnection urlConnection = url.openConnection();
55                         long contentLength = urlConnection.getContentLength();
56                         String contentType = urlConnection.getContentType();
57                         logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
58                         HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
59                         htmlEditorKitParser.parse(this, urlConnection.getInputStream(), "UTF-8");
60                 } catch (IOException ioe1) {
61                         logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);
62                 }
63         }
64
65         //
66         // INTERFACE ParserListener
67         //
68
69         /**
70          * {@inheritDoc}
71          */
72         public void parsedLink(InputStream inputStream, String linkTarget, String linkTitle, String linkText) {
73                 URL newLink = null;
74                 try {
75                         newLink = new URL(url, linkTarget);
76                         core.addPage(newLink);
77                 } catch (MalformedURLException mue1) {
78                         logger.log(Level.WARNING, "Could not create URL from “" + url + "” and “" + linkTarget + "”.", mue1);
79                 } catch (IllegalArgumentException iae1) {
80                         logger.log(Level.WARNING, "Could not add “" + newLink + "” to core queue.", iae1);
81                 }
82         }
83
84         /**
85          * {@inheritDoc}
86          */
87         public void parsedTitle(InputStream inputStream, String title) {
88         }
89
90 }