df8180f038e064a986143bd44b7acbedde07ea92
[arachne.git] / src / net / pterodactylus / arachne / core / URLFetcher.java
1 package net.pterodactylus.arachne.core;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.net.URLConnection;
7 import java.util.logging.Level;
8 import java.util.logging.Logger;
9
10 import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
11 import net.pterodactylus.arachne.parser.ParserListener;
12 import de.ina.util.validation.Validation;
13
14 /**
15  * Fetches URLs, parses the received content (if it is HTML) and adds all
16  * resulting links to the queue in the core.
17  *
18  * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
19  */
20 class URLFetcher implements Runnable, ParserListener {
21
22         /** The logger. */
23         private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
24
25         /** The core. */
26         private final Core core;
27
28         /** The URL to fetch. */
29         private final URL url;
30
31         /**
32          * Creates a new fetcher for the given URL.
33          *
34          * @param url
35          *            The URL to fetch
36          * @param core
37          *            TODO
38          */
39         public URLFetcher(Core core, URL url) {
40                 Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
41                 this.core = core;
42                 this.url = url;
43         }
44
45         /**
46          * {@inheritdoc}
47          *
48          * @see java.lang.Runnable#run()
49          */
50         public void run() {
51                 logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”.");
52                 try {
53                         URLConnection urlConnection = url.openConnection();
54                         long contentLength = urlConnection.getContentLength();
55                         String contentType = urlConnection.getContentType();
56                         logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
57                         HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
58                         htmlEditorKitParser.parse(this, urlConnection.getInputStream(), "UTF-8");
59                 } catch (IOException ioe1) {
60                         logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);
61                 }
62         }
63
64         //
65         // INTERFACE ParserListener
66         //
67
68         /**
69          * {@inheritDoc}
70          */
71         public void parsedLink(InputStream inputStream, String linkTarget, String linkTitle, String linkText) {
72                 System.out.println("Found link to “" + linkTarget + "” named “" + linkText + "” or “" + linkTitle + "”.");
73         }
74
75         /**
76          * {@inheritDoc}
77          */
78         public void parsedTitle(InputStream inputStream, String title) {
79                 System.out.println("Found title “" + title + "”.");
80         }
81
82 }