Use a default parser factory in the core and pass it to every URL fetcher.
[arachne.git] / src / net / pterodactylus / arachne / core / URLFetcher.java
index d2ce831..cdccd2b 100644 (file)
@@ -12,7 +12,8 @@ import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
+import net.pterodactylus.arachne.parser.Parser;
+import net.pterodactylus.arachne.parser.ParserFactory;
 import net.pterodactylus.arachne.parser.ParserListener;
 import de.ina.util.io.MessageDigestInputStream;
 import de.ina.util.validation.Validation;
@@ -28,6 +29,9 @@ class URLFetcher implements Runnable, ParserListener {
        /** The logger. */
        private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
 
+       /** The parser factory. */
+       private final ParserFactory parserFactory;
+
        /** The URL to fetch. */
        private final URL url;
 
@@ -46,14 +50,18 @@ class URLFetcher implements Runnable, ParserListener {
        /**
         * Creates a new fetcher for the given URL.
         *
+        * @param parserFactory
+        *            The parser factory that is used to create content-type
+        *            specific parsers
         * @param url
         *            The URL to fetch
         * @throws NoSuchAlgorithmException
         *             if no {@link MessageDigest} instance with an
         *             <code>SHA-256</code> algorithm can be created
         */
-       public URLFetcher(URL url) throws NoSuchAlgorithmException {
-               Validation.begin().isNotNull("url", url).check();
+       public URLFetcher(ParserFactory parserFactory, URL url) throws NoSuchAlgorithmException {
+               Validation.begin().isNotNull("parserFactory", parserFactory).isNotNull("url", url).check();
+               this.parserFactory = parserFactory;
                this.url = url;
                messageDigest = MessageDigest.getInstance("SHA-256");
        }
@@ -112,8 +120,8 @@ class URLFetcher implements Runnable, ParserListener {
                        logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
                        urlInputStream = urlConnection.getInputStream();
                        hashInputStream = new MessageDigestInputStream(urlInputStream, messageDigest);
-                       HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
-                       htmlEditorKitParser.parse(this, hashInputStream, "UTF-8");
+                       Parser parser = parserFactory.getParser(contentType);
+                       parser.parse(this, hashInputStream, "UTF-8");
                        hash = messageDigest.digest();
                } catch (IOException ioe1) {
                        logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);