Use a default parser factory in the core and hand it in to every URL fetcher.
author David ‘Bombe’ Roden <bombe@pterodactylus.net>
Tue, 10 Mar 2009 22:11:13 +0000 (23:11 +0100)
committer David ‘Bombe’ Roden <bombe@pterodactylus.net>
Tue, 10 Mar 2009 22:12:03 +0000 (23:12 +0100)
src/net/pterodactylus/arachne/core/Core.java
src/net/pterodactylus/arachne/core/URLFetcher.java

index 9e3f1a5..9efc960 100644 (file)
@@ -13,6 +13,7 @@ import java.util.concurrent.Executors;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import net.pterodactylus.arachne.parser.ParserFactory;
 import de.ina.util.service.AbstractService;
 import de.ina.util.thread.DumpingThreadFactory;
 import de.ina.util.validation.Validation;
@@ -41,6 +42,9 @@ public class Core extends AbstractService {
        // INTERNAL MEMBERS
        //
 
+       /** The parser factory. */
+       private ParserFactory parserFactory = new ParserFactory();
+
        /** Thread pool for the URL fetches. */
        private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
 
@@ -137,7 +141,7 @@ public class Core extends AbstractService {
                        URLFetcher urlFetcher;
                        try {
                                logger.log(Level.INFO, "Fetching “" + nextURL + "”...");
-                               urlFetcher = new URLFetcher(nextURL);
+                               urlFetcher = new URLFetcher(parserFactory, nextURL);
                                urlFetcherExecutor.execute(urlFetcher);
                        } catch (NoSuchAlgorithmException nsae1) {
                                logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1);
index d2ce831..cdccd2b 100644 (file)
@@ -12,7 +12,8 @@ import java.util.List;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
-import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
+import net.pterodactylus.arachne.parser.Parser;
+import net.pterodactylus.arachne.parser.ParserFactory;
 import net.pterodactylus.arachne.parser.ParserListener;
 import de.ina.util.io.MessageDigestInputStream;
 import de.ina.util.validation.Validation;
@@ -28,6 +29,9 @@ class URLFetcher implements Runnable, ParserListener {
        /** The logger. */
        private static final Logger logger = Logger.getLogger(URLFetcher.class.getName());
 
+       /** The parser factory. */
+       private final ParserFactory parserFactory;
+
        /** The URL to fetch. */
        private final URL url;
 
@@ -46,14 +50,18 @@ class URLFetcher implements Runnable, ParserListener {
        /**
         * Creates a new fetcher for the given URL.
         *
+        * @param parserFactory
+        *            The parser factory that is used to create content-type
+        *            specific parsers
         * @param url
         *            The URL to fetch
         * @throws NoSuchAlgorithmException
         *             if no {@link MessageDigest} instance with an
         *             <code>SHA-256</code> algorithm can be created
         */
-       public URLFetcher(URL url) throws NoSuchAlgorithmException {
-               Validation.begin().isNotNull("url", url).check();
+       public URLFetcher(ParserFactory parserFactory, URL url) throws NoSuchAlgorithmException {
+               Validation.begin().isNotNull("parserFactory", parserFactory).isNotNull("url", url).check();
+               this.parserFactory = parserFactory;
                this.url = url;
                messageDigest = MessageDigest.getInstance("SHA-256");
        }
@@ -112,8 +120,8 @@ class URLFetcher implements Runnable, ParserListener {
                        logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
                        urlInputStream = urlConnection.getInputStream();
                        hashInputStream = new MessageDigestInputStream(urlInputStream, messageDigest);
-                       HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
-                       htmlEditorKitParser.parse(this, hashInputStream, "UTF-8");
+                       Parser parser = parserFactory.getParser(contentType);
+                       parser.parse(this, hashInputStream, "UTF-8");
                        hash = messageDigest.digest();
                } catch (IOException ioe1) {
                        logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);