Create message digest in core and hash fetched URLs.
author David ‘Bombe’ Roden <bombe@pterodactylus.net>
Mon, 9 Mar 2009 23:12:41 +0000 (00:12 +0100)
committer David ‘Bombe’ Roden <bombe@pterodactylus.net>
Mon, 9 Mar 2009 23:12:41 +0000 (00:12 +0100)
src/net/pterodactylus/arachne/core/Core.java
src/net/pterodactylus/arachne/core/URLFetcher.java

index fb85953..f6b050d 100644 (file)
@@ -5,6 +5,7 @@ package net.pterodactylus.arachne.core;
 
 import java.net.MalformedURLException;
 import java.net.URL;
 
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Executor;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.Executor;
@@ -164,8 +165,14 @@ public class Core extends AbstractService {
                                logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
                                continue;
                        }
                                logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
                                continue;
                        }
-                       URLFetcher urlFetcher = new URLFetcher(this, nextURL);
-                       urlFetcherExecutor.execute(urlFetcher);
+                       URLFetcher urlFetcher;
+                       try {
+                               logger.log(Level.INFO, "Fetching “" + nextURL + "”...");
+                               urlFetcher = new URLFetcher(this, nextURL);
+                               urlFetcherExecutor.execute(urlFetcher);
+                       } catch (NoSuchAlgorithmException nsae1) {
+                               logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1);
+                       }
                }
        }
 
                }
        }
 
index 0b4cb47..d57e15f 100644 (file)
@@ -5,11 +5,14 @@ import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLConnection;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
 import net.pterodactylus.arachne.parser.ParserListener;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import net.pterodactylus.arachne.parser.HtmlEditorKitParser;
 import net.pterodactylus.arachne.parser.ParserListener;
+import de.ina.util.io.MessageDigestInputStream;
 import de.ina.util.validation.Validation;
 
 /**
 import de.ina.util.validation.Validation;
 
 /**
@@ -29,20 +32,50 @@ class URLFetcher implements Runnable, ParserListener {
        /** The URL to fetch. */
        private final URL url;
 
        /** The URL to fetch. */
        private final URL url;
 
+       /** The message digest. */
+       private final MessageDigest messageDigest;
+
+       /** The hash of the fetched URL. */
+       private byte[] hash;
+
        /**
         * Creates a new fetcher for the given URL.
         *
        /**
         * Creates a new fetcher for the given URL.
         *
+        * @param core
+        *            The core new pages are queued in
         * @param url
         *            The URL to fetch
         * @param url
         *            The URL to fetch
-        * @param core
-        *            TODO
+        * @throws NoSuchAlgorithmException
+        *             if no {@link MessageDigest} instance with an
+        *             <code>SHA-256</code> algorithm can be created
         */
         */
-       public URLFetcher(Core core, URL url) {
+       public URLFetcher(Core core, URL url) throws NoSuchAlgorithmException {
                Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
                this.core = core;
                this.url = url;
                Validation.begin().isNotNull("core", core).isNotNull("url", url).check();
                this.core = core;
                this.url = url;
+               messageDigest = MessageDigest.getInstance("SHA-256");
        }
 
        }
 
+       //
+       // ACCESSORS
+       //
+
+       /**
+        * Returns the hash of the content of the fetched URL. The returned value is
+        * only valid after {@link #run()} has been called.
+        *
+        * @return The hash of the fetched content
+        */
+       public byte[] getHash() {
+               byte[] hashCopy = new byte[hash.length];
+               System.arraycopy(hash, 0, hashCopy, 0, hash.length);
+               return hashCopy;
+       }
+
+       //
+       // INTERFACE Runnable
+       //
+
        /**
         * {@inheritdoc}
         *
        /**
         * {@inheritdoc}
         *
@@ -50,13 +83,18 @@ class URLFetcher implements Runnable, ParserListener {
         */
        public void run() {
                logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”.");
         */
        public void run() {
                logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”.");
+               InputStream urlInputStream = null;
+               MessageDigestInputStream hashInputStream = null;
                try {
                        URLConnection urlConnection = url.openConnection();
                        long contentLength = urlConnection.getContentLength();
                        String contentType = urlConnection.getContentType();
                        logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
                try {
                        URLConnection urlConnection = url.openConnection();
                        long contentLength = urlConnection.getContentLength();
                        String contentType = urlConnection.getContentType();
                        logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + ".");
+                       urlInputStream = urlConnection.getInputStream();
+                       hashInputStream = new MessageDigestInputStream(urlInputStream, messageDigest);
                        HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
                        HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser();
-                       htmlEditorKitParser.parse(this, urlConnection.getInputStream(), "UTF-8");
+                       htmlEditorKitParser.parse(this, hashInputStream, "UTF-8");
+                       hash = messageDigest.digest();
                } catch (IOException ioe1) {
                        logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);
                }
                } catch (IOException ioe1) {
                        logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1);
                }