From: David ‘Bombe’ Roden Date: Mon, 9 Mar 2009 23:12:41 +0000 (+0100) Subject: Create message digest in core and hash fetched URLs. X-Git-Url: https://git.pterodactylus.net/?p=arachne.git;a=commitdiff_plain;h=cb88a7c9368c4bc32ba5c6caf32c5c51110fde35 Create message digest in core and hash fetched URLs. --- diff --git a/src/net/pterodactylus/arachne/core/Core.java b/src/net/pterodactylus/arachne/core/Core.java index fb85953..f6b050d 100644 --- a/src/net/pterodactylus/arachne/core/Core.java +++ b/src/net/pterodactylus/arachne/core/Core.java @@ -5,6 +5,7 @@ package net.pterodactylus.arachne.core; import java.net.MalformedURLException; import java.net.URL; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.Executor; @@ -164,8 +165,14 @@ public class Core extends AbstractService { logger.log(Level.INFO, "Skipping “" + nextPage + "”."); continue; } - URLFetcher urlFetcher = new URLFetcher(this, nextURL); - urlFetcherExecutor.execute(urlFetcher); + URLFetcher urlFetcher; + try { + logger.log(Level.INFO, "Fetching “" + nextURL + "”..."); + urlFetcher = new URLFetcher(this, nextURL); + urlFetcherExecutor.execute(urlFetcher); + } catch (NoSuchAlgorithmException nsae1) { + logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1); + } } } diff --git a/src/net/pterodactylus/arachne/core/URLFetcher.java b/src/net/pterodactylus/arachne/core/URLFetcher.java index 0b4cb47..d57e15f 100644 --- a/src/net/pterodactylus/arachne/core/URLFetcher.java +++ b/src/net/pterodactylus/arachne/core/URLFetcher.java @@ -5,11 +5,14 @@ import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.net.URLConnection; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; import java.util.logging.Level; import java.util.logging.Logger; import net.pterodactylus.arachne.parser.HtmlEditorKitParser; import net.pterodactylus.arachne.parser.ParserListener; +import de.ina.util.io.MessageDigestInputStream; import de.ina.util.validation.Validation; /** @@ -29,20 +32,50 @@ class URLFetcher implements Runnable, ParserListener { /** The URL to fetch. */ private final URL url; + /** The message digest. */ + private final MessageDigest messageDigest; + + /** The hash of the fetched URL. */ + private byte[] hash; + /** * Creates a new fetcher for the given URL. * + * @param core + * The core new pages are queued in * @param url * The URL to fetch - * @param core - * TODO + * @throws NoSuchAlgorithmException + * if no {@link MessageDigest} instance with an + * SHA-256 algorithm can be created */ - public URLFetcher(Core core, URL url) { + public URLFetcher(Core core, URL url) throws NoSuchAlgorithmException { Validation.begin().isNotNull("core", core).isNotNull("url", url).check(); this.core = core; this.url = url; + messageDigest = MessageDigest.getInstance("SHA-256"); } + // + // ACCESSORS + // + + /** + * Returns the hash of the content of the fetched URL. The returned value is + * only valid after {@link #run()} has been called. + * + * @return The hash of the fetched content + */ + public byte[] getHash() { + byte[] hashCopy = new byte[hash.length]; + System.arraycopy(hash, 0, hashCopy, 0, hash.length); + return hashCopy; + } + + // + // INTERFACE Runnable + // + /** * {@inheritdoc} * @@ -50,13 +83,18 @@ class URLFetcher implements Runnable, ParserListener { */ public void run() { logger.log(Level.INFO, "Starting URL Fetcher for “" + url + "”."); + InputStream urlInputStream = null; + MessageDigestInputStream hashInputStream = null; try { URLConnection urlConnection = url.openConnection(); long contentLength = urlConnection.getContentLength(); String contentType = urlConnection.getContentType(); logger.log(Level.INFO, "Type is “" + contentType + "”, length is " + contentLength + "."); + urlInputStream = urlConnection.getInputStream(); + hashInputStream = new MessageDigestInputStream(urlInputStream, messageDigest); HtmlEditorKitParser htmlEditorKitParser = new HtmlEditorKitParser(); - htmlEditorKitParser.parse(this, urlConnection.getInputStream(), "UTF-8"); + htmlEditorKitParser.parse(this, hashInputStream, "UTF-8"); + hash = messageDigest.digest(); } catch (IOException ioe1) { logger.log(Level.WARNING, "Could not fetch “" + url + "”.", ioe1); }