From e6d67f8ff35ca69feb69811b4e09c52d3118fa9c Mon Sep 17 00:00:00 2001 From: =?utf8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?= Date: Tue, 10 Mar 2009 00:26:43 +0100 Subject: [PATCH] Remove core dependency. --- src/net/pterodactylus/arachne/core/URLFetcher.java | 31 ++++++++++++++-------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/src/net/pterodactylus/arachne/core/URLFetcher.java b/src/net/pterodactylus/arachne/core/URLFetcher.java index d57e15f..ddfbddd 100644 --- a/src/net/pterodactylus/arachne/core/URLFetcher.java +++ b/src/net/pterodactylus/arachne/core/URLFetcher.java @@ -7,6 +7,8 @@ import java.net.URL; import java.net.URLConnection; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; +import java.util.ArrayList; +import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; @@ -26,9 +28,6 @@ class URLFetcher implements Runnable, ParserListener { /** The logger. */ private static final Logger logger = Logger.getLogger(URLFetcher.class.getName()); - /** The core. */ - private final Core core; - /** The URL to fetch. */ private final URL url; @@ -38,20 +37,20 @@ class URLFetcher implements Runnable, ParserListener { /** The hash of the fetched URL. */ private byte[] hash; + /** The collected URLs. */ + private final List collectedPages = new ArrayList(); + /** * Creates a new fetcher for the given URL. * - * @param core - * The core new pages are queued in * @param url * The URL to fetch * @throws NoSuchAlgorithmException * if no {@link MessageDigest} instance with an * SHA-256 algorithm can be created */ - public URLFetcher(Core core, URL url) throws NoSuchAlgorithmException { - Validation.begin().isNotNull("core", core).isNotNull("url", url).check(); - this.core = core; + public URLFetcher(URL url) throws NoSuchAlgorithmException { + Validation.begin().isNotNull("url", url).check(); this.url = url; messageDigest = MessageDigest.getInstance("SHA-256"); } @@ -61,6 +60,15 @@ class URLFetcher implements Runnable, ParserListener { // /** + * Returns the pages collected while parsing this URL. + * + * @return The collected pages + */ + public List getCollectedPages() { + return collectedPages; + } + + /** * Returns the hash of the content of the fetched URL. The returned value is * only valid after {@link #run()} has been called. * @@ -111,11 +119,12 @@ class URLFetcher implements Runnable, ParserListener { URL newLink = null; try { newLink = new URL(url, linkTarget); - core.addPage(newLink); + Page newPage = Page.fromURL(newLink); + if (newPage != null) { + collectedPages.add(newPage); + } } catch (MalformedURLException mue1) { logger.log(Level.WARNING, "Could not create URL from “" + url + "” and “" + linkTarget + "”.", mue1); - } catch (IllegalArgumentException iae1) { - logger.log(Level.WARNING, "Could not add “" + newLink + "” to core queue.", iae1); } } -- 2.7.4