From: David ‘Bombe’ Roden Date: Mon, 9 Mar 2009 17:01:56 +0000 (+0100) Subject: Stub implementation of page fetching. X-Git-Url: https://git.pterodactylus.net/?a=commitdiff_plain;ds=sidebyside;h=ea810d645605a8f7cdcacdf828f839a85ebe9c21;hp=15aad6fc9873fae9d81c613441505c05ae258c40;p=arachne.git Stub implementation of page fetching. --- diff --git a/src/net/pterodactylus/arachne/core/Core.java b/src/net/pterodactylus/arachne/core/Core.java index f3fd5de..ad438b1 100644 --- a/src/net/pterodactylus/arachne/core/Core.java +++ b/src/net/pterodactylus/arachne/core/Core.java @@ -6,10 +6,14 @@ package net.pterodactylus.arachne.core; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.logging.Level; +import java.util.logging.Logger; import de.ina.util.service.AbstractService; +import de.ina.util.thread.DumpingThreadFactory; import de.ina.util.validation.Validation; /** @@ -19,6 +23,9 @@ import de.ina.util.validation.Validation; */ public class Core extends AbstractService { + /** The logger. */ + private static final Logger logger = Logger.getLogger(Core.class.getName()); + // // PROPERTIES // @@ -33,8 +40,11 @@ public class Core extends AbstractService { // INTERNAL MEMBERS // + /** Thread pool for the URL fetches. */ + private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-")); + /** The current list of URLs to crawl. 
*/ - private final List pages = Collections.synchronizedList(new ArrayList()); + private final List pages = new ArrayList(); // // ACTIONS @@ -108,6 +118,44 @@ public class Core extends AbstractService { } // + // SERVICE METHODS + // + + /** + * {@inheritDoc} + * + * @see de.ina.util.service.AbstractService#serviceRun() + */ + @Override + protected void serviceRun() { + while (!shouldStop()) { + Page nextPage = null; + synchronized (syncObject) { + while (!shouldStop() && pages.isEmpty()) { + try { + syncObject.wait(); + } catch (InterruptedException ie1) { + /* ignore. */ + } + } + if (!shouldStop()) { + nextPage = pages.remove(0); + } + } + if (shouldStop()) { + break; + } + URL nextURL = createURL(nextPage); + if (nextURL == null) { + logger.log(Level.INFO, "Skipping “" + nextPage + "”."); + continue; + } + URLFetcher urlFetcher = new URLFetcher(this, nextURL); + urlFetcherExecutor.execute(urlFetcher); + } + } + + // // PRIVATE METHODS // @@ -123,7 +171,7 @@ public class Core extends AbstractService { * @return The joined path */ private String createPath(String[] pathComponents, int index) { - Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check(); + Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", index, pathComponents.length).check(); StringBuilder path = new StringBuilder(); for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) { if (path.length() > 0) { @@ -134,4 +182,21 @@ public class Core extends AbstractService { return path.toString(); } + /** + * Creates a URL from the given page. + * + * @param page + * The page to create a URL from + * @return The created URL, or null if the URL could not be + * created + */ + private URL createURL(Page page) { + try { + return new URL("http://" + nodeHost + ":" + nodePort + "/"); + } catch (MalformedURLException mue1) { + /* nearly impossible.
*/ + } + return null; + } + }