X-Git-Url: https://git.pterodactylus.net/?a=blobdiff_plain;f=src%2Fnet%2Fpterodactylus%2Farachne%2Fcore%2FCore.java;h=9efc9607008621ecd4ae2400eca1e7bed31037ee;hb=a637923e6ea2bd6d53180b7e843b02a191b6d6e5;hp=f3fd5deaa5020841f7fa0ba6d9da58e6ac9b2f29;hpb=8e58edd3a64e784f23f2582b32851a01d7905f47;p=arachne.git diff --git a/src/net/pterodactylus/arachne/core/Core.java b/src/net/pterodactylus/arachne/core/Core.java index f3fd5de..9efc960 100644 --- a/src/net/pterodactylus/arachne/core/Core.java +++ b/src/net/pterodactylus/arachne/core/Core.java @@ -5,11 +5,17 @@ package net.pterodactylus.arachne.core; import java.net.MalformedURLException; import java.net.URL; +import java.security.NoSuchAlgorithmException; import java.util.ArrayList; -import java.util.Collections; import java.util.List; +import java.util.concurrent.Executor; +import java.util.concurrent.Executors; +import java.util.logging.Level; +import java.util.logging.Logger; +import net.pterodactylus.arachne.parser.ParserFactory; import de.ina.util.service.AbstractService; +import de.ina.util.thread.DumpingThreadFactory; import de.ina.util.validation.Validation; /** @@ -19,6 +25,9 @@ import de.ina.util.validation.Validation; */ public class Core extends AbstractService { + /** The logger. */ + private static final Logger logger = Logger.getLogger(Core.class.getName()); + // // PROPERTIES // @@ -33,8 +42,28 @@ public class Core extends AbstractService { // INTERNAL MEMBERS // + /** The parser factory. */ + private ParserFactory parserFactory = new ParserFactory(); + + /** Thread pool for the URL fetches. */ + private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-")); + /** The current list of URLs to crawl. */ - private final List pages = Collections.synchronizedList(new ArrayList()); + private final List pages = new ArrayList(); + + // + // ACCESSORS + // + + /** + * Sets the host name of the node. 
+ * + * @param nodeHost + * The node’s host name + */ + public void setNodeHost(String nodeHost) { + this.nodeHost = nodeHost; + } // // ACTIONS @@ -48,38 +77,6 @@ public class Core extends AbstractService { */ public void addPage(URL url) { Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check(); - String path = url.getPath(); - if (path.length() == 0) { - path = "/"; - } - String[] pathComponents = path.split("/"); - if (pathComponents.length < 2) { - throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page."); - } - String siteName = pathComponents[1]; - String[] siteComponents = siteName.split("@"); - if (siteComponents.length != 2) { - throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page."); - } - if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) { - throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page."); - } - if ("USK".equals(siteComponents[0])) { - Site site = new Site(siteComponents[1], pathComponents[2]); - Edition edition = new Edition(site, Integer.parseInt(pathComponents[3])); - Page page = new Page(edition, createPath(pathComponents, 4)); - addPage(page); - } - if ("SSK".equals(siteComponents[0])) { - int lastDash = pathComponents[2].lastIndexOf('-'); - String basename = pathComponents[2].substring(0, lastDash); - int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1)); - Site site = new Site(siteComponents[1], basename); - Edition edition = new Edition(site, editionNumber); - Page page = new Page(edition, createPath(pathComponents, 3)); - addPage(page); - } - /* TODO: handle CHK */ } /** @@ -108,30 +105,48 @@ public class Core extends AbstractService { } // - // PRIVATE METHODS + // SERVICE METHODS // /** - * Creates a path from the given String array, 
starting at the given index. - * The path is created by joining all Strings from the array, separating - * them with a slash (‘/’). + * {@inheritDoc} * - * @param pathComponents - * The array of path components - * @param index - * The index of the first path components - * @return The joined path + * @see de.ina.util.service.AbstractService#serviceRun() */ - private String createPath(String[] pathComponents, int index) { - Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check(); - StringBuilder path = new StringBuilder(); - for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) { - if (path.length() > 0) { - path.append('/'); + @Override + @SuppressWarnings("null") + protected void serviceRun() { + while (!shouldStop()) { + Page nextPage = null; + synchronized (syncObject) { + while (!shouldStop() && pages.isEmpty()) { + try { + syncObject.wait(); + } catch (InterruptedException ie1) { + /* ignore. */ + } + } + if (!shouldStop()) { + nextPage = pages.remove(0); + } + } + if (shouldStop()) { + break; + } + URL nextURL = nextPage.toURL(nodeHost, nodePort); + if (nextURL == null) { + logger.log(Level.INFO, "Skipping “" + nextPage + "”."); + continue; + } + URLFetcher urlFetcher; + try { + logger.log(Level.INFO, "Fetching “" + nextURL + "”..."); + urlFetcher = new URLFetcher(parserFactory, nextURL); + urlFetcherExecutor.execute(urlFetcher); + } catch (NoSuchAlgorithmException nsae1) { + logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1); } - path.append(pathComponents[pathComponentIndex]); } - return path.toString(); } }