2 * © 2009 David ‘Bombe’ Roden
4 package net.pterodactylus.arachne.core;
6 import java.net.MalformedURLException;
8 import java.security.NoSuchAlgorithmException;
9 import java.util.ArrayList;
10 import java.util.List;
11 import java.util.concurrent.Executor;
12 import java.util.concurrent.Executors;
13 import java.util.logging.Level;
14 import java.util.logging.Logger;
16 import de.ina.util.service.AbstractService;
17 import de.ina.util.thread.DumpingThreadFactory;
18 import de.ina.util.validation.Validation;
23 * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
public class Core extends AbstractService {

	/** The logger for this class. */
	private static final Logger logger = Logger.getLogger(Core.class.getName());

	/** The host of the freenet node. */
	private String nodeHost = "localhost";

	/** The port of the freenet node. */
	private int nodePort = 8888;

	/** Thread pool for the URL fetches. */
	// Fixed pool of size 1: fetches submitted in serviceRun() execute strictly
	// one at a time, in submission order.
	private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));

	/** The current list of URLs to crawl. */
	// Guarded by “syncObject” in serviceRun(); NOTE(review): the syncObject
	// field itself is declared in lines not visible in this excerpt — confirm
	// all other accessors of “pages” synchronize on it as well.
	private final List<Page> pages = new ArrayList<Page>();
	/**
	 * Sets the host name of the node.
	 *
	 * @param nodeHost
	 *            The node’s host name
	 */
	public void setNodeHost(String nodeHost) {
		// Plain assignment; no validation is performed here (unlike addPage).
		this.nodeHost = nodeHost;
	/**
	 * Adds the given URL to the list of pages to crawl. The URL must be
	 * non-null and must point at the configured node host and port.
	 *
	 * @param url
	 *            The URL of the page to crawl
	 */
	public void addPage(URL url) {
		// Two-stage validation: the first check() ensures url is non-null
		// before url.getHost()/getPort() are dereferenced for the second
		// check() against the configured node host and port.
		Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
		// NOTE(review): the actual enqueueing of the page is not visible in
		// this excerpt — presumably it follows here; verify against the full
		// file.
	/**
	 * Adds the given URL to the list of pages to crawl. Convenience overload
	 * that parses the string and delegates to {@link #addPage(URL)}.
	 *
	 * @param url
	 *            The URL of the page to crawl
	 * @throws MalformedURLException
	 *             if the URL is not a valid URL
	 */
	public void addPage(String url) throws MalformedURLException {
		// Null-check first so the URL constructor below can’t NPE; a malformed
		// (but non-null) string surfaces as MalformedURLException instead.
		Validation.begin().isNotNull("url", (Object) url).check();
		addPage(new URL(url));
	/**
	 * Adds the given page to the list of pages to crawl.
	 *
	 * @param page
	 *            The page to crawl; must not be {@code null}
	 */
	public void addPage(Page page) {
		Validation.begin().isNotNull("page", page).check();
		// NOTE(review): the insertion into “pages” (and the notify that wakes
		// the serviceRun() loop waiting on syncObject) is not visible in this
		// excerpt — confirm it synchronizes on syncObject.
	/**
	 * Main service loop: repeatedly takes the next {@link Page} from the
	 * queue, converts it to a {@link URL} for the configured node, and hands
	 * it to the URL-fetcher executor. Runs until {@code shouldStop()} reports
	 * that the service should shut down.
	 *
	 * @see de.ina.util.service.AbstractService#serviceRun()
	 */
	@SuppressWarnings("null")
	protected void serviceRun() {
		while (!shouldStop()) {
			Page nextPage = null;
			// Block until a page is available (or the service is told to
			// stop); “pages” is only read/modified under syncObject here.
			synchronized (syncObject) {
				while (!shouldStop() && pages.isEmpty()) {
				// NOTE(review): the wait on syncObject inside this inner loop
				// is not visible in this excerpt.
				} catch (InterruptedException ie1) {
				// NOTE(review): catch body not visible — if the interrupt is
				// swallowed here, consider re-interrupting the thread
				// (Thread.currentThread().interrupt()).
				nextPage = pages.remove(0); // FIFO: take the oldest queued page
			// Resolve the page against the configured node; a null URL means
			// the page cannot be fetched and is skipped (logged at INFO).
			URL nextURL = nextPage.toURL(nodeHost, nodePort);
			if (nextURL == null) {
				logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
			URLFetcher urlFetcher;
			logger.log(Level.INFO, "Fetching “" + nextURL + "”...");
			// Hand the fetch to the (single-threaded) executor so this loop
			// can go back to waiting for the next page immediately.
			urlFetcher = new URLFetcher(nextURL);
			urlFetcherExecutor.execute(urlFetcher);
		// NOTE(review): the try that pairs with this catch is not visible in
		// this excerpt; URLFetcher construction apparently requests a SHA-256
		// MessageDigest, which is what can throw here.
		} catch (NoSuchAlgorithmException nsae1) {
			logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1);