2 * © 2009 David ‘Bombe’ Roden
4 package net.pterodactylus.arachne.core;
6 import java.net.MalformedURLException;
8 import java.util.ArrayList;
10 import java.util.concurrent.Executor;
11 import java.util.concurrent.Executors;
12 import java.util.logging.Level;
13 import java.util.logging.Logger;
15 import de.ina.util.service.AbstractService;
16 import de.ina.util.thread.DumpingThreadFactory;
17 import de.ina.util.validation.Validation;
22 * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
24 public class Core extends AbstractService {
/** Logger for this class, named after the class's fully-qualified name. */
27 private static final Logger logger = Logger.getLogger(Core.class.getName());
// addPage(URL) validates the host of every added URL against this value.
33 /** The host of the freenet node. */
34 private String nodeHost = "localhost";
// addPage(URL) validates the port of every added URL against this value.
36 /** The port of the freenet node. */
37 private int nodePort = 8888;
// The pool is created with a fixed size of one thread, so submitted fetches
// are worked off one at a time.
// NOTE(review): "URLFetcher-" is presumably the thread name prefix used by
// DumpingThreadFactory; confirm against that class.
43 /** Thread pool for the URL fetches. */
44 private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
// serviceRun() takes its next page from the front of this list (index 0).
46 /** The pages that are still waiting to be crawled. */
47 private final List<Page> pages = new ArrayList<Page>();
54 * Adds the given URL to the list of pages to crawl.
59 public void addPage(URL url) {
60 Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
61 String path = url.getPath();
62 if (path.length() == 0) {
65 String[] pathComponents = path.split("/");
66 if (pathComponents.length < 2) {
67 throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
69 String siteName = pathComponents[1];
70 String[] siteComponents = siteName.split("@");
71 if (siteComponents.length != 2) {
72 throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
74 if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) {
75 throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
77 if ("USK".equals(siteComponents[0])) {
78 Site site = new Site(siteComponents[1], pathComponents[2]);
79 Edition edition = new Edition(site, Integer.parseInt(pathComponents[3]));
80 Page page = new Page(edition, createPath(pathComponents, 4));
83 if ("SSK".equals(siteComponents[0])) {
84 int lastDash = pathComponents[2].lastIndexOf('-');
85 String basename = pathComponents[2].substring(0, lastDash);
86 int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1));
87 Site site = new Site(siteComponents[1], basename);
88 Edition edition = new Edition(site, editionNumber);
89 Page page = new Page(edition, createPath(pathComponents, 3));
92 /* TODO: handle CHK */
96 * Adds the given URL to the list of pages to crawl.
99 * The URL of the page to crawl
100 * @throws MalformedURLException
101 * if the URL is not a valid URL
103 public void addPage(String url) throws MalformedURLException {
104 Validation.begin().isNotNull("url", (Object) url).check();
105 addPage(new URL(url));
// Javadoc summary of addPage(Page); the surrounding comment delimiters are
// not part of this excerpt.
109 * Adds the given page to the list of pages to crawl.
114 public void addPage(Page page) {
// Check that the page is not null before doing anything else.
115 Validation.begin().isNotNull("page", page).check();
// NOTE(review): the rest of this method's body is not visible here;
// presumably it appends the page to `pages` and wakes up serviceRun().
// Confirm against the full file.
// Main loop of the service: takes the next page from `pages`, builds a URL
// for it and hands a URLFetcher for that URL to the fetcher executor.
127 * @see de.ina.util.service.AbstractService#serviceRun()
130 protected void serviceRun() {
// Keep processing pages until the service is asked to stop.
131 while (!shouldStop()) {
132 Page nextPage = null;
// Access to `pages` is guarded by syncObject.
133 synchronized (syncObject) {
// Stay in this loop while there is nothing to do; the condition is
// re-checked on every iteration.
// NOTE(review): the try body is not visible here; presumably it waits on
// syncObject. Confirm against the full file.
134 while (!shouldStop() && pages.isEmpty()) {
137 } catch (InterruptedException ie1) {
// Take the entry at the front of the list.
142 nextPage = pages.remove(0);
148 URL nextURL = createURL(nextPage);
// createURL() is documented to return null when no URL can be created;
// such pages are only logged.
149 if (nextURL == null) {
150 logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
// The fetch itself is delegated to the URL fetcher executor.
153 URLFetcher urlFetcher = new URLFetcher(this, nextURL);
154 urlFetcherExecutor.execute(urlFetcher);
163 * Creates a path from the given String array, starting at the given index.
164 * The path is created by joining all Strings from the array, separating
165 * them with a slash (‘/’).
167 * @param pathComponents
168 * The array of path components
170 * The index of the first path components
171 * @return The joined path
173 private String createPath(String[] pathComponents, int index) {
174 Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", index, pathComponents.length).check();
175 StringBuilder path = new StringBuilder();
176 for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
177 if (path.length() > 0) {
180 path.append(pathComponents[pathComponentIndex]);
182 return path.toString();
186 * Creates a URL from the given page.
189 * The page to create a URL from
190 * @return The created URL, or <code>null</code> if the URL could not be
193 private URL createURL(Page page) {
195 return new URL("http://" + nodeHost + ":" + nodePort + "/");
196 } catch (MalformedURLException mue1) {
197 /* nearly impossible. */