From: David ‘Bombe’ Roden Date: Mon, 9 Mar 2009 14:43:58 +0000 (+0100) Subject: First implementation of the Arachne core. X-Git-Url: https://git.pterodactylus.net/?a=commitdiff_plain;h=8e58edd3a64e784f23f2582b32851a01d7905f47;p=arachne.git First implementation of the Arachne core. --- diff --git a/src/net/pterodactylus/arachne/core/Core.java b/src/net/pterodactylus/arachne/core/Core.java new file mode 100644 index 0000000..f3fd5de --- /dev/null +++ b/src/net/pterodactylus/arachne/core/Core.java @@ -0,0 +1,137 @@ +/* + * © 2009 David ‘Bombe’ Roden + */ +package net.pterodactylus.arachne.core; + +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import de.ina.util.service.AbstractService; +import de.ina.util.validation.Validation; + +/** + * Arachne’s core. + * + * @author David ‘Bombe’ Roden + */ +public class Core extends AbstractService { + + // + // PROPERTIES + // + + /** The host of the freenet node. */ + private String nodeHost = "localhost"; + + /** The port of the freenet node. */ + private int nodePort = 8888; + + // + // INTERNAL MEMBERS + // + + /** The current list of URLs to crawl. */ + private final List pages = Collections.synchronizedList(new ArrayList()); + + // + // ACTIONS + // + + /** + * Adds the given URL to the list of pages to crawl. + * + * @param url + * The URL to add + */ + public void addPage(URL url) { + Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check(); + String path = url.getPath(); + if (path.length() == 0) { + path = "/"; + } + String[] pathComponents = path.split("/"); + if (pathComponents.length < 2) { + throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page."); + } + String siteName = pathComponents[1]; + String[] siteComponents = siteName.split("@"); + if (siteComponents.length != 2) { + throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page."); + } + if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) { + throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page."); + } + if ("USK".equals(siteComponents[0])) { + Site site = new Site(siteComponents[1], pathComponents[2]); + Edition edition = new Edition(site, Integer.parseInt(pathComponents[3])); + Page page = new Page(edition, createPath(pathComponents, 4)); + addPage(page); + } + if ("SSK".equals(siteComponents[0])) { + int lastDash = pathComponents[2].lastIndexOf('-'); + String basename = pathComponents[2].substring(0, lastDash); + int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1)); + Site site = new Site(siteComponents[1], basename); + Edition edition = new Edition(site, editionNumber); + Page page = new Page(edition, createPath(pathComponents, 3)); + addPage(page); + } + /* TODO: handle CHK */ + } + + /** + * Adds the given URL to the list of pages to crawl. + * + * @param url + * The URL of the page to crawl + * @throws MalformedURLException + * if the URL is not a valid URL + */ + public void addPage(String url) throws MalformedURLException { + Validation.begin().isNotNull("url", (Object) url).check(); + addPage(new URL(url)); + } + + /** + * Adds the given page to the list of pages to crawl. + * + * @param page + * The page to add + */ + public void addPage(Page page) { + Validation.begin().isNotNull("page", page).check(); + pages.add(page); + notifySyncObject(); + } + + // + // PRIVATE METHODS + // + + /** + * Creates a path from the given String array, starting at the given index. + * The path is created by joining all Strings from the array, separating + * them with a slash (‘/’). + * + * @param pathComponents + * The array of path components + * @param index + * The index of the first path components + * @return The joined path + */ + private String createPath(String[] pathComponents, int index) { + Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check(); + StringBuilder path = new StringBuilder(); + for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) { + if (path.length() > 0) { + path.append('/'); + } + path.append(pathComponents[pathComponentIndex]); + } + return path.toString(); + } + +}