First implementation of the Arachne core.
author: David ‘Bombe’ Roden <bombe@pterodactylus.net>
Mon, 9 Mar 2009 14:43:58 +0000 (15:43 +0100)
committer: David ‘Bombe’ Roden <bombe@pterodactylus.net>
Mon, 9 Mar 2009 14:43:58 +0000 (15:43 +0100)
src/net/pterodactylus/arachne/core/Core.java [new file with mode: 0644]

diff --git a/src/net/pterodactylus/arachne/core/Core.java b/src/net/pterodactylus/arachne/core/Core.java
new file mode 100644 (file)
index 0000000..f3fd5de
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ * © 2009 David ‘Bombe’ Roden
+ */
+package net.pterodactylus.arachne.core;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import de.ina.util.service.AbstractService;
+import de.ina.util.validation.Validation;
+
+/**
+ * Arachne’s core.
+ *
+ * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
+ */
+public class Core extends AbstractService {
+
+       //
+       // PROPERTIES
+       //
+
+       /** The host of the freenet node. */
+       private String nodeHost = "localhost";
+
+       /** The port of the freenet node. */
+       private int nodePort = 8888;
+
+       //
+       // INTERNAL MEMBERS
+       //
+
+       /** The current list of URLs to crawl. */
+       private final List<Page> pages = Collections.synchronizedList(new ArrayList<Page>());
+
+       //
+       // ACTIONS
+       //
+
+       /**
+        * Adds the given URL to the list of pages to crawl.
+        *
+        * @param url
+        *            The URL to add
+        */
+       public void addPage(URL url) {
+               Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
+               String path = url.getPath();
+               if (path.length() == 0) {
+                       path = "/";
+               }
+               String[] pathComponents = path.split("/");
+               if (pathComponents.length < 2) {
+                       throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
+               }
+               String siteName = pathComponents[1];
+               String[] siteComponents = siteName.split("@");
+               if (siteComponents.length != 2) {
+                       throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
+               }
+               if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) {
+                       throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
+               }
+               if ("USK".equals(siteComponents[0])) {
+                       Site site = new Site(siteComponents[1], pathComponents[2]);
+                       Edition edition = new Edition(site, Integer.parseInt(pathComponents[3]));
+                       Page page = new Page(edition, createPath(pathComponents, 4));
+                       addPage(page);
+               }
+               if ("SSK".equals(siteComponents[0])) {
+                       int lastDash = pathComponents[2].lastIndexOf('-');
+                       String basename = pathComponents[2].substring(0, lastDash);
+                       int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1));
+                       Site site = new Site(siteComponents[1], basename);
+                       Edition edition = new Edition(site, editionNumber);
+                       Page page = new Page(edition, createPath(pathComponents, 3));
+                       addPage(page);
+               }
+               /* TODO: handle CHK */
+       }
+
+       /**
+        * Adds the given URL to the list of pages to crawl.
+        *
+        * @param url
+        *            The URL of the page to crawl
+        * @throws MalformedURLException
+        *             if the URL is not a valid URL
+        */
+       public void addPage(String url) throws MalformedURLException {
+               Validation.begin().isNotNull("url", (Object) url).check();
+               addPage(new URL(url));
+       }
+
+       /**
+        * Adds the given page to the list of pages to crawl.
+        *
+        * @param page
+        *            The page to add
+        */
+       public void addPage(Page page) {
+               Validation.begin().isNotNull("page", page).check();
+               pages.add(page);
+               notifySyncObject();
+       }
+
+       //
+       // PRIVATE METHODS
+       //
+
+       /**
+        * Creates a path from the given String array, starting at the given index.
+        * The path is created by joining all Strings from the array, separating
+        * them with a slash (‘/’).
+        *
+        * @param pathComponents
+        *            The array of path components
+        * @param index
+        *            The index of the first path components
+        * @return The joined path
+        */
+       private String createPath(String[] pathComponents, int index) {
+               Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check();
+               StringBuilder path = new StringBuilder();
+               for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
+                       if (path.length() > 0) {
+                               path.append('/');
+                       }
+                       path.append(pathComponents[pathComponentIndex]);
+               }
+               return path.toString();
+       }
+
+}