/*
 * © 2009 David ‘Bombe’ Roden
 */
4 package net.pterodactylus.arachne.core;
6 import java.net.MalformedURLException;
8 import java.util.ArrayList;
9 import java.util.Collections;
10 import java.util.List;
12 import de.ina.util.service.AbstractService;
13 import de.ina.util.validation.Validation;
/**
 * @author David ‘Bombe’ Roden &lt;bombe@pterodactylus.net&gt;
 */
20 public class Core extends AbstractService {
/** The host of the freenet node; initialised to {@code "localhost"}. */
private String nodeHost = "localhost";

/** The port of the freenet node; initialised to {@code 8888}. */
private int nodePort = 8888;

/**
 * The current list of pages to crawl. The backing {@link ArrayList} is
 * wrapped with {@link Collections#synchronizedList(List)}.
 */
private final List<Page> pages = Collections.synchronizedList(new ArrayList<Page>());
44 * Adds the given URL to the list of pages to crawl.
49 public void addPage(URL url) {
50 Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
51 String path = url.getPath();
52 if (path.length() == 0) {
55 String[] pathComponents = path.split("/");
56 if (pathComponents.length < 2) {
57 throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
59 String siteName = pathComponents[1];
60 String[] siteComponents = siteName.split("@");
61 if (siteComponents.length != 2) {
62 throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
64 if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) {
65 throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
67 if ("USK".equals(siteComponents[0])) {
68 Site site = new Site(siteComponents[1], pathComponents[2]);
69 Edition edition = new Edition(site, Integer.parseInt(pathComponents[3]));
70 Page page = new Page(edition, createPath(pathComponents, 4));
73 if ("SSK".equals(siteComponents[0])) {
74 int lastDash = pathComponents[2].lastIndexOf('-');
75 String basename = pathComponents[2].substring(0, lastDash);
76 int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1));
77 Site site = new Site(siteComponents[1], basename);
78 Edition edition = new Edition(site, editionNumber);
79 Page page = new Page(edition, createPath(pathComponents, 3));
82 /* TODO: handle CHK */
86 * Adds the given URL to the list of pages to crawl.
89 * The URL of the page to crawl
90 * @throws MalformedURLException
91 * if the URL is not a valid URL
93 public void addPage(String url) throws MalformedURLException {
94 Validation.begin().isNotNull("url", (Object) url).check();
95 addPage(new URL(url));
/**
 * Adds the given page to the list of pages to crawl.
 *
 * @param page
 *            The page to add; checked for {@code null} before anything
 *            else happens
 */
public void addPage(Page page) {
Validation.begin().isNotNull("page", page).check();
115 * Creates a path from the given String array, starting at the given index.
116 * The path is created by joining all Strings from the array, separating
117 * them with a slash (‘/’).
119 * @param pathComponents
120 * The array of path components
122 * The index of the first path components
123 * @return The joined path
125 private String createPath(String[] pathComponents, int index) {
126 Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check();
127 StringBuilder path = new StringBuilder();
128 for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
129 if (path.length() > 0) {
132 path.append(pathComponents[pathComponentIndex]);
134 return path.toString();