import java.net.MalformedURLException;
import java.net.URL;
+import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import de.ina.util.service.AbstractService;
+import de.ina.util.thread.DumpingThreadFactory;
import de.ina.util.validation.Validation;
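/**
* Service whose run loop takes queued {@link Page}s, converts each one to a URL on
* the configured node and hands a {@code URLFetcher} for it to an executor.
*/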
public class Core extends AbstractService {
+ /** The logger. */
+ private static final Logger logger = Logger.getLogger(Core.class.getName());
+
//
// PROPERTIES
//
// INTERNAL MEMBERS
//
+ /** Thread pool for the URL fetches. */
+ private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
+
/** The current list of pages to crawl; serviceRun() accesses it only while holding syncObject. */
- private final List<Page> pages = Collections.synchronizedList(new ArrayList<Page>());
+ private final List<Page> pages = new ArrayList<Page>();
+
+ //
+ // ACCESSORS
+ //
+
+ /**
+ * Sets the host name of the node.
+ * <p>
+ * {@link #addPage(URL)} compares the host of every given URL against this
+ * value, and {@link #serviceRun()} passes it to {@code Page.toURL} when
+ * building the URL to fetch.
+ * <p>
+ * NOTE(review): the field is assigned here without holding any lock, and
+ * serviceRun() reads it outside its synchronized block; confirm whether
+ * it may be changed while the service is running.
+ *
+ * @param nodeHost
+ * The node’s host name
+ */
+ public void setNodeHost(String nodeHost) {
+ this.nodeHost = nodeHost;
+ }
//
// ACTIONS
*/
public void addPage(URL url) { // url is checked to be non-null and its host/port are compared with nodeHost/nodePort; presumably check() throws on a mismatch (confirm in Validation)
Validation.begin().isNotNull("url", url).check().isEqual("url.getHost()", url.getHost(), (Object) nodeHost).isEqual("url.getPort()", url.getPort(), nodePort).check();
- String path = url.getPath();
- if (path.length() == 0) {
- path = "/";
- }
- String[] pathComponents = path.split("/");
- if (pathComponents.length < 2) {
- throw new IllegalArgumentException("URL “" + url + "” is not a valid freenet page.");
- }
- String siteName = pathComponents[1];
- String[] siteComponents = siteName.split("@");
- if (siteComponents.length != 2) {
- throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
- }
- if (!"USK".equals(siteComponents[0]) && !"SSK".equals(siteComponents[0]) && !"CHK".equals(siteComponents[0])) {
- throw new IllegalArgumentException("siteName “" + siteName + "” is not a valid freenet page.");
- }
- if ("USK".equals(siteComponents[0])) {
- Site site = new Site(siteComponents[1], pathComponents[2]);
- Edition edition = new Edition(site, Integer.parseInt(pathComponents[3]));
- Page page = new Page(edition, createPath(pathComponents, 4));
- addPage(page);
- }
- if ("SSK".equals(siteComponents[0])) {
- int lastDash = pathComponents[2].lastIndexOf('-');
- String basename = pathComponents[2].substring(0, lastDash);
- int editionNumber = Integer.parseInt(pathComponents[2].substring(lastDash + 1));
- Site site = new Site(siteComponents[1], basename);
- Edition edition = new Edition(site, editionNumber);
- Page page = new Page(edition, createPath(pathComponents, 3));
- addPage(page);
- }
- /* TODO: handle CHK */
}
/**
}
//
- // PRIVATE METHODS
+ // SERVICE METHODS
//
/**
- * Creates a path from the given String array, starting at the given index.
- * The path is created by joining all Strings from the array, separating
- * them with a slash (‘/’).
+ * {@inheritDoc}
*
- * @param pathComponents
- * The array of path components
- * @param index
- * The index of the first path components
- * @return The joined path
+ * @see de.ina.util.service.AbstractService#serviceRun()
*/
- private String createPath(String[] pathComponents, int index) {
- Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check();
- StringBuilder path = new StringBuilder();
- for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
- if (path.length() > 0) {
- path.append('/');
+ @Override
+ @SuppressWarnings("null") // presumably silences the null-analysis warning on nextPage.toURL(): nextPage stays null only if shouldStop() was true inside the lock
+ protected void serviceRun() { // main loop: take one queued page per iteration and submit a fetch for it until the service should stop
+ while (!shouldStop()) {
+ Page nextPage = null;
+ synchronized (syncObject) {
+ while (!shouldStop() && pages.isEmpty()) { // block until a page is queued or the service is stopping
+ try {
+ syncObject.wait(); // NOTE(review): relies on some other code notifying syncObject (not visible here) when a page is added or the service stops
+ } catch (InterruptedException ie1) {
+ /* ignore: the enclosing loop re-checks shouldStop() and pages.isEmpty(). */
+ }
+ }
+ if (!shouldStop()) {
+ nextPage = pages.remove(0); // take the entry at the head of the list
+ }
+ }
+ if (shouldStop()) {
+ break;
+ }
+ URL nextURL = nextPage.toURL(nodeHost, nodePort);
+ if (nextURL == null) { // toURL() produced no URL for this page; it has already been removed from the list and is not re-queued
+ logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
+ continue;
+ }
+ URLFetcher urlFetcher;
+ try {
+ logger.log(Level.INFO, "Fetching “" + nextURL + "”...");
+ urlFetcher = new URLFetcher(nextURL);
+ urlFetcherExecutor.execute(urlFetcher); // hand the fetch to the URL fetcher executor; this loop does not wait for the result
+ } catch (NoSuchAlgorithmException nsae1) { // the page is dropped in this case and the loop moves on to the next one
+ logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1);
}
- path.append(pathComponents[pathComponentIndex]);
}
- return path.toString();
}
}