Create message digest in core and hash fetched URLs.
[arachne.git] / src / net / pterodactylus / arachne / core / Core.java
index f3fd5de..f6b050d 100644 (file)
@@ -5,11 +5,16 @@ package net.pterodactylus.arachne.core;
 
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.security.NoSuchAlgorithmException;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import de.ina.util.service.AbstractService;
+import de.ina.util.thread.DumpingThreadFactory;
 import de.ina.util.validation.Validation;
 
 /**
@@ -19,6 +24,9 @@ import de.ina.util.validation.Validation;
  */
 public class Core extends AbstractService {
 
+       /** The logger. */
+       private static final Logger logger = Logger.getLogger(Core.class.getName());
+
        //
        // PROPERTIES
        //
@@ -33,8 +41,25 @@ public class Core extends AbstractService {
        // INTERNAL MEMBERS
        //
 
+       /** Thread pool for the URL fetches. */
+       private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
+
        /** The current list of URLs to crawl. */
-       private final List<Page> pages = Collections.synchronizedList(new ArrayList<Page>());
+       private final List<Page> pages = new ArrayList<Page>();
+
+       //
+       // ACCESSORS
+       //
+
+       /**
+        * Sets the host name of the node. The value is stored as-is, without any
+        * validation, and is used as the host part of the URLs that are created
+        * for pages.
+        *
+        * @param nodeHost
+        *            The node’s host name
+        */
+       public void setNodeHost(String nodeHost) {
+               this.nodeHost = nodeHost;
+       }
 
        //
        // ACTIONS
@@ -108,6 +133,50 @@ public class Core extends AbstractService {
        }
 
        //
+       // SERVICE METHODS
+       //
+
+       /**
+        * Main loop of the service: repeatedly takes the first queued page, turns
+        * it into a URL and hands a fetcher for that URL to the URL fetcher
+        * executor. The loop ends as soon as {@code shouldStop()} reports that the
+        * service should stop.
+        * <p>
+        * {@inheritDoc}
+        *
+        * @see de.ina.util.service.AbstractService#serviceRun()
+        */
+       @Override
+       protected void serviceRun() {
+               while (!shouldStop()) {
+                       Page page = null;
+                       synchronized (syncObject) {
+                               /* wait until a page is queued or a stop was requested. */
+                               while (!shouldStop() && pages.isEmpty()) {
+                                       try {
+                                               syncObject.wait();
+                                       } catch (InterruptedException interrupted) {
+                                               /* ignore, the loop condition is checked again. */
+                                       }
+                               }
+                               if (!shouldStop()) {
+                                       page = pages.remove(0);
+                               }
+                       }
+                       if (shouldStop()) {
+                               break;
+                       }
+                       URL url = createURL(page);
+                       if (url != null) {
+                               try {
+                                       logger.log(Level.INFO, "Fetching “" + url + "”...");
+                                       urlFetcherExecutor.execute(new URLFetcher(this, url));
+                               } catch (NoSuchAlgorithmException noDigest) {
+                                       logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", noDigest);
+                               }
+                       } else {
+                               /* no URL for this page, go on with the next one. */
+                               logger.log(Level.INFO, "Skipping “" + page + "”.");
+                       }
+               }
+       }
+
+       //
        // PRIVATE METHODS
        //
 
@@ -123,7 +192,7 @@ public class Core extends AbstractService {
         * @return The joined path
         */
        private String createPath(String[] pathComponents, int index) {
-               Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check();
+               Validation.begin().isNotNull("pathComponents", pathComponents).check().isLessOrEqual("index", index, pathComponents.length).check();
                StringBuilder path = new StringBuilder();
                for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
                        if (path.length() > 0) {
@@ -134,4 +203,21 @@ public class Core extends AbstractService {
                return path.toString();
        }
 
+       /**
+        * Creates a URL from the given page. The URL is built from the node’s
+        * host and port, the key and basename of the page’s site, the number of
+        * the page’s edition and the path of the page.
+        * <p>
+        * If the resulting string is not a valid URL the failure is logged and
+        * <code>null</code> is returned, so that the cause is not lost.
+        *
+        * @param page
+        *            The page to create a URL from
+        * @return The created URL, or <code>null</code> if the URL could not be
+        *         created
+        */
+       private URL createURL(Page page) {
+               try {
+                       return new URL("http://" + nodeHost + ":" + nodePort + "/SSK@" + page.getEdition().getSite().getKey() + "/" + page.getEdition().getSite().getBasename() + "-" + page.getEdition().getNumber() + "/" + page.getPath());
+               } catch (MalformedURLException mue1) {
+                       /* nearly impossible, but do not lose the reason if it happens. */
+                       logger.log(Level.WARNING, "Could not create URL for “" + page + "”.", mue1);
+                       return null;
+               }
+       }
+
 }