Stub implementation of page fetching.
author: David ‘Bombe’ Roden <bombe@pterodactylus.net>
Mon, 9 Mar 2009 17:01:56 +0000 (18:01 +0100)
committer: David ‘Bombe’ Roden <bombe@pterodactylus.net>
Mon, 9 Mar 2009 17:01:56 +0000 (18:01 +0100)
src/net/pterodactylus/arachne/core/Core.java

index f3fd5de..ad438b1 100644 (file)
@@ -6,10 +6,14 @@ package net.pterodactylus.arachne.core;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
-import java.util.Collections;
 import java.util.List;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 import de.ina.util.service.AbstractService;
+import de.ina.util.thread.DumpingThreadFactory;
 import de.ina.util.validation.Validation;
 
 /**
@@ -19,6 +23,9 @@ import de.ina.util.validation.Validation;
  */
 public class Core extends AbstractService {
 
+       /** The logger. */
+       private static final Logger logger = Logger.getLogger(Core.class.getName());
+
        //
        // PROPERTIES
        //
@@ -33,8 +40,11 @@ public class Core extends AbstractService {
        // INTERNAL MEMBERS
        //
 
+       /** Thread pool for the URL fetches. */
+       private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
+
        /** The current list of URLs to crawl. */
-       private final List<Page> pages = Collections.synchronizedList(new ArrayList<Page>());
+       private final List<Page> pages = new ArrayList<Page>();
 
        //
        // ACTIONS
@@ -108,6 +118,44 @@ public class Core extends AbstractService {
        }
 
        //
+       // SERVICE METHODS
+       //
+
+       /**
+        * {@inheritDoc}
+        * Removes queued pages one at a time and submits a URLFetcher for each.
+        * @see de.ina.util.service.AbstractService#serviceRun()
+        */
+       @Override
+       protected void serviceRun() {
+               while (!shouldStop()) {
+                       Page nextPage = null;
+                       synchronized (syncObject) {
+                               while (!shouldStop() && pages.isEmpty()) {
+                                       try {
+                                               syncObject.wait();
+                                       } catch (InterruptedException ie1) {
+                                               /* ignored: the enclosing loop re-checks shouldStop() and pages.isEmpty(). */
+                                       }
+                               }
+                               if (!shouldStop()) {
+                                       nextPage = pages.remove(0);
+                               }
+                       }
+                       if (shouldStop()) {
+                               break;
+                       }
+                       URL nextURL = createURL(nextPage);
+                       if (nextURL == null) {
+                               logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
+                               continue;
+                       }
+                       URLFetcher urlFetcher = new URLFetcher(this, nextURL);
+                       urlFetcherExecutor.execute(urlFetcher);
+               }
+       }
+
+       //
        // PRIVATE METHODS
        //
 
@@ -123,7 +171,7 @@ public class Core extends AbstractService {
         * @return The joined path
         */
        private String createPath(String[] pathComponents, int index) {
-               Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check();
+               Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", index, pathComponents.length).check();
                StringBuilder path = new StringBuilder();
                for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
                        if (path.length() > 0) {
@@ -134,4 +182,21 @@ public class Core extends AbstractService {
                return path.toString();
        }
 
+       /**
+        * Creates the URL to fetch for the given page. Stub: the page is not read
+        * yet; the URL is built from nodeHost and nodePort only, with path "/".
+        * @param page
+        *            The page to create a URL for (currently unused)
+        * @return The created URL, or <code>null</code> if the URL could not be
+        *         created
+        */
+       private URL createURL(Page page) {
+               try {
+                       return new URL("http://" + nodeHost + ":" + nodePort + "/");
+               } catch (MalformedURLException mue1) {
+                       /* nearly impossible; fall through and return null. */
+               }
+               return null;
+       }
+
 }