import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
+import java.util.concurrent.Executor;
+import java.util.concurrent.Executors;
+import java.util.logging.Level;
+import java.util.logging.Logger;
import de.ina.util.service.AbstractService;
+import de.ina.util.thread.DumpingThreadFactory;
import de.ina.util.validation.Validation;
/**
*/
public class Core extends AbstractService {
+ /** The logger. */
+ private static final Logger logger = Logger.getLogger(Core.class.getName());
+
//
// PROPERTIES
//
// INTERNAL MEMBERS
//
+ /** Thread pool for the URL fetches. */
+ private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));
+
/** The current list of URLs to crawl. */
- private final List<Page> pages = Collections.synchronizedList(new ArrayList<Page>());
+ private final List<Page> pages = new ArrayList<Page>();
//
// ACTIONS
}
//
+ // SERVICE METHODS
+ //
+
+ /**
+ * {@inheritDoc}
+ * <p>
+ * Loops until {@code shouldStop()} reports <code>true</code>. Each iteration
+ * waits on {@code syncObject} until the page list is non-empty, removes the
+ * first page, creates a URL for it and hands a new {@code URLFetcher} to the
+ * URL fetcher executor. Pages for which no URL could be created are logged
+ * and skipped.
+ *
+ * @see de.ina.util.service.AbstractService#serviceRun()
+ */
+ @Override
+ protected void serviceRun() {
+ while (!shouldStop()) {
+ Page nextPage = null;
+ /* inspect and modify the page list while holding the syncObject monitor. */
+ synchronized (syncObject) {
+ /* block until there is a page to process or the service should stop. */
+ while (!shouldStop() && pages.isEmpty()) {
+ try {
+ /* NOTE(review): presumably notified by whoever adds pages or stops the service; confirm. */
+ syncObject.wait();
+ } catch (InterruptedException ie1) {
+ /* ignore; the loop condition is re-checked after an interruption. */
+ }
+ }
+ if (!shouldStop()) {
+ /* take the page at the head of the list. */
+ nextPage = pages.remove(0);
+ }
+ }
+ /* nextPage is still null if the stop check inside the lock skipped the removal, so leave before using it. */
+ if (shouldStop()) {
+ break;
+ }
+ URL nextURL = createURL(nextPage);
+ if (nextURL == null) {
+ /* no URL could be created for this page; log it and move on to the next one. */
+ logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
+ continue;
+ }
+ /* the fetch itself is executed by the URL fetcher executor, not by this method. */
+ URLFetcher urlFetcher = new URLFetcher(this, nextURL);
+ urlFetcherExecutor.execute(urlFetcher);
+ }
+ }
+
+ //
// PRIVATE METHODS
//
* @return The joined path
*/
private String createPath(String[] pathComponents, int index) {
- Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", pathComponents.length, index).check();
+ Validation.begin().isNotNull("pathComponents", pathComponents).check().isLess("index", index, pathComponents.length).check();
StringBuilder path = new StringBuilder();
for (int pathComponentIndex = index; pathComponentIndex < pathComponents.length; pathComponentIndex++) {
if (path.length() > 0) {
return path.toString();
}
+ /**
+ * Creates a URL from the given page.
+ * <p>
+ * The URL is currently built from {@code nodeHost} and {@code nodePort}
+ * only; the given page does not influence the result. If the URL is
+ * malformed, the cause is logged as a warning and <code>null</code> is
+ * returned.
+ *
+ * @param page
+ * The page to create a URL from
+ * @return The created URL, or <code>null</code> if the URL could not be
+ * created
+ */
+ private URL createURL(Page page) {
+ try {
+ return new URL("http://" + nodeHost + ":" + nodePort + "/");
+ } catch (MalformedURLException mue1) {
+ /* nearly impossible, but keep a trace of the cause instead of dropping it. */
+ logger.log(Level.WARNING, "Could not create URL for " + nodeHost + ":" + nodePort + ".", mue1);
+ }
+ return null;
+ }
+
}