src/net/pterodactylus/arachne/core/Core.java
/*
 * © 2009 David ‘Bombe’ Roden
 */
package net.pterodactylus.arachne.core;

import java.net.MalformedURLException;
import java.net.URL;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

import de.ina.util.service.AbstractService;
import de.ina.util.thread.DumpingThreadFactory;
import de.ina.util.validation.Validation;

/**
 * Arachne’s core.
 *
 * @author David ‘Bombe’ Roden <bombe@pterodactylus.net>
 */
public class Core extends AbstractService {

	/** The logger. */
	private static final Logger logger = Logger.getLogger(Core.class.getName());

	//
	// PROPERTIES
	//

	/** The host of the Freenet node. */
	private String nodeHost = "localhost";

	/** The port of the Freenet node. */
	private int nodePort = 8888;

	//
	// INTERNAL MEMBERS
	//

	/** Thread pool for the URL fetches. */
	private Executor urlFetcherExecutor = Executors.newFixedThreadPool(1, new DumpingThreadFactory("URLFetcher-"));

	/** The current list of pages to crawl. */
	private final List<Page> pages = new ArrayList<Page>();

	//
	// ACCESSORS
	//

	/**
	 * Sets the host name of the node.
	 *
	 * @param nodeHost
	 *            The node’s host name
	 */
	public void setNodeHost(String nodeHost) {
		this.nodeHost = nodeHost;
	}

	//
	// ACTIONS
	//

	/**
	 * Adds the given URL to the list of pages to crawl. The URL has to point
	 * at the configured Freenet node.
	 *
	 * @param url
	 *            The URL to add
	 */
	public void addPage(URL url) {
		/* The URL must be non-null and point at the configured node. */
		Validation.begin().isNotNull("url", url).check()
				.isEqual("url.getHost()", url.getHost(), (Object) nodeHost)
				.isEqual("url.getPort()", url.getPort(), nodePort).check();
	}

	/**
	 * Adds the given URL to the list of pages to crawl.
	 *
	 * @param url
	 *            The URL of the page to crawl
	 * @throws MalformedURLException
	 *             if the given string is not a valid URL
	 */
	public void addPage(String url) throws MalformedURLException {
		Validation.begin().isNotNull("url", (Object) url).check();
		addPage(new URL(url));
	}

	/**
	 * Adds the given page to the list of pages to crawl.
	 *
	 * @param page
	 *            The page to add
	 */
	public void addPage(Page page) {
		Validation.begin().isNotNull("page", page).check();
		pages.add(page);
		notifySyncObject();
	}

	//
	// SERVICE METHODS
	//

	/**
	 * {@inheritDoc}
	 *
	 * @see de.ina.util.service.AbstractService#serviceRun()
	 */
	@Override
	@SuppressWarnings("null")
	protected void serviceRun() {
		while (!shouldStop()) {
			Page nextPage = null;
			/* Wait until a page is queued or the service is told to stop. */
			synchronized (syncObject) {
				while (!shouldStop() && pages.isEmpty()) {
					try {
						syncObject.wait();
					} catch (InterruptedException ie1) {
						/* ignore. */
					}
				}
				if (!shouldStop()) {
					nextPage = pages.remove(0);
				}
			}
			if (shouldStop()) {
				break;
			}
			/* Turn the page into a URL on the configured node. */
			URL nextURL = nextPage.toURL(nodeHost, nodePort);
			if (nextURL == null) {
				logger.log(Level.INFO, "Skipping “" + nextPage + "”.");
				continue;
			}
			/* Hand the URL over to the fetcher thread pool. */
			URLFetcher urlFetcher;
			try {
				logger.log(Level.INFO, "Fetching “" + nextURL + "”...");
				urlFetcher = new URLFetcher(nextURL);
				urlFetcherExecutor.execute(urlFetcher);
			} catch (NoSuchAlgorithmException nsae1) {
				logger.log(Level.SEVERE, "Could not get “SHA-256” message digest!", nsae1);
			}
		}
	}

}
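
A minimal usage sketch, assuming AbstractService exposes a public start() method that eventually invokes serviceRun() — the exact life-cycle API of de.ina.util.service.AbstractService is not shown in this file:

package net.pterodactylus.arachne.core;

import java.net.MalformedURLException;

public class CoreExample {

	public static void main(String[] arguments) throws MalformedURLException {
		Core core = new Core();
		core.setNodeHost("localhost");
		/* start() is assumed to come from AbstractService. */
		core.start();
		/* The URL has to match the configured node’s host and port. */
		core.addPage("http://localhost:8888/");
	}

}

Since urlFetcherExecutor is created with Executors.newFixedThreadPool(1), queued pages are fetched strictly one at a time, even though serviceRun() hands them off to a separate thread.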