Fanfiction step2 + SearchableTags
[fanfix.git] / src / be / nikiroo / fanfix / searchable / Fanfiction.java
1 package be.nikiroo.fanfix.searchable;
2
import java.io.IOException;
import java.math.BigDecimal;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.supported.SupportType;
19
20 /**
21 * A {@link BasicSearchable} for Fanfiction.NET.
22 *
23 * @author niki
24 */
25 class Fanfiction extends BasicSearchable {
26 static private String BASE_URL = "http://fanfiction.net/";
27
28 /**
29 * Create a new {@link Fanfiction}.
30 *
31 * @param type
32 * {@link SupportType#FANFICTION}
33 */
34 public Fanfiction(SupportType type) {
35 super(type);
36 }
37
38 @Override
39 public List<SearchableTag> getTags() throws IOException {
40 String storiesName = null;
41 String crossoversName = null;
42 Map<String, String> stories = new HashMap<String, String>();
43 Map<String, String> crossovers = new HashMap<String, String>();
44
45 Document mainPage = load(BASE_URL, true);
46 Element menu = mainPage.getElementsByClass("dropdown").first();
47 if (menu != null) {
48 Element ul = menu.getElementsByClass("dropdown-menu").first();
49 if (ul != null) {
50 Map<String, String> currentList = null;
51 for (Element li : ul.getElementsByTag("li")) {
52 if (li.hasClass("disabled")) {
53 if (storiesName == null) {
54 storiesName = li.text();
55 currentList = stories;
56 } else {
57 crossoversName = li.text();
58 currentList = crossovers;
59 }
60 } else if (currentList != null) {
61 Element a = li.getElementsByTag("a").first();
62 if (a != null) {
63 currentList.put(a.absUrl("href"), a.text());
64 }
65 }
66 }
67 }
68 }
69
70 List<SearchableTag> tags = new ArrayList<SearchableTag>();
71
72 if (storiesName != null) {
73 SearchableTag tag = new SearchableTag(null, storiesName, false);
74 for (String id : stories.keySet()) {
75 tag.add(new SearchableTag(id, stories.get(id), true, false));
76 }
77 tags.add(tag);
78 }
79
80 if (crossoversName != null) {
81 SearchableTag tag = new SearchableTag(null, crossoversName, false);
82 for (String id : crossovers.keySet()) {
83 tag.add(new SearchableTag(id, crossovers.get(id), false, false));
84 }
85 tags.add(tag);
86 }
87
88 return tags;
89 }
90
91 @Override
92 protected void fillTag(SearchableTag tag) throws IOException {
93 if (tag.getId() == null || tag.isComplete()) {
94 return;
95 }
96
97 boolean subtagIsLeaf = !tag.getId().contains("/crossovers/");
98
99 Document doc = load(tag.getId(), false);
100 Element list = doc.getElementById("list_output");
101 if (list != null) {
102 Element table = list.getElementsByTag("table").first();
103 if (table != null) {
104 for (Element div : table.getElementsByTag("div")) {
105 Element a = div.getElementsByTag("a").first();
106 Element span = div.getElementsByTag("span").first();
107
108 if (a != null) {
109 SearchableTag subtag = new SearchableTag(
110 a.absUrl("href"), a.text(), subtagIsLeaf);
111 tag.add(subtag);
112 if (span != null) {
113 String nr = span.text();
114 if (nr.startsWith("(")) {
115 nr = nr.substring(1);
116 }
117 if (nr.endsWith(")")) {
118 nr = nr.substring(0, nr.length() - 1);
119 }
120 nr = nr.trim();
121
122 long count = 0;
123 try {
124 if (nr.toLowerCase().endsWith("m")) {
125 count = Long.parseLong(nr.substring(0,
126 nr.length() - 1).trim());
127 count *= 1000000;
128 } else if (nr.toLowerCase().endsWith("k")) {
129 count = Long.parseLong(nr.substring(0,
130 nr.length() - 1).trim());
131 count *= 1000;
132 } else {
133 count = Long.parseLong(nr);
134 }
135 } catch (NumberFormatException pe) {
136 }
137
138 subtag.setCount(count);
139 }
140 }
141 }
142 }
143 }
144
145 tag.setComplete(true);
146 }
147
148 @Override
149 public List<MetaData> search(String search) throws IOException {
150 // TODO /search/?reader=1&type=story&keywords=blablablab
151 return null;
152 }
153
154 @Override
155 public List<MetaData> search(SearchableTag tag) throws IOException {
156 List<MetaData> metas = new ArrayList<MetaData>();
157
158 if (tag.getId() != null) {
159 Document doc = load(tag.getId(), false);
160
161 Element center = doc.getElementsByTag("center").first();
162 if (center != null) {
163 int pages = -1;
164 for (Element a : center.getElementsByTag("a")) {
165 if (a.absUrl("href").contains("&p=")) {
166 int thisLinkPages = -1;
167 try {
168 String[] tab = a.absUrl("href").split("=");
169 tab = tab[tab.length - 1].split("&");
170 thisLinkPages = Integer
171 .parseInt(tab[tab.length - 1]);
172 } catch (Exception e) {
173 }
174
175 pages = Math.max(pages, thisLinkPages);
176 }
177 }
178
179 tag.setPages(pages);
180 }
181
182 for (Element story : doc.getElementsByClass("z-list")) {
183 String title = "";
184 String url = "";
185 String coverUrl = "";
186
187 Element stitle = story.getElementsByClass("stitle").first();
188 if (stitle != null) {
189 title = stitle.text();
190 url = stitle.absUrl("href");
191 Element cover = stitle.getElementsByTag("img").first();
192 if (cover != null) {
193 // note: see data-original if needed?
194 coverUrl = cover.absUrl("src");
195 }
196 }
197
198 String author = "";
199
200 Elements as = story.getElementsByTag("a");
201 if (as.size() > 1) {
202 author = as.get(1).text();
203 }
204
205 String resume = "";
206 String tags = "";
207
208 Elements divs = story.getElementsByTag("div");
209 if (divs.size() > 1 && divs.get(1).childNodeSize() > 0) {
210 resume = divs.get(1).text();
211 if (divs.size() > 2) {
212 tags = divs.get(2).text();
213 resume = resume.substring(0,
214 resume.length() - tags.length()).trim();
215 }
216 }
217
218 MetaData meta = new MetaData();
219 meta.setAuthor(author);
220 // meta.setCover(cover); //TODO ?
221 meta.setImageDocument(false);
222 meta.setResume(getSupport().makeChapter(new URL(tag.getId()),
223 0, Instance.getTrans().getString(StringId.DESCRIPTION),
224 resume));
225 meta.setSource(getType().getSourceName());
226 // TODO: remove tags to interpret them instead (lang, words..)
227 meta.setTags(Arrays.asList(tags.split(" *- *")));
228 meta.setTitle(title);
229 meta.setUrl(url);
230
231 metas.add(meta);
232 }
233 }
234
235 return metas;
236 }
237
238 public static void main(String[] args) throws IOException {
239 Fanfiction f = new Fanfiction(SupportType.FANFICTION);
240
241 SearchableTag cartoons = f.getTags().get(0).getChildren().get(2);
242 f.fillTag(cartoons);
243 SearchableTag mlp = cartoons.getChildren().get(2);
244 System.out.println(mlp);
245
246 SearchableTag ccartoons = f.getTags().get(1).getChildren().get(0);
247 f.fillTag(ccartoons);
248 SearchableTag cmlp = ccartoons.getChildren().get(0);
249 System.out.println(cmlp);
250
251 f.fillTag(cmlp);
252 System.out.println(cmlp);
253
254 List<MetaData> metas = f.search(mlp);
255 System.out.println(mlp.getPages());
256 //System.out.println(metas);
257 }
258 }