*
* @param url
* the URL to load
+ * @param stable
+ * TRUE for more stable resources, FALSE when they often change
+ *
* @return the document
*
* @throws IOException
* in case of I/O error
*/
- protected Document load(String url) throws IOException {
- return load(new URL(url));
+ protected Document load(String url, boolean stable) throws IOException {
+ return load(new URL(url), stable);
}
/**
*
* @param url
* the URL to load
+ * @param stable
+ * TRUE for more stable resources, FALSE when they often change
+ *
* @return the document
*
* @throws IOException
* in case of I/O error
*/
- protected Document load(URL url) throws IOException {
- return DataUtil.load(Instance.getCache().open(url, support, false),
+ protected Document load(URL url, boolean stable) throws IOException {
+ return DataUtil.load(Instance.getCache().open(url, support, stable),
"UTF-8", url.toString());
}
* @author niki
*/
class Fanfiction extends BasicSearchable {
+ static private String BASE_URL = "http://fanfiction.net/";
+
/**
* Create a new {@link Fanfiction}.
*
Map<String, String> stories = new HashMap<String, String>();
Map<String, String> crossovers = new HashMap<String, String>();
- Document mainPage = load("http://fanfiction.net/");
+ Document mainPage = load(BASE_URL, true);
Element menu = mainPage.getElementsByClass("dropdown").first();
if (menu != null) {
Element ul = menu.getElementsByClass("dropdown-menu").first();
List<SearchableTag> tags = new ArrayList<SearchableTag>();
if (storiesName != null) {
- SearchableTag tag = new SearchableTag(null, storiesName, true);
+ SearchableTag tag = new SearchableTag(null, storiesName, false);
for (String id : stories.keySet()) {
- tag.add(new SearchableTag(id, stories.get(id), false));
+ tag.add(new SearchableTag(id, stories.get(id), true, false));
}
tags.add(tag);
}
if (crossoversName != null) {
- SearchableTag tag = new SearchableTag(null, crossoversName, true);
+ SearchableTag tag = new SearchableTag(null, crossoversName, false);
for (String id : crossovers.keySet()) {
- tag.add(new SearchableTag(id, crossovers.get(id), false));
+ tag.add(new SearchableTag(id, crossovers.get(id), false, false));
}
tags.add(tag);
}
return;
}
- boolean subtagIsComplete = !tag.getId().contains("/crossovers/");
+ boolean subtagIsLeaf = !tag.getId().contains("/crossovers/");
- Document doc = load(tag.getId());
+ Document doc = load(tag.getId(), false);
Element list = doc.getElementById("list_output");
if (list != null) {
Element table = list.getElementsByTag("table").first();
if (a != null) {
SearchableTag subtag = new SearchableTag(
- a.absUrl("href"), a.text(), subtagIsComplete);
+ a.absUrl("href"), a.text(), subtagIsLeaf);
tag.add(subtag);
if (span != null) {
String nr = span.text();
@Override
public List<MetaData> search(String search) throws IOException {
- // TODO Auto-generated method stub
+ // TODO /search/?reader=1&type=story&keywords=blablablab
return null;
}
List<MetaData> metas = new ArrayList<MetaData>();
if (tag.getId() != null) {
- Document doc = load(tag.getId());
+ Document doc = load(tag.getId(), false);
+
+ Element center = doc.getElementsByTag("center").first();
+ if (center != null) {
+ int pages = -1;
+ for (Element a : center.getElementsByTag("a")) {
+ if (a.absUrl("href").contains("&p=")) {
+ int thisLinkPages = -1;
+ try {
+ String[] tab = a.absUrl("href").split("=");
+ tab = tab[tab.length - 1].split("&");
+ thisLinkPages = Integer
+ .parseInt(tab[tab.length - 1]);
+ } catch (Exception e) {
+ }
+
+ pages = Math.max(pages, thisLinkPages);
+ }
+ }
+
+ tag.setPages(pages);
+ }
+
for (Element story : doc.getElementsByClass("z-list")) {
String title = "";
String url = "";
0, Instance.getTrans().getString(StringId.DESCRIPTION),
resume));
meta.setSource(getType().getSourceName());
+ // TODO: remove tags to interpret them instead (lang, words..)
meta.setTags(Arrays.asList(tags.split(" *- *")));
meta.setTitle(title);
meta.setUrl(url);
System.out.println(cmlp);
List<MetaData> metas = f.search(mlp);
- System.out.println(metas);
+ System.out.println(mlp.getPages());
+ //System.out.println(metas);
}
}
private long count;
private List<SearchableTag> children;
+ /**
+ * The number of stories result pages this tag can get.
+ * <p>
+ * We keep more information than what the getter/setter returns/accepts.
+ * <ul>
+ * <li>-2: this tag does not support stories results (not a leaf tag)</li>
+ * <li>-1: the number is not yet known, but will be known after a
+ * {@link BasicSearchable#fillTag(SearchableTag)} operation</li>
+ * <li>X: the number of pages</li>
+ * </ul>
+ */
+ private int pages;
+
/**
* Create a new {@link SearchableTag}.
+ * <p>
+ * Note that tags created with this constructor are marked as complete
+ * (see {@link SearchableTag#isComplete()}).
*
* @param id
* the ID (usually a way to find the linked stories later on)
* @param name
* the tag name, which can be displayed to the user
+ * @param leaf
+ * the tag is a leaf tag, that is, it will not return subtags
+ * with {@link BasicSearchable#fillTag(SearchableTag)} but will
+ * return stories with
+ * {@link BasicSearchable#search(SearchableTag)}
+ */
+ public SearchableTag(String id, String name, boolean leaf) {
+ this(id, name, leaf, true);
+ }
+
+ /**
+ * Create a new {@link SearchableTag}.
+ *
+ * @param id
+ * the ID (usually a way to find the linked stories later on)
+ * @param name
+ * the tag name, which can be displayed to the user
+ * @param leaf
+ * the tag is a leaf tag, that is, it will not return subtags
+ * with {@link BasicSearchable#fillTag(SearchableTag)} but will
+ * return stories with
+ * {@link BasicSearchable#search(SearchableTag)}
* @param complete
- * TRUE for a {@link SearchableTag} that cannot be "filled" by
- * the {@link BasicSearchable} in order to get (more?) subtag
- * children
+ * TRUE if the tag is already complete (see
+ * {@link SearchableTag#isComplete()})
*/
- public SearchableTag(String id, String name, boolean complete) {
+ public SearchableTag(String id, String name, boolean leaf, boolean complete) {
this.id = id;
this.name = name;
this.complete = complete;
+ // Same values as setLeaf(leaf), but assigned directly: setLeaf is a
+ // public, overridable method and must not run on a half-built instance
+ // (-1: leaf, number of pages not yet known; -2: not a leaf).
+ pages = leaf ? -1 : -2;
+
children = new ArrayList<SearchableTag>();
}
+ /**
+ * The ID (usually a way to find the linked stories later on).
+ * <p>
+ * It is returned exactly as it was given to the constructor, which does
+ * not check it, so it can be NULL.
+ *
+ * @return the ID, or NULL
+ */
public String getId() {
return id;
}
+ /**
+ * The tag name, which can be displayed to the user.
+ *
+ * @return the name
+ */
public String getName() {
return name;
}
/**
- * This tag can still be completed via a "fill" tag operation from a
+ * Non-complete, non-leaf tags can still be completed via a
+ * {@link BasicSearchable#fillTag(SearchableTag)} operation from a
* {@link BasicSearchable}, in order to gain (more?) subtag children.
+ * <p>
+ * This method does not make sense for leaf tags.
*
- * @return TRUE if it can
+ * @return TRUE if it is complete
*/
public boolean isComplete() {
return complete;
}
/**
- * This tag can still be completed via a "fill" tag operation from a
+ * Non-complete, non-leaf tags can still be completed via a
+ * {@link BasicSearchable#fillTag(SearchableTag)} operation from a
* {@link BasicSearchable}, in order to gain (more?) subtag children.
+ * <p>
+ * This method does not make sense for leaf tags.
*
* @param complete
- * TRUE if it can
+ * TRUE if it is complete
*/
public void setComplete(boolean complete) {
this.complete = complete;
this.count = count;
}
+ /**
+ * The number of stories result pages this tag contains; this only makes
+ * sense if {@link SearchableTag#isLeaf()} returns TRUE.
+ * <p>
+ * Any internal value below -1 is reported as -1, so -1 is returned both
+ * when the number is not yet known and when the tag is not a leaf.
+ *
+ * @return the number of pages, or -1
+ */
+ public int getPages() {
+ return pages < -1 ? -1 : pages;
+ }
+
+ /**
+ * The number of stories result pages this tag contains; this only makes
+ * sense if {@link SearchableTag#isLeaf()} returns TRUE.
+ * <p>
+ * Any value below -1 is stored as -1 ("not yet known"). As the stored
+ * value is thus never below -1, {@link SearchableTag#isLeaf()} returns
+ * TRUE after this call.
+ *
+ * @param pages
+ * the (positive or 0) number of pages, or -1 if not yet known
+ */
+ public void setPages(int pages) {
+ this.pages = pages < -1 ? -1 : pages;
+ }
+
+ /**
+ * Check if this tag is a leaf tag, that is, a tag that will not return
+ * other subtags with {@link BasicSearchable#fillTag(SearchableTag)} but
+ * will return stories with {@link BasicSearchable#search(SearchableTag)}.
+ * <p>
+ * A tag is a leaf as long as its internal number of pages is -1 (not yet
+ * known) or more.
+ *
+ * @return TRUE if it is
+ */
+ public boolean isLeaf() {
+ return pages >= -1;
+ }
+
+ /**
+ * Mark this tag as a leaf tag or not; a leaf tag will not return other
+ * subtags with {@link BasicSearchable#fillTag(SearchableTag)} but will
+ * return stories with {@link BasicSearchable#search(SearchableTag)}.
+ * <p>
+ * Any previously stored number of pages is discarded: it is reset to -1
+ * (not yet known) for a leaf tag and to -2 for a non-leaf tag, so
+ * {@link SearchableTag#getPages()} returns -1 afterwards in both cases.
+ *
+ * @param leaf
+ * TRUE if it is
+ */
+ public void setLeaf(boolean leaf) {
+ if (leaf) {
+ pages = -1;
+ } else {
+ pages = -2;
+ }
+ }
+
/**
* The subtag children of this {@link SearchableTag}.
* <p>