supported/MangaFox.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.AbstractMap;
   8 import java.util.ArrayList;
   9 import java.util.Collections;
  10 import java.util.List;
  11 import java.util.Map.Entry;
  12 import java.util.SortedMap;
  13 import java.util.TreeMap;
  14
  15 import org.jsoup.helper.DataUtil;
  16 import org.jsoup.nodes.Element;
  17 import org.jsoup.select.Elements;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.data.MetaData;
  21 import be.nikiroo.utils.Image;
  22 import be.nikiroo.utils.Progress;
  23 import be.nikiroo.utils.StringUtils;
  24
  25 class MangaFox extends BasicSupport {
  26         @Override
  27         protected boolean isHtml() {
  28                 return true;
  29         }
  30
  31         @Override
  32         public String getSourceName() {
  33                 return "MangaFox.me";
  34         }
  35
  36         @Override
  37         protected MetaData getMeta() throws IOException {
  38                 MetaData meta = new MetaData();
  39                 Element doc = getSourceNode();
  40
  41                 Element title = doc.getElementById("title");
  42                 Elements table = null;
  43                 if (title != null) {
  44                         table = title.getElementsByTag("table");
  45                 }
  46                 if (table != null) {
  47                         // Rows: header, data
  48                         Elements rows = table.first().getElementsByTag("tr");
  49                         if (rows.size() > 1) {
  50                                 table = rows.get(1).getElementsByTag("td");
  51                                 // Columns: Realeased, Authors, Artists, Genres
  52                                 if (table.size() < 4) {
  53                                         table = null;
  54                                 }
  55                         }
  56                 }
  57
  58                 meta.setTitle(getTitle());
  59                 if (table != null) {
  60                         meta.setAuthor(getAuthors(table.get(1).text() + ","
  61                                         + table.get(2).text()));
  62
  63                         meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
  64                         meta.setTags(explode(table.get(3).text()));
  65                 }
  66                 meta.setSource(getSourceName());
  67                 meta.setUrl(getSource().toString());
  68                 meta.setPublisher(getSourceName());
  69                 meta.setUuid(getSource().toString());
  70                 meta.setLuid("");
  71                 meta.setLang("en");
  72                 meta.setSubject("manga");
  73                 meta.setType(getType().toString());
  74                 meta.setImageDocument(true);
  75                 meta.setCover(getCover());
  76
  77                 return meta;
  78         }
  79
  80         private String getTitle() {
  81                 Element doc = getSourceNode();
  82
  83                 Element title = doc.getElementById("title");
  84                 Element h1 = title.getElementsByTag("h1").first();
  85                 if (h1 != null) {
  86                         return StringUtils.unhtml(h1.text()).trim();
  87                 }
  88
  89                 return null;
  90         }
  91
  92         private String getAuthors(String authorList) {
  93                 String author = "";
  94                 for (String auth : explode(authorList)) {
  95                         if (!author.isEmpty()) {
  96                                 author = author + ", ";
  97                         }
  98                         author += auth;
  99                 }
 100
 101                 return author;
 102         }
 103
 104         @Override
 105         protected String getDesc() {
 106                 Element doc = getSourceNode();
 107                 Element title = doc.getElementsByClass("summary").first();
 108                 if (title != null) {
 109                         StringUtils.unhtml(title.text()).trim();
 110                 }
 111
 112                 return null;
 113         }
 114
 115         private Image getCover() {
 116                 Element doc = getSourceNode();
 117                 Element cover = doc.getElementsByClass("cover").first();
 118                 if (cover != null) {
 119                         cover = cover.getElementsByTag("img").first();
 120                 }
 121
 122                 if (cover != null) {
 123                         String coverUrl = cover.absUrl("src");
 124
 125                         InputStream coverIn;
 126                         try {
 127                                 coverIn = openEx(coverUrl);
 128                                 try {
 129                                         return new Image(coverIn);
 130                                 } finally {
 131                                         coverIn.close();
 132                                 }
 133                         } catch (IOException e) {
 134                                 Instance.getTraceHandler().error(e);
 135                         }
 136                 }
 137
 138                 return null;
 139         }
 140
 141         @Override
 142         protected List<Entry<String, URL>> getChapters(Progress pg) {
 143                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 144
 145                 String prefix = null; // each chapter starts with this prefix, then a
 146                                                                 // chapter number (including "x.5"), then name
 147
 148                 Element doc = getSourceNode();
 149                 for (Element li : doc.getElementsByTag("li")) {
 150                         Element el = li.getElementsByTag("h4").first();
 151                         if (el == null) {
 152                                 el = li.getElementsByTag("h3").first();
 153                         }
 154                         if (el != null) {
 155                                 Element a = el.getElementsByTag("a").first();
 156                                 if (a != null) {
 157                                         String title = StringUtils.unhtml(el.text()).trim();
 158                                         try {
 159                                                 String url = a.absUrl("href");
 160                                                 if (url.endsWith("1.html")) {
 161                                                         url = url.substring(0,
 162                                                                         url.length() - "1.html".length());
 163                                                 }
 164                                                 if (!url.endsWith("/")) {
 165                                                         url += "/";
 166                                                 }
 167
 168                                                 if (prefix == null || !prefix.isEmpty()) {
 169                                                         StringBuilder possiblePrefix = new StringBuilder(
 170                                                                         StringUtils.unhtml(a.text()).trim());
 171                                                         while (possiblePrefix.length() > 0) {
 172                                                                 char car = possiblePrefix.charAt(possiblePrefix
 173                                                                                 .length() - 1);
 174                                                                 boolean punctuation = (car == '.' || car == ' ');
 175                                                                 boolean digit = (car >= '0' && car <= '9');
 176                                                                 if (!punctuation && !digit) {
 177                                                                         break;
 178                                                                 }
 179
 180                                                                 possiblePrefix.setLength(possiblePrefix
 181                                                                                 .length() - 1);
 182                                                         }
 183
 184                                                         if (prefix == null) {
 185                                                                 prefix = possiblePrefix.toString();
 186                                                         }
 187
 188                                                         if (!prefix.equalsIgnoreCase(possiblePrefix
 189                                                                         .toString())) {
 190                                                                 prefix = ""; // prefix not ok
 191                                                         }
 192                                                 }
 193
 194                                                 urls.add(new AbstractMap.SimpleEntry<String, URL>(
 195                                                                 title, new URL(url)));
 196                                         } catch (Exception e) {
 197                                                 Instance.getTraceHandler().error(e);
 198                                         }
 199                                 }
 200                         }
 201                 }
 202
 203                 if (prefix != null && !prefix.isEmpty()) {
 204                         try {
 205                                 // We found a prefix, so everything should be sortable
 206                                 SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
 207                                 for (Entry<String, URL> entry : urls) {
 208                                         String num = entry.getKey().substring(prefix.length() + 1)
 209                                                         .trim();
 210                                         String name = "";
 211                                         int pos = num.indexOf(' ');
 212                                         if (pos >= 0) {
 213                                                 name = num.substring(pos).trim();
 214                                                 num = num.substring(0, pos).trim();
 215                                         }
 216
 217                                         if (!name.isEmpty()) {
 218                                                 name = "Tome " + num + ": " + name;
 219                                         } else {
 220                                                 name = "Tome " + num;
 221                                         }
 222
 223                                         double key = Double.parseDouble(num);
 224
 225                                         map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
 226                                                         entry.getValue()));
 227                                 }
 228                                 urls = new ArrayList<Entry<String, URL>>(map.values());
 229                         } catch (NumberFormatException e) {
 230                                 Instance.getTraceHandler()
 231                                                 .error(new IOException(
 232                                                                 "Cannot find a tome number, revert to default sorting",
 233                                                                 e));
 234                                 // by default, the chapters are in reversed order
 235                                 Collections.reverse(urls);
 236                         }
 237                 } else {
 238                         // by default, the chapters are in reversed order
 239                         Collections.reverse(urls);
 240                 }
 241
 242                 return urls;
 243         }
 244
 245         @Override
 246         protected String getChapterContent(URL chapUrl, int number, Progress pg)
 247                         throws IOException {
 248                 if (pg == null) {
 249                         pg = new Progress();
 250                 }
 251
 252                 StringBuilder builder = new StringBuilder();
 253
 254                 String url = chapUrl.toString();
 255                 InputStream imageIn = null;
 256                 Element imageDoc = null;
 257
 258                 // 1. find out how many images there are
 259                 int size;
 260                 try {
 261                         // note: when used, the base URL can be an ad-page
 262                         imageIn = openEx(url + "1.html");
 263                         imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
 264                 } finally {
 265                         imageIn.close();
 266                 }
 267                 Element select = imageDoc.getElementsByClass("m").first();
 268                 Elements options = select.getElementsByTag("option");
 269                 size = options.size() - 1; // last is "Comments"
 270
 271                 pg.setMinMax(0, size);
 272
 273                 // 2. list them
 274                 for (int i = 1; i <= size; i++) {
 275                         if (i > 1) { // because fist one was opened for size
 276                                 try {
 277                                         imageIn = openEx(url + i + ".html");
 278                                         imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
 279                                                         + ".html");
 280                                 } finally {
 281                                         imageIn.close();
 282                                 }
 283                         }
 284
 285                         String linkImage = imageDoc.getElementById("image").absUrl("src");
 286                         if (linkImage != null) {
 287                                 builder.append("[");
 288                                 // to help with the retry and the originalUrl, part 1
 289                                 builder.append(withoutQuery(linkImage));
 290                                 builder.append("]<br/>");
 291                         }
 292
 293                         // to help with the retry and the originalUrl, part 2
 294                         refresh(linkImage);
 295                 }
 296
 297                 return builder.toString();
 298         }
 299
 300         /**
 301          * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
 302          *
 303          * @param url
 304          *            the URL to refresh
 305          *
 306          * @return TRUE if it was refreshed
 307          */
 308         private boolean refresh(String url) {
 309                 try {
 310                         openEx(url).close();
 311                         return true;
 312                 } catch (Exception e) {
 313                         return false;
 314                 }
 315         }
 316
 317         /**
 318          * Open the URL through the cache, but: retry a second time after 100ms if
 319          * it fails, remove the query part of the {@link URL} before saving it to
 320          * the cache (so it can be recalled later).
 321          *
 322          * @param url
 323          *            the {@link URL}
 324          *
 325          * @return the resource
 326          *
 327          * @throws IOException
 328          *             in case of I/O error
 329          */
 330         private InputStream openEx(String url) throws IOException {
 331                 try {
 332                         return Instance.getCache().open(new URL(url), this, true,
 333                                         withoutQuery(url));
 334                 } catch (Exception e) {
 335                         // second chance
 336                         try {
 337                                 Thread.sleep(100);
 338                         } catch (InterruptedException ee) {
 339                         }
 340
 341                         return Instance.getCache().open(new URL(url), this, true,
 342                                         withoutQuery(url));
 343                 }
 344         }
 345
 346         /**
 347          * Return the same input {@link URL} but without the query part.
 348          *
 349          * @param url
 350          *            the inpiut {@link URL} as a {@link String}
 351          *
 352          * @return the input {@link URL} without query
 353          */
 354         private URL withoutQuery(String url) {
 355                 URL o = null;
 356                 try {
 357                         // Remove the query from o (originalUrl), so it can be cached
 358                         // correctly
 359                         o = new URL(url);
 360                         o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
 361
 362                         return o;
 363                 } catch (MalformedURLException e) {
 364                         return null;
 365                 }
 366         }
 367
 368         /**
 369          * Explode an HTML comma-separated list of values into a non-duplicate text
 370          * {@link List} .
 371          *
 372          * @param values
 373          *            the comma-separated values in HTML format
 374          *
 375          * @return the full list with no duplicate in text format
 376          */
 377         private List<String> explode(String values) {
 378                 List<String> list = new ArrayList<String>();
 379                 if (values != null && !values.isEmpty()) {
 380                         for (String auth : values.split(",")) {
 381                                 String a = StringUtils.unhtml(auth).trim();
 382                                 if (!a.isEmpty() && !list.contains(a.trim())) {
 383                                         list.add(a);
 384                                 }
 385                         }
 386                 }
 387
 388                 return list;
 389         }
 390
 391         @Override
 392         protected boolean supports(URL url) {
 393                 return "mangafox.me".equals(url.getHost())
 394                                 || "www.mangafox.me".equals(url.getHost())
 395                                 || "fanfox.net".equals(url.getHost())
 396                                 || "www.fanfox.net".equals(url.getHost());
 397         }
 398 }