be/nikiroo/fanfix/supported/MangaFox.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.MalformedURLException;
   6 import java.net.URL;
   7 import java.util.AbstractMap;
   8 import java.util.ArrayList;
   9 import java.util.Collections;
  10 import java.util.List;
  11 import java.util.Map.Entry;
  12 import java.util.SortedMap;
  13 import java.util.TreeMap;
  14
  15 import org.jsoup.helper.DataUtil;
  16 import org.jsoup.nodes.Element;
  17 import org.jsoup.select.Elements;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.data.MetaData;
  21 import be.nikiroo.utils.Image;
  22 import be.nikiroo.utils.Progress;
  23 import be.nikiroo.utils.StringUtils;
  24
  25 class MangaFox extends BasicSupport {
  26         @Override
  27         protected boolean isHtml() {
  28                 return true;
  29         }
  30
  31         @Override
  32         protected MetaData getMeta() throws IOException {
  33                 MetaData meta = new MetaData();
  34                 Element doc = getSourceNode();
  35
  36                 Element title = doc.getElementById("title");
  37                 Elements table = null;
  38                 if (title != null) {
  39                         table = title.getElementsByTag("table");
  40                 }
  41                 if (table != null) {
  42                         // Rows: header, data
  43                         Elements rows = table.first().getElementsByTag("tr");
  44                         if (rows.size() > 1) {
  45                                 table = rows.get(1).getElementsByTag("td");
  46                                 // Columns: Realeased, Authors, Artists, Genres
  47                                 if (table.size() < 4) {
  48                                         table = null;
  49                                 }
  50                         }
  51                 }
  52
  53                 meta.setTitle(getTitle());
  54                 if (table != null) {
  55                         meta.setAuthor(getAuthors(table.get(1).text() + ","
  56                                         + table.get(2).text()));
  57
  58                         meta.setDate(StringUtils.unhtml(table.get(0).text()).trim());
  59                         meta.setTags(explode(table.get(3).text()));
  60                 }
  61                 meta.setSource(getType().getSourceName());
  62                 meta.setUrl(getSource().toString());
  63                 meta.setPublisher(getType().getSourceName());
  64                 meta.setUuid(getSource().toString());
  65                 meta.setLuid("");
  66                 meta.setLang("en");
  67                 meta.setSubject("manga");
  68                 meta.setType(getType().toString());
  69                 meta.setImageDocument(true);
  70                 meta.setCover(getCover());
  71
  72                 return meta;
  73         }
  74
  75         private String getTitle() {
  76                 Element doc = getSourceNode();
  77
  78                 Element title = doc.getElementById("title");
  79                 Element h1 = title.getElementsByTag("h1").first();
  80                 if (h1 != null) {
  81                         return StringUtils.unhtml(h1.text()).trim();
  82                 }
  83
  84                 return null;
  85         }
  86
  87         private String getAuthors(String authorList) {
  88                 String author = "";
  89                 for (String auth : explode(authorList)) {
  90                         if (!author.isEmpty()) {
  91                                 author = author + ", ";
  92                         }
  93                         author += auth;
  94                 }
  95
  96                 return author;
  97         }
  98
  99         @Override
 100         protected String getDesc() {
 101                 Element doc = getSourceNode();
 102                 Element title = doc.getElementsByClass("summary").first();
 103                 if (title != null) {
 104                         return StringUtils.unhtml(title.text()).trim();
 105                 }
 106
 107                 return null;
 108         }
 109
 110         private Image getCover() {
 111                 Element doc = getSourceNode();
 112                 Element cover = doc.getElementsByClass("cover").first();
 113                 if (cover != null) {
 114                         cover = cover.getElementsByTag("img").first();
 115                 }
 116
 117                 if (cover != null) {
 118                         String coverUrl = cover.absUrl("src");
 119
 120                         InputStream coverIn;
 121                         try {
 122                                 coverIn = openEx(coverUrl);
 123                                 try {
 124                                         return new Image(coverIn);
 125                                 } finally {
 126                                         coverIn.close();
 127                                 }
 128                         } catch (IOException e) {
 129                                 Instance.getTraceHandler().error(e);
 130                         }
 131                 }
 132
 133                 return null;
 134         }
 135
 136         @Override
 137         protected List<Entry<String, URL>> getChapters(Progress pg) {
 138                 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
 139
 140                 String prefix = null; // each chapter starts with this prefix, then a
 141                                                                 // chapter number (including "x.5"), then name
 142
 143                 Element doc = getSourceNode();
 144                 for (Element li : doc.getElementsByTag("li")) {
 145                         Element el = li.getElementsByTag("h4").first();
 146                         if (el == null) {
 147                                 el = li.getElementsByTag("h3").first();
 148                         }
 149                         if (el != null) {
 150                                 Element a = el.getElementsByTag("a").first();
 151                                 if (a != null) {
 152                                         String title = StringUtils.unhtml(el.text()).trim();
 153                                         try {
 154                                                 String url = a.absUrl("href");
 155                                                 if (url.endsWith("1.html")) {
 156                                                         url = url.substring(0,
 157                                                                         url.length() - "1.html".length());
 158                                                 }
 159                                                 if (!url.endsWith("/")) {
 160                                                         url += "/";
 161                                                 }
 162
 163                                                 if (prefix == null || !prefix.isEmpty()) {
 164                                                         StringBuilder possiblePrefix = new StringBuilder(
 165                                                                         StringUtils.unhtml(a.text()).trim());
 166                                                         while (possiblePrefix.length() > 0) {
 167                                                                 char car = possiblePrefix.charAt(possiblePrefix
 168                                                                                 .length() - 1);
 169                                                                 boolean punctuation = (car == '.' || car == ' ');
 170                                                                 boolean digit = (car >= '0' && car <= '9');
 171                                                                 if (!punctuation && !digit) {
 172                                                                         break;
 173                                                                 }
 174
 175                                                                 possiblePrefix.setLength(possiblePrefix
 176                                                                                 .length() - 1);
 177                                                         }
 178
 179                                                         if (prefix == null) {
 180                                                                 prefix = possiblePrefix.toString();
 181                                                         }
 182
 183                                                         if (!prefix.equalsIgnoreCase(possiblePrefix
 184                                                                         .toString())) {
 185                                                                 prefix = ""; // prefix not ok
 186                                                         }
 187                                                 }
 188
 189                                                 urls.add(new AbstractMap.SimpleEntry<String, URL>(
 190                                                                 title, new URL(url)));
 191                                         } catch (Exception e) {
 192                                                 Instance.getTraceHandler().error(e);
 193                                         }
 194                                 }
 195                         }
 196                 }
 197
 198                 if (prefix != null && !prefix.isEmpty()) {
 199                         try {
 200                                 // We found a prefix, so everything should be sortable
 201                                 SortedMap<Double, Entry<String, URL>> map = new TreeMap<Double, Entry<String, URL>>();
 202                                 for (Entry<String, URL> entry : urls) {
 203                                         String num = entry.getKey().substring(prefix.length() + 1)
 204                                                         .trim();
 205                                         String name = "";
 206                                         int pos = num.indexOf(' ');
 207                                         if (pos >= 0) {
 208                                                 name = num.substring(pos).trim();
 209                                                 num = num.substring(0, pos).trim();
 210                                         }
 211
 212                                         if (!name.isEmpty()) {
 213                                                 name = "Tome " + num + ": " + name;
 214                                         } else {
 215                                                 name = "Tome " + num;
 216                                         }
 217
 218                                         double key = Double.parseDouble(num);
 219
 220                                         map.put(key, new AbstractMap.SimpleEntry<String, URL>(name,
 221                                                         entry.getValue()));
 222                                 }
 223                                 urls = new ArrayList<Entry<String, URL>>(map.values());
 224                         } catch (NumberFormatException e) {
 225                                 Instance.getTraceHandler()
 226                                                 .error(new IOException(
 227                                                                 "Cannot find a tome number, revert to default sorting",
 228                                                                 e));
 229                                 // by default, the chapters are in reversed order
 230                                 Collections.reverse(urls);
 231                         }
 232                 } else {
 233                         // by default, the chapters are in reversed order
 234                         Collections.reverse(urls);
 235                 }
 236
 237                 return urls;
 238         }
 239
 240         @Override
 241         protected String getChapterContent(URL chapUrl, int number, Progress pg)
 242                         throws IOException {
 243                 if (pg == null) {
 244                         pg = new Progress();
 245                 }
 246
 247                 StringBuilder builder = new StringBuilder();
 248
 249                 String url = chapUrl.toString();
 250                 InputStream imageIn = null;
 251                 Element imageDoc = null;
 252
 253                 // 1. find out how many images there are
 254                 int size;
 255                 try {
 256                         // note: when used, the base URL can be an ad-page
 257                         imageIn = openEx(url + "1.html");
 258                         imageDoc = DataUtil.load(imageIn, "UTF-8", url + "1.html");
 259                 } catch (IOException e) {
 260                         Instance.getTraceHandler().error(
 261                                         new IOException("Cannot get image " + 1 + " of manga", e));
 262                 } finally {
 263                         if (imageIn != null) {
 264                                 imageIn.close();
 265                         }
 266                 }
 267                 Element select = imageDoc.getElementsByClass("m").first();
 268                 Elements options = select.getElementsByTag("option");
 269                 size = options.size() - 1; // last is "Comments"
 270
 271                 pg.setMinMax(0, size);
 272
 273                 // 2. list them
 274                 for (int i = 1; i <= size; i++) {
 275                         if (i > 1) { // because first one was opened for size
 276                                 try {
 277                                         imageIn = openEx(url + i + ".html");
 278                                         imageDoc = DataUtil.load(imageIn, "UTF-8", url + i
 279                                                         + ".html");
 280
 281                                         String linkImage = imageDoc.getElementById("image").absUrl(
 282                                                         "src");
 283                                         if (linkImage != null) {
 284                                                 builder.append("[");
 285                                                 // to help with the retry and the originalUrl, part 1
 286                                                 builder.append(withoutQuery(linkImage));
 287                                                 builder.append("]<br/>");
 288                                         }
 289
 290                                         // to help with the retry and the originalUrl, part 2
 291                                         refresh(linkImage);
 292                                 } catch (IOException e) {
 293                                         Instance.getTraceHandler().error(
 294                                                         new IOException("Cannot get image " + i
 295                                                                         + " of manga", e));
 296                                 } finally {
 297                                         if (imageIn != null) {
 298                                                 imageIn.close();
 299                                         }
 300                                 }
 301                         }
 302                 }
 303
 304                 return builder.toString();
 305         }
 306
 307         /**
 308          * Refresh the {@link URL} by calling {@link MangaFox#openEx(String)}.
 309          *
 310          * @param url
 311          *            the URL to refresh
 312          *
 313          * @return TRUE if it was refreshed
 314          */
 315         private boolean refresh(String url) {
 316                 try {
 317                         openEx(url).close();
 318                         return true;
 319                 } catch (Exception e) {
 320                         return false;
 321                 }
 322         }
 323
 324         /**
 325          * Open the URL through the cache, but: retry a second time after 100ms if
 326          * it fails, remove the query part of the {@link URL} before saving it to
 327          * the cache (so it can be recalled later).
 328          *
 329          * @param url
 330          *            the {@link URL}
 331          *
 332          * @return the resource
 333          *
 334          * @throws IOException
 335          *             in case of I/O error
 336          */
 337         private InputStream openEx(String url) throws IOException {
 338                 try {
 339                         return Instance.getCache().open(new URL(url), withoutQuery(url),
 340                                         this, true);
 341                 } catch (Exception e) {
 342                         // second chance
 343                         try {
 344                                 Thread.sleep(100);
 345                         } catch (InterruptedException ee) {
 346                         }
 347
 348                         return Instance.getCache().open(new URL(url), withoutQuery(url),
 349                                         this, true);
 350                 }
 351         }
 352
 353         /**
 354          * Return the same input {@link URL} but without the query part.
 355          *
 356          * @param url
 357          *            the inpiut {@link URL} as a {@link String}
 358          *
 359          * @return the input {@link URL} without query
 360          */
 361         private URL withoutQuery(String url) {
 362                 URL o = null;
 363                 try {
 364                         // Remove the query from o (originalUrl), so it can be cached
 365                         // correctly
 366                         o = new URL(url);
 367                         o = new URL(o.getProtocol() + "://" + o.getHost() + o.getPath());
 368
 369                         return o;
 370                 } catch (MalformedURLException e) {
 371                         return null;
 372                 }
 373         }
 374
 375         /**
 376          * Explode an HTML comma-separated list of values into a non-duplicate text
 377          * {@link List} .
 378          *
 379          * @param values
 380          *            the comma-separated values in HTML format
 381          *
 382          * @return the full list with no duplicate in text format
 383          */
 384         private List<String> explode(String values) {
 385                 List<String> list = new ArrayList<String>();
 386                 if (values != null && !values.isEmpty()) {
 387                         for (String auth : values.split(",")) {
 388                                 String a = StringUtils.unhtml(auth).trim();
 389                                 if (!a.isEmpty() && !list.contains(a.trim())) {
 390                                         list.add(a);
 391                                 }
 392                         }
 393                 }
 394
 395                 return list;
 396         }
 397
 398         @Override
 399         protected boolean supports(URL url) {
 400                 return "mangafox.me".equals(url.getHost())
 401                                 || "www.mangafox.me".equals(url.getHost())
 402                                 || "fanfox.net".equals(url.getHost())
 403                                 || "www.fanfox.net".equals(url.getHost());
 404         }
 405 }