[jvcard.git] / lanterna / TerminalTextUtils.java

/*
 * This file is part of lanterna (http://code.google.com/p/lanterna/).
 * 
 * lanterna is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 * 
 * Copyright (C) 2010-2015 Martin
 */
package com.googlecode.lanterna;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

/**
 * This class contains a number of utility methods for analyzing characters and
 * strings in a terminal context. The main purpose is to make it easier to work
 * with text that may or may not contain double-width text characters, such as
 * CJK (Chinese, Japanese, Korean) and other special symbols. This class assumes
 * those are all double-width and in case the terminal (-emulator) chooses to
 * draw them (somehow) as single-column then all the calculations in this class
 * will be wrong. It seems safe to assume what this class considers double-width
 * really is taking up two columns though.
 * 
 * @author Martin
 */
public class TerminalTextUtils {
	private TerminalTextUtils() {
		// Static utility class, not meant to be instantiated
	}

	/**
	 * Given a character, is this character considered to be a CJK character?
	 * Shamelessly stolen from <a href="http://stackoverflow.com/questions/1499804/how-can-i-detect-japanese-text-in-a-java-string"
	 * >StackOverflow</a> where it was contributed by user Rakesh N
	 * <p>
	 * The decision is made purely on the Unicode block of the character. Since
	 * the parameter is a single {@code char}, only characters of the Basic
	 * Multilingual Plane can ever match; a surrogate half belongs to a
	 * surrogate block and is reported as non-CJK.
	 * 
	 * @param c
	 *            Character to test
	 * @return {@code true} if the character is a CJK character
	 * 
	 */
	public static boolean isCharCJK(final char c) {
		Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(c);
		return (unicodeBlock == Character.UnicodeBlock.HIRAGANA)
				|| (unicodeBlock == Character.UnicodeBlock.KATAKANA)
				|| (unicodeBlock == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS)
				|| (unicodeBlock == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO)
				|| (unicodeBlock == Character.UnicodeBlock.HANGUL_JAMO)
				|| (unicodeBlock == Character.UnicodeBlock.HANGUL_SYLLABLES)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT)
				|| (unicodeBlock == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION)
				|| (unicodeBlock == Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS)
				// The magic number here is the separating index between
				// full-width and half-width inside this block
				|| (unicodeBlock == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS && c < 0xFF61);
	}

	/**
	 * Checks if a character is expected to be taking up two columns if printed
	 * to a terminal. This will generally be {@code true} for CJK (Chinese,
	 * Japanese and Korean) characters. The current implementation simply
	 * delegates to {@link #isCharCJK(char)}.
	 * 
	 * @param c
	 *            Character to test if it's double-width when printed to a
	 *            terminal
	 * @return {@code true} if this character is expected to be taking up two
	 *         columns when printed to the terminal, otherwise {@code false}
	 */
	public static boolean isCharDoubleWidth(final char c) {
		return isCharCJK(c);
	}

	/**
	 * Returns the number of terminal columns the string occupies.
	 * 
	 * @param s
	 *            String to check length
	 * @return Number of actual terminal columns the string would occupy
	 * @deprecated Call {@code getColumnWidth(s)} instead
	 */
	@Deprecated
	public static int getTrueWidth(String s) {
		return getColumnWidth(s);
	}

	/**
	 * Given a string, returns how many columns this string would need to occupy
	 * in a terminal, taking into account that CJK characters take up two
	 * columns.
	 * 
	 * @param s
	 *            String to check length
	 * @return Number of actual terminal columns the string would occupy
	 */
	public static int getColumnWidth(String s) {
		return getColumnIndex(s, s.length());
	}

	/**
	 * Given a string and a character index inside that string, find out what
	 * the column index of that character would be if printed in a terminal. If
	 * the string only contains non-CJK characters then the returned value will
	 * be same as {@code stringCharacterIndex}, but if there are CJK characters
	 * the value will be different due to CJK characters taking up two columns
	 * in width. If the character at the index in the string is a CJK character
	 * itself, the returned value will be the index of the left-side of
	 * character.
	 * <p>
	 * Passing {@code s.length()} is allowed and yields the total column width
	 * of the string. A zero or negative index yields 0.
	 * 
	 * @param s
	 *            String to translate the index from
	 * @param stringCharacterIndex
	 *            Index within the string to get the terminal column index of
	 * @return Index of the character inside the String at {@code
	 *         stringCharacterIndex} when it has been written to a terminal
	 * @throws StringIndexOutOfBoundsException
	 *             if the index given is greater than the String length
	 */
	public static int getColumnIndex(String s, int stringCharacterIndex)
			throws StringIndexOutOfBoundsException {
		int index = 0;
		for (int i = 0; i < stringCharacterIndex; i++) {
			// Every character takes one column, CJK characters one more
			if (isCharCJK(s.charAt(i))) {
				index++;
			}
			index++;
		}
		return index;
	}

	/**
	 * This method does the reverse of getColumnIndex, given a String and
	 * imagining it has been printed out to the top-left corner of a terminal,
	 * in the column specified by {@code columnIndex}, what is the index of that
	 * character in the string. If the string contains no CJK characters, this
	 * will always be the same as {@code columnIndex}. If the index specified is
	 * the right column of a CJK character, the index is the same as if the
	 * column was the left column. So calling {@code
	 * getStringCharacterIndex("\u82F1", 0)} and {@code
	 * getStringCharacterIndex("\u82F1", 1)} will both return 0.
	 * <p>
	 * A {@code columnIndex} equal to the column width of the whole string
	 * returns {@code s.length()}; a zero or negative value returns 0.
	 * 
	 * @param s
	 *            String to translate the index to
	 * @param columnIndex
	 *            Column index of the string written to a terminal
	 * @return The index in the string of the character in terminal column
	 *         {@code columnIndex}
	 * @throws StringIndexOutOfBoundsException
	 *             if {@code columnIndex} is greater than the column width of
	 *             the string
	 */
	public static int getStringCharacterIndex(String s, int columnIndex) {
		int index = 0;
		int counter = 0;
		while (counter < columnIndex) {
			if (isCharCJK(s.charAt(index++))) {
				counter++;
				if (counter == columnIndex) {
					// The requested column is the right half of this CJK
					// character, so report the character itself
					return index - 1;
				}
			}
			counter++;
		}
		return index;
	}

	/**
	 * Given a string that may or may not contain CJK characters, returns the
	 * substring which will fit inside <code>availableColumnSpace</code>
	 * columns. This method does not handle special cases like tab or new-line.
	 * <p>
	 * Calling this method is the same as calling {@code fitString(string, 0,
	 * availableColumnSpace)}.
	 * 
	 * @param string
	 *            The string to fit inside the availableColumnSpace
	 * @param availableColumnSpace
	 *            Number of columns to fit the string inside
	 * @return The whole or part of the input string which will fit inside the
	 *         supplied availableColumnSpace
	 */
	public static String fitString(String string, int availableColumnSpace) {
		return fitString(string, 0, availableColumnSpace);
	}

	/**
	 * Given a string that may or may not contain CJK characters, returns the
	 * substring which will fit inside <code>availableColumnSpace</code>
	 * columns. This method does not handle special cases like tab or new-line.
	 * <p>
	 * This overload has a {@code fromColumn} parameter that specifies where
	 * inside the string to start fitting. Please notice that {@code fromColumn}
	 * is not a character index inside the string, but a column index as if the
	 * string has been printed from the left-most side of the terminal. So if
	 * the string is "\u65E5\u672C\u8A9E", fromColumn set to 1 will not start
	 * counting from the second character ("\u672C") in the string but from the
	 * CJK filler character belonging to "\u65E5". If you want to count from a
	 * particular character index inside the string, please pass in a substring
	 * and use fromColumn set to 0.
	 * <p>
	 * A CJK character that is cut in half, either by {@code fromColumn} or by
	 * the end of the available space, is replaced by a single space in the
	 * result.
	 * 
	 * @param string
	 *            The string to fit inside the availableColumnSpace
	 * @param fromColumn
	 *            From what column of the input string to start fitting (see
	 *            description above!); expected to be non-negative
	 * @param availableColumnSpace
	 *            Number of columns to fit the string inside; zero or less
	 *            yields an empty string
	 * @return The whole or part of the input string which will fit inside the
	 *         supplied availableColumnSpace
	 */
	public static String fitString(String string, int fromColumn,
			int availableColumnSpace) {
		if (availableColumnSpace <= 0) {
			return "";
		}

		StringBuilder bob = new StringBuilder();
		int column = 0;
		int index = 0;
		// Skip the characters that lie entirely to the left of fromColumn
		while (index < string.length() && column < fromColumn) {
			char c = string.charAt(index++);
			column += TerminalTextUtils.isCharCJK(c) ? 2 : 1;
		}
		if (column > fromColumn) {
			// fromColumn is the right half of a CJK character; pad with a
			// space in place of the half character
			bob.append(" ");
			availableColumnSpace--;
		}

		while (availableColumnSpace > 0 && index < string.length()) {
			char c = string.charAt(index++);
			availableColumnSpace -= TerminalTextUtils.isCharCJK(c) ? 2 : 1;
			if (availableColumnSpace < 0) {
				// Only one column was left but the character needs two
				bob.append(' ');
			} else {
				bob.append(c);
			}
		}
		return bob.toString();
	}

	/**
	 * This method will calculate word wrappings given a number of lines of text
	 * and how wide the text can be printed. The result is a list of new rows
	 * where word-wrapping was applied.
	 * <p>
	 * A row is preferably broken at a space character or in front of a CJK
	 * character. If no such position exists the row is cut at the last
	 * character that still fits inside {@code maxWidth} columns; the only
	 * time a row may exceed {@code maxWidth} is when a single double-width
	 * character is wider than {@code maxWidth} itself. Space characters at the
	 * break position are dropped, and a remainder consisting only of such
	 * space characters does not produce an additional (empty) row.
	 * 
	 * @param maxWidth
	 *            Maximum number of columns that can be used before
	 *            word-wrapping is applied; if zero or negative, no wrapping is
	 *            possible and the lines are returned as they are
	 * @param lines
	 *            Input text
	 * @return The input text word-wrapped at {@code maxWidth}; this may contain
	 *         more rows than the input text
	 */
	public static List<String> getWordWrappedText(int maxWidth, String... lines) {
		if (maxWidth <= 0) {
			// Nothing fits into zero columns, so wrapping could never make
			// progress; hand the lines back untouched
			return new ArrayList<String>(Arrays.asList(lines));
		}

		List<String> result = new ArrayList<String>();
		LinkedList<String> linesToBeWrapped = new LinkedList<String>(Arrays
				.asList(lines));
		while (!linesToBeWrapped.isEmpty()) {
			String row = linesToBeWrapped.removeFirst();
			if (getColumnWidth(row) <= maxWidth) {
				result.add(row);
				continue;
			}

			// Index of the character covering column maxWidth, i.e. the first
			// character that does not fit on this row any more. It is a valid
			// index because the row is wider than maxWidth.
			int hardCutIndex = getStringCharacterIndex(row, maxWidth);

			// Now search in reverse and find the first possible line-break
			int breakIndex = hardCutIndex;
			while (breakIndex > 0
					&& !Character.isSpaceChar(row.charAt(breakIndex))
					&& !isCharCJK(row.charAt(breakIndex))) {
				breakIndex--;
			}

			if (breakIndex == 0) {
				// Failed! There was no 'nice' place to cut so just cut where
				// the available columns end. Always consume at least one
				// character, otherwise the same row would be queued forever.
				int cutIndex = Math.max(hardCutIndex, 1);
				result.add(row.substring(0, cutIndex));
				if (cutIndex < row.length()) {
					linesToBeWrapped.addFirst(row.substring(cutIndex));
				}
			} else {
				// Ok, split the row, add it to the result and continue
				// processing the second half on a new line
				result.add(row.substring(0, breakIndex));
				while (breakIndex < row.length()
						&& Character.isSpaceChar(row.charAt(breakIndex))) {
					breakIndex++;
				}
				// Do not queue an empty remainder; it would show up as a
				// spurious blank row in the result
				if (breakIndex < row.length()) {
					linesToBeWrapped.addFirst(row.substring(breakIndex));
				}
			}
		}
		return result;
	}
}
Commit	Line	Data
	1	/*
	2	* This file is part of lanterna (http://code.google.com/p/lanterna/).
	3	*
	4	* lanterna is free software: you can redistribute it and/or modify
	5	* it under the terms of the GNU Lesser General Public License as published by
	6	* the Free Software Foundation, either version 3 of the License, or
	7	* (at your option) any later version.
	8	*
	9	* This program is distributed in the hope that it will be useful,
	10	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	11	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	12	* GNU Lesser General Public License for more details.
	13	*
	14	* You should have received a copy of the GNU Lesser General Public License
	15	* along with this program. If not, see <http://www.gnu.org/licenses/>.
	16	*
	17	* Copyright (C) 2010-2015 Martin
	18	*/
	19	package com.googlecode.lanterna;
	20
	21	import java.util.ArrayList;
	22	import java.util.Arrays;
	23	import java.util.LinkedList;
	24	import java.util.List;
	25
	26	/**
	27	* This class contains a number of utility methods for analyzing characters and
	28	* strings in a terminal context. The main purpose is to make it easier to work
	29	* with text that may or may not contain double-width text characters, such as
	30	* CJK (Chinese, Japanese, Korean) and other special symbols. This class assumes
	31	* those are all double-width and in case the terminal (-emulator) chooses to
	32	* draw them (somehow) as single-column then all the calculations in this class
	33	* will be wrong. It seems safe to assume what this class considers double-width
	34	* really is taking up two columns though.
	35	*
	36	* @author Martin
	37	*/
	38	public class TerminalTextUtils {
	39	private TerminalTextUtils() {
	40	}
	41
	42	/**
	43	* Given a character, is this character considered to be a CJK character?
	44	* Shamelessly stolen from <a href="http://stackoverflow.com/questions/1499804/how-can-i-detect-japanese-text-in-a-java-string"
	45	* >StackOverflow</a> where it was contributed by user Rakesh N
	46	*
	47	* @param c
	48	* Character to test
	49	* @return {@code true} if the character is a CJK character
	50	*
	51	*/
	52	public static boolean isCharCJK(final char c) {
	53	Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(c);
	54	return (unicodeBlock == Character.UnicodeBlock.HIRAGANA)
	55	\|\| (unicodeBlock == Character.UnicodeBlock.KATAKANA)
	56	\|\| (unicodeBlock == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS)
	57	\|\| (unicodeBlock == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO)
	58	\|\| (unicodeBlock == Character.UnicodeBlock.HANGUL_JAMO)
	59	\|\| (unicodeBlock == Character.UnicodeBlock.HANGUL_SYLLABLES)
	60	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS)
	61	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A)
	62	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B)
	63	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_FORMS)
	64	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS)
	65	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_RADICALS_SUPPLEMENT)
	66	\|\| (unicodeBlock == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION)
	67	\|\| (unicodeBlock == Character.UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS)
	68	\|\| (unicodeBlock == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS && c < 0xFF61); // The
	69	// magic
	70	// number
	71	// here
	72	// is
	73	// the
	74	// separating
	75	// index
	76	// between
	77	// full-width
	78	// and
	79	// half-width
	80	}
	81
	82	/**
	83	* Checks if a character is expected to be taking up two columns if printed
	84	* to a terminal. This will generally be {@code true} for CJK (Chinese,
	85	* Japanese and Korean) characters.
	86	*
	87	* @param c
	88	* Character to test if it's double-width when printed to a
	89	* terminal
	90	* @return {@code true} if this character is expected to be taking up two
	91	* columns when printed to the terminal, otherwise {@code false}
	92	*/
	93	public static boolean isCharDoubleWidth(final char c) {
	94	return isCharCJK(c);
	95	}
	96
	97	/**
	98	* @deprecated Call {@code getColumnWidth(s)} instead
	99	*/
	100	@Deprecated
	101	public static int getTrueWidth(String s) {
	102	return getColumnWidth(s);
	103	}
	104
	105	/**
	106	* Given a string, returns how many columns this string would need to occupy
	107	* in a terminal, taking into account that CJK characters takes up two
	108	* columns.
	109	*
	110	* @param s
	111	* String to check length
	112	* @return Number of actual terminal columns the string would occupy
	113	*/
	114	public static int getColumnWidth(String s) {
	115	return getColumnIndex(s, s.length());
	116	}
	117
	118	/**
	119	* Given a string and a character index inside that string, find out what
	120	* the column index of that character would be if printed in a terminal. If
	121	* the string only contains non-CJK characters then the returned value will
	122	* be same as {@code stringCharacterIndex}, but if there are CJK characters
	123	* the value will be different due to CJK characters taking up two columns
	124	* in width. If the character at the index in the string is a CJK character
	125	* itself, the returned value will be the index of the left-side of
	126	* character.
	127	*
	128	* @param s
	129	* String to translate the index from
	130	* @param stringCharacterIndex
	131	* Index within the string to get the terminal column index of
	132	* @return Index of the character inside the String at {@code
	133	* stringCharacterIndex} when it has been writted to a terminal
	134	* @throws StringIndexOutOfBoundsException
	135	* if the index given is outside the String length or negative
	136	*/
	137	public static int getColumnIndex(String s, int stringCharacterIndex)
	138	throws StringIndexOutOfBoundsException {
	139	int index = 0;
	140	for (int i = 0; i < stringCharacterIndex; i++) {
	141	if (isCharCJK(s.charAt(i))) {
	142	index++;
	143	}
	144	index++;
	145	}
	146	return index;
	147	}
	148
	149	/**
	150	* This method does the reverse of getColumnIndex, given a String and
	151	* imagining it has been printed out to the top-left corner of a terminal,
	152	* in the column specified by {@code columnIndex}, what is the index of that
	153	* character in the string. If the string contains no CJK characters, this
	154	* will always be the same as {@code columnIndex}. If the index specified is
	155	* the right column of a CJK character, the index is the same as if the
	156	* column was the left column. So calling {@code
	157	* getStringCharacterIndex("英", 0)} and {@code getStringCharacterIndex("英",
	158	* 1)} will both return 0.
	159	*
	160	* @param s
	161	* String to translate the index to
	162	* @param columnIndex
	163	* Column index of the string written to a terminal
	164	* @return The index in the string of the character in terminal column
	165	* {@code columnIndex}
	166	*/
	167	public static int getStringCharacterIndex(String s, int columnIndex) {
	168	int index = 0;
	169	int counter = 0;
	170	while (counter < columnIndex) {
	171	if (isCharCJK(s.charAt(index++))) {
	172	counter++;
	173	if (counter == columnIndex) {
	174	return index - 1;
	175	}
	176	}
	177	counter++;
	178	}
	179	return index;
	180	}
	181
	182	/**
	183	* Given a string that may or may not contain CJK characters, returns the
	184	* substring which will fit inside <code>availableColumnSpace</code>
	185	* columns. This method does not handle special cases like tab or new-line.
	186	* <p>
	187	* Calling this method is the same as calling {@code fitString(string, 0,
	188	* availableColumnSpace)}.
	189	*
	190	* @param string
	191	* The string to fit inside the availableColumnSpace
	192	* @param availableColumnSpace
	193	* Number of columns to fit the string inside
	194	* @return The whole or part of the input string which will fit inside the
	195	* supplied availableColumnSpace
	196	*/
	197	public static String fitString(String string, int availableColumnSpace) {
	198	return fitString(string, 0, availableColumnSpace);
	199	}
	200
	201	/**
	202	* Given a string that may or may not contain CJK characters, returns the
	203	* substring which will fit inside <code>availableColumnSpace</code>
	204	* columns. This method does not handle special cases like tab or new-line.
	205	* <p>
	206	* This overload has a {@code fromColumn} parameter that specified where
	207	* inside the string to start fitting. Please notice that {@code fromColumn}
	208	* is not a character index inside the string, but a column index as if the
	209	* string has been printed from the left-most side of the terminal. So if
	210	* the string is "日本語", fromColumn set to 1 will not starting counting from
	211	* the second character ("本") in the string but from the CJK filler
	212	* character belonging to "日". If you want to count from a particular
	213	* character index inside the string, please pass in a substring and use
	214	* fromColumn set to 0.
	215	*
	216	* @param string
	217	* The string to fit inside the availableColumnSpace
	218	* @param fromColumn
	219	* From what column of the input string to start fitting (see
	220	* description above!)
	221	* @param availableColumnSpace
	222	* Number of columns to fit the string inside
	223	* @return The whole or part of the input string which will fit inside the
	224	* supplied availableColumnSpace
	225	*/
	226	public static String fitString(String string, int fromColumn,
	227	int availableColumnSpace) {
	228	if (availableColumnSpace <= 0) {
	229	return "";
	230	}
	231
	232	StringBuilder bob = new StringBuilder();
	233	int column = 0;
	234	int index = 0;
	235	while (index < string.length() && column < fromColumn) {
	236	char c = string.charAt(index++);
	237	column += TerminalTextUtils.isCharCJK(c) ? 2 : 1;
	238	}
	239	if (column > fromColumn) {
	240	bob.append(" ");
	241	availableColumnSpace--;
	242	}
	243
	244	while (availableColumnSpace > 0 && index < string.length()) {
	245	char c = string.charAt(index++);
	246	availableColumnSpace -= TerminalTextUtils.isCharCJK(c) ? 2 : 1;
	247	if (availableColumnSpace < 0) {
	248	bob.append(' ');
	249	} else {
	250	bob.append(c);
	251	}
	252	}
	253	return bob.toString();
	254	}
	255
	256	/**
	257	* This method will calculate word wrappings given a number of lines of text
	258	* and how wide the text can be printed. The result is a list of new rows
	259	* where word-wrapping was applied.
	260	*
	261	* @param maxWidth
	262	* Maximum number of columns that can be used before
	263	* word-wrapping is applied
	264	* @param lines
	265	* Input text
	266	* @return The input text word-wrapped at {@code maxWidth}; this may contain
	267	* more rows than the input text
	268	*/
	269	public static List<String> getWordWrappedText(int maxWidth, String... lines) {
	270	List<String> result = new ArrayList<String>();
	271	LinkedList<String> linesToBeWrapped = new LinkedList<String>(Arrays
	272	.asList(lines));
	273	while (!linesToBeWrapped.isEmpty()) {
	274	String row = linesToBeWrapped.removeFirst();
	275	int rowWidth = getColumnWidth(row);
	276	if (rowWidth <= maxWidth) {
	277	result.add(row);
	278	} else {
	279	// Now search in reverse and find the first possible line-break
	280	int characterIndex = getStringCharacterIndex(row, maxWidth);
	281	while (!Character.isSpaceChar(row.charAt(characterIndex))
	282	&& !isCharCJK(row.charAt(characterIndex))
	283	&& characterIndex > 0) {
	284	characterIndex--;
	285	}
	286
	287	if (characterIndex == 0) {
	288	// Failed! There was no 'nice' place to cut so just cut it
	289	// at maxWidth
	290	result.add(row.substring(0, maxWidth));
	291	linesToBeWrapped.addFirst(row.substring(maxWidth));
	292	} else {
	293	// Ok, split the row, add it to the result and continue
	294	// processing the second half on a new line
	295	result.add(row.substring(0, characterIndex));
	296	int spaceCharsToSkip = 0;
	297	while (characterIndex < row.length()
	298	&& Character
	299	.isSpaceChar(row.charAt(characterIndex))) {
	300	characterIndex++;
	301	}
	302	;
	303	linesToBeWrapped.addFirst(row.substring(characterIndex));
	304	}
	305	}
	306	}
	307	return result;
	308	}
	309	}