diff options
author | Vitaly Takmazov | 2018-11-12 12:10:44 +0300 |
---|---|---|
committer | Vitaly Takmazov | 2018-11-12 12:10:49 +0300 |
commit | f7eeb5c5d40648bf1e0f76dab4e68c96995872c7 (patch) | |
tree | 79942d5fad59ca791e929b51eeb680dc55700165 /src/main/java/com/juick/util | |
parent | d158b85517189afd43532a752e0093394c135f0f (diff) |
api: message entities
Diffstat (limited to 'src/main/java/com/juick/util')
-rw-r--r-- | src/main/java/com/juick/util/MessageUtils.java | 82 |
1 files changed, 70 insertions, 12 deletions
diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index fd357c32..b1af2cdc 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -20,6 +20,7 @@ package com.juick.util; import com.juick.Message; import com.juick.Tag; import com.juick.User; +import com.juick.model.Entity; import org.apache.commons.codec.CharEncoding; import org.apache.commons.lang3.StringUtils; import org.springframework.web.util.UriComponentsBuilder; @@ -27,10 +28,8 @@ import org.springframework.web.util.UriComponentsBuilder; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; +import java.util.*; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -74,6 +73,49 @@ public class MessageUtils { private final static String jidRegex = "((?<=\\s)|(?<=\\A))@([\\w\\-\\.]+@[\\w\\-\\.]+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static Pattern jidPattern = Pattern.compile(jidRegex); + private final static String midRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)((?=\\s)|(?=\\Z)|(?=\\))|(?=\\.)|(?=\\,))"; + + private final static String ridRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)/(\\d+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String textUrlRegex = "\\[([^\\]]+)\\]\\[((?:ht|f)tps?://[^\\]]+)\\]"; + + private final static String boldRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))\\*([^\\*\\n<>]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String italicRegex = "((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String underlineRegex = "((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String citateRegex = 
"(?:(?<=\\n)|(?<=\\A))> *(.*)?(\\n|(?=\\Z))"; + + public static List<Entity> getEntities(Message msg) { + + String txt = msg.getText(); + + // http://juick.com/last?page=2 + List<Entity> result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)))); + // [link text][http://juick.com/last?page=2] + result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)))); + // #12345 + result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)))); + // #12345/65 + result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))))); + // /12 + result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))))); + // @username@jabber.org + result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + // @username + result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + // *bold* + result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + // /italic/ + result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + // _underline_ + result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + // > citate + result.addAll(entitiesForType("q", txt, citateRegex, matcher -> 
matcher.group(1), matcher -> Optional.empty())); + return result; + } + public static String formatMessageCode(String msg) { msg = msg.replaceAll("&", "&amp;"); msg = msg.replaceAll("<", "&lt;"); @@ -112,28 +154,27 @@ public class MessageUtils { // [link text][http://juick.com/last?page=2] // <a href="http://juick.com/last?page=2" rel="nofollow">link text</a> - msg = msg.replaceAll("\\[([^\\]]+)\\]\\[((?:ht|f)tps?://[^\\]]+)\\]", "<a href=\"$2\" rel=\"nofollow\">$1</a>"); - msg = msg.replaceAll("\\[([^\\]]+)\\]\\(((?:ht|f)tps?://[^\\)]+)\\)", "<a href=\"$2\" rel=\"nofollow\">$1</a>"); + msg = msg.replaceAll(textUrlRegex, "<a href=\"$2\" rel=\"nofollow\">$1</a>"); // #12345 // <a href="http://juick.com/12345">#12345</a> - msg = msg.replaceAll("((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)((?=\\s)|(?=\\Z)|(?=\\))|(?=\\.)|(?=\\,))", "$1<a href=\"https://juick.com/m/$2\">#$2</a>$3"); + msg = msg.replaceAll(midRegex, "$1<a href=\"https://juick.com/m/$2\">#$2</a>$3"); // #12345/65 // <a href="http://juick.com/12345#65">#12345/65</a> - msg = msg.replaceAll("((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)/(\\d+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1<a href=\"https://juick.com/m/$2#$3\">#$2/$3</a>$4"); + msg = msg.replaceAll(ridRegex, "$1<a href=\"https://juick.com/m/$2#$3\">#$2/$3</a>$4"); // *bold* // <b>bold</b> - msg = msg.replaceAll("((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))\\*([^\\*\\n<>]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1<b>$2</b>$3"); + msg = msg.replaceAll(boldRegex, "$1<b>$2</b>$3"); // /italic/ // <i>italic</i> - msg = msg.replaceAll("((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1<i>$2</i>$3"); + msg = msg.replaceAll(italicRegex, "$1<i>$2</i>$3"); // _underline_ // <span class="u">underline</span> - msg = msg.replaceAll("((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1<span class=\"u\">$2</span>$3"); + msg = msg.replaceAll(underlineRegex, "$1<span class=\"u\">$2</span>$3"); // /12 // <a href="#12">/12</a> @@ -159,7 +200,7 @@ 
public class MessageUtils { msg = sb.toString(); // > citate - msg = msg.replaceAll("(?:(?<=\\n)|(?<=\\A))> *(.*)?(\\n|(?=\\Z))", "<q>$1</q>"); + msg = msg.replaceAll(citateRegex, "<q>$1</q>"); msg = msg.replaceAll("</q><q>", "\n"); msg = msg.replaceAll("\n", "<br/>\n"); @@ -321,4 +362,21 @@ public class MessageUtils { public static List<String> getGlobalMentions(Message msg) { return collectMatches(jidPattern, msg.getText()); } + + private static List<Entity> entitiesForType(String type, String input, String patternText, Function<Matcher, String> textGroup, Function<Matcher, Optional<String>> linkGroup) { + List<Entity> result = new ArrayList<>(); + Pattern pattern = Pattern.compile(patternText); + Matcher matcher = pattern.matcher(input); + while (matcher.find()) { + Entity entity = new Entity(); + entity.setType(type); + entity.setText(textGroup.apply(matcher)); + Optional<String> link = linkGroup.apply(matcher); + link.ifPresent(entity::setUrl); + entity.setStart(matcher.start()); + entity.setEnd(matcher.end()); + result.add(entity); + } + return result; + } } |