From f7eeb5c5d40648bf1e0f76dab4e68c96995872c7 Mon Sep 17 00:00:00 2001 From: Vitaly Takmazov Date: Mon, 12 Nov 2018 12:10:44 +0300 Subject: api: message entities --- src/main/java/com/juick/Message.java | 11 +++ src/main/java/com/juick/model/Entity.java | 49 +++++++++++++ .../com/juick/service/MessagesServiceImpl.java | 5 ++ src/main/java/com/juick/util/MessageUtils.java | 82 ++++++++++++++++++---- src/test/java/com/juick/MessageTest.java | 12 ++++ 5 files changed, 147 insertions(+), 12 deletions(-) create mode 100644 src/main/java/com/juick/model/Entity.java diff --git a/src/main/java/com/juick/Message.java b/src/main/java/com/juick/Message.java index bd2c91b5..10380826 100644 --- a/src/main/java/com/juick/Message.java +++ b/src/main/java/com/juick/Message.java @@ -20,6 +20,7 @@ import com.fasterxml.jackson.annotation.JsonFormat; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; import com.juick.adapters.SimpleDateAdapter; +import com.juick.model.Entity; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.builder.ToStringBuilder; @@ -82,6 +83,8 @@ public class Message implements Comparable { private Set recommendations; + private List entities; + public Message() { tags = new ArrayList<>(); reactions = new HashSet<>(); @@ -375,4 +378,12 @@ public class Message implements Comparable { public void setReplyToUri(URI replyToUri) { this.replyToUri = replyToUri; } + + public List getEntities() { + return entities; + } + + public void setEntities(List entities) { + this.entities = entities; + } } diff --git a/src/main/java/com/juick/model/Entity.java b/src/main/java/com/juick/model/Entity.java new file mode 100644 index 00000000..15a88a32 --- /dev/null +++ b/src/main/java/com/juick/model/Entity.java @@ -0,0 +1,49 @@ +package com.juick.model; + +public class Entity { + private String type; + private String url; + private String text; + private int start; + private int end; + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getText() { + return text; + } + + public void setText(String text) { + this.text = text; + } + + public int getStart() { + return start; + } + + public void setStart(int start) { + this.start = start; + } + + public int getEnd() { + return end; + } + + public void setEnd(int end) { + this.end = end; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } +} diff --git a/src/main/java/com/juick/service/MessagesServiceImpl.java b/src/main/java/com/juick/service/MessagesServiceImpl.java index 0b7faf87..efe480c0 100644 --- a/src/main/java/com/juick/service/MessagesServiceImpl.java +++ b/src/main/java/com/juick/service/MessagesServiceImpl.java @@ -368,6 +368,7 @@ public class MessagesServiceImpl extends BaseJdbcService implements MessagesServ final Message message = list.get(0); Map> reactionStats = updateReactionsFor(Collections.singletonList(mid)); message.setReactions(reactionStats.get(message.getMid())); + message.setEntities(MessageUtils.getEntities(message)); return message; } return null; @@ -429,6 +430,7 @@ public class MessagesServiceImpl extends BaseJdbcService implements MessagesServ logger.warn("exception reading images for mid {} rid {}", msg.getMid(), msg.getRid(), e); } } + msg.setEntities(MessageUtils.getEntities(msg)); return msg; }, mid, rid); @@ -882,6 +884,8 @@ public class MessagesServiceImpl extends BaseJdbcService implements MessagesServ msgs.sort(Comparator.comparing(item -> mids.indexOf(item.getMid()))); + msgs.forEach(i -> i.setEntities(MessageUtils.getEntities(i))); + return msgs; } return Collections.emptyList(); @@ -957,6 +961,7 @@ public class MessagesServiceImpl extends BaseJdbcService implements MessagesServ if (replies.size() > 0) { setRead(user, mid); } + replies.forEach(i -> i.setEntities(MessageUtils.getEntities(i))); return replies; } diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index fd357c32..b1af2cdc 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -20,6 +20,7 @@ package com.juick.util; import com.juick.Message; import com.juick.Tag; import com.juick.User; +import com.juick.model.Entity; import org.apache.commons.codec.CharEncoding; import org.apache.commons.lang3.StringUtils; import org.springframework.web.util.UriComponentsBuilder; @@ -27,10 +28,8 @@ import org.springframework.web.util.UriComponentsBuilder; import java.io.UnsupportedEncodingException; import java.net.URI; import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.TreeSet; +import java.util.*; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -74,6 +73,49 @@ public class MessageUtils { private final static String jidRegex = "((?<=\\s)|(?<=\\A))@([\\w\\-\\.]+@[\\w\\-\\.]+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static Pattern jidPattern = Pattern.compile(jidRegex); + private final static String midRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)((?=\\s)|(?=\\Z)|(?=\\))|(?=\\.)|(?=\\,))"; + + private final static String ridRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)/(\\d+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String textUrlRegex = "\\[([^\\]]+)\\]\\[((?:ht|f)tps?://[^\\]]+)\\]"; + + private final static String boldRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))\\*([^\\*\\n<>]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String italicRegex = "((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String underlineRegex = "((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; + + private final static String citateRegex = "(?:(?<=\\n)|(?<=\\A))> *(.*)?(\\n|(?=\\Z))"; + + public static List getEntities(Message msg) { + + String txt = msg.getText(); + + // http://juick.com/last?page=2 + List result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)))); + // [link text][http://juick.com/last?page=2] + result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)))); + // #12345 + result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)))); + // #12345/65 + result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))))); + // /12 + result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))))); + // @username@jabber.org + result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + // @username + result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + // *bold* + result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + // /italic/ + result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + // _underline_ + result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + // > citate + result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty())); + return result; + } + public static String formatMessageCode(String msg) { msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); @@ -112,28 +154,27 @@ public class MessageUtils { // [link text][http://juick.com/last?page=2] // link text - msg = msg.replaceAll("\\[([^\\]]+)\\]\\[((?:ht|f)tps?://[^\\]]+)\\]", "$1"); - msg = msg.replaceAll("\\[([^\\]]+)\\]\\(((?:ht|f)tps?://[^\\)]+)\\)", "$1"); + msg = msg.replaceAll(textUrlRegex, "$1"); // #12345 // #12345 - msg = msg.replaceAll("((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)((?=\\s)|(?=\\Z)|(?=\\))|(?=\\.)|(?=\\,))", "$1#$2$3"); + msg = msg.replaceAll(midRegex, "$1#$2$3"); // #12345/65 // #12345/65 - msg = msg.replaceAll("((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)/(\\d+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1#$2/$3$4"); + msg = msg.replaceAll(ridRegex, "$1#$2/$3$4"); // *bold* // bold - msg = msg.replaceAll("((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))\\*([^\\*\\n<>]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1$2$3"); + msg = msg.replaceAll(boldRegex, "$1$2$3"); // /italic/ // italic - msg = msg.replaceAll("((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1$2$3"); + msg = msg.replaceAll(italicRegex, "$1$2$3"); // _underline_ // underline - msg = msg.replaceAll("((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))", "$1$2$3"); + msg = msg.replaceAll(underlineRegex, "$1$2$3"); // /12 // /12 @@ -159,7 +200,7 @@ public class MessageUtils { msg = sb.toString(); // > citate - msg = msg.replaceAll("(?:(?<=\\n)|(?<=\\A))> *(.*)?(\\n|(?=\\Z))", "$1"); + msg = msg.replaceAll(citateRegex, "$1"); msg = msg.replaceAll("", "\n"); msg = msg.replaceAll("\n", "
\n"); @@ -321,4 +362,21 @@ public class MessageUtils { public static List getGlobalMentions(Message msg) { return collectMatches(jidPattern, msg.getText()); } + + private static List entitiesForType(String type, String input, String patternText, Function textGroup, Function> linkGroup) { + List result = new ArrayList<>(); + Pattern pattern = Pattern.compile(patternText); + Matcher matcher = pattern.matcher(input); + while (matcher.find()) { + Entity entity = new Entity(); + entity.setType(type); + entity.setText(textGroup.apply(matcher)); + Optional link = linkGroup.apply(matcher); + link.ifPresent(entity::setUrl); + entity.setStart(matcher.start()); + entity.setEnd(matcher.end()); + result.add(entity); + } + return result; + } } diff --git a/src/test/java/com/juick/MessageTest.java b/src/test/java/com/juick/MessageTest.java index 6197f861..f76054cd 100644 --- a/src/test/java/com/juick/MessageTest.java +++ b/src/test/java/com/juick/MessageTest.java @@ -17,12 +17,17 @@ package com.juick; +import com.juick.model.Entity; +import com.juick.test.util.MockUtils; import com.juick.util.MessageUtils; import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.StringUtils; import org.junit.Test; +import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -180,4 +185,11 @@ public class MessageTest { msg.setText("And dick is @ugnich@jabber.zp.ua"); assertThat(MessageUtils.getGlobalMentions(msg).size(), is(1)); } + @Test + public void entitiesTest() { + String msg = "http://google.com - there will @ugnich ask questions from #4321, then go to http://stackoverflow.com"; + Message testMessage = MockUtils.mockMessage(514, MockUtils.mockUser(5432, "fmap", "secret"), msg); + List entities = MessageUtils.getEntities(testMessage); + assertThat(entities.size(), is(4)); + } } -- cgit v1.2.3