From 445e6bbc61da68149ccd12e05cf781cffcf05aee Mon Sep 17 00:00:00 2001 From: Vitaly Takmazov Date: Thu, 13 Dec 2018 10:49:16 +0300 Subject: do not count linebreak as part of entity --- src/main/java/com/juick/util/MessageUtils.java | 41 +++++++++++++++++--------- src/test/java/com/juick/MessageTest.java | 4 +++ 2 files changed, 31 insertions(+), 14 deletions(-) diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index cc0d7b12..5669a454 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -94,29 +94,29 @@ public class MessageUtils { String txt = msg.getText(); // http://juick.com/last?page=2 - List result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)))); + List result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)), 0)); // [link text][http://juick.com/last?page=2] - result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)))); + result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // [link text](http://juick.com/last?page=2) - result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)))); + result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // #12345 - result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)))); + result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)), 0)); // #12345/65 - result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))))); + result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))), 0)); // /12 - result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))))); + result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))), 0)); // @username@jabber.org - result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // @username - result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // *bold* - result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // /italic/ - result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // _underline_ - result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // > citate - result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty())); + result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty(), 1)); return result; } @@ -369,7 +369,20 @@ public class MessageUtils { return collectMatches(jidPattern, msg.getText()); } - private static List entitiesForType(String type, String input, String patternText, Function textGroup, Function> linkGroup) { + /** + * + * @param type Name of the entity + * @param input data to find matches + * @param patternText pattern to match + * @param textGroup function which return text representation + * @param linkGroup function which return link address + * @param endGroupId group id used to set end of entity (e.g. do not count linebreak as part of quote entity) + * @return list of entities + */ + private static List entitiesForType(String type, String input, String patternText, + Function textGroup, + Function> linkGroup, + int endGroupId) { List result = new ArrayList<>(); Pattern pattern = Pattern.compile(patternText); Matcher matcher = pattern.matcher(input); @@ -380,7 +393,7 @@ public class MessageUtils { Optional link = linkGroup.apply(matcher); link.ifPresent(entity::setUrl); entity.setStart(matcher.start()); - entity.setEnd(matcher.end()); + entity.setEnd(matcher.end(endGroupId)); result.add(entity); } return result; diff --git a/src/test/java/com/juick/MessageTest.java b/src/test/java/com/juick/MessageTest.java index 1d59a8db..4c37a678 100644 --- a/src/test/java/com/juick/MessageTest.java +++ b/src/test/java/com/juick/MessageTest.java @@ -191,5 +191,9 @@ public class MessageTest { assertThat(entities.stream().filter(e -> e.getType().equals("q")).count(), is(1L)); assertThat(entities.stream().filter(e -> e.getType().equals("u")).count(), is(1L)); assertThat(entities.stream().filter(e -> e.getType().equals("a")).count(), is(4L)); + Entity yo = entities.stream().filter(e -> e.getType().equals("q")).findFirst().orElseThrow(IllegalStateException::new); + assertThat(yo.getText(), is("how ?")); + assertThat(yo.getStart(), is(0)); + assertThat(yo.getEnd(), is(7)); } } -- cgit v1.2.3