From 445e6bbc61da68149ccd12e05cf781cffcf05aee Mon Sep 17 00:00:00 2001 From: Vitaly Takmazov Date: Thu, 13 Dec 2018 10:49:16 +0300 Subject: do not count linebreak as part of entity --- src/main/java/com/juick/util/MessageUtils.java | 41 +++++++++++++++++--------- 1 file changed, 27 insertions(+), 14 deletions(-) (limited to 'src/main/java/com/juick/util/MessageUtils.java') diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index cc0d7b12..5669a454 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -94,29 +94,29 @@ public class MessageUtils { String txt = msg.getText(); // http://juick.com/last?page=2 - List result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)))); + List result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)), 0)); // [link text][http://juick.com/last?page=2] - result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)))); + result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // [link text](http://juick.com/last?page=2) - result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)))); + result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // #12345 - result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)))); + result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)), 0)); // #12345/65 - result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))))); + result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))), 0)); // /12 - result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))))); + result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))), 0)); // @username@jabber.org - result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // @username - result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))))); + result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // *bold* - result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // /italic/ - result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // _underline_ - result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty())); + result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // > citate - result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty())); + result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty(), 1)); return result; } @@ -369,7 +369,20 @@ public class MessageUtils { return collectMatches(jidPattern, msg.getText()); } - private static List entitiesForType(String type, String input, String patternText, Function textGroup, Function> linkGroup) { + /** + * + * @param type Name of the entity + * @param input data to find matches + * @param patternText pattern to match + * @param textGroup function which return text representation + * @param linkGroup function which return link address + * @param endGroupId group id used to set end of entity (e.g. do not count linebreak as part of quote entity) + * @return list of entities + */ + private static List entitiesForType(String type, String input, String patternText, + Function textGroup, + Function> linkGroup, + int endGroupId) { List result = new ArrayList<>(); Pattern pattern = Pattern.compile(patternText); Matcher matcher = pattern.matcher(input); @@ -380,7 +393,7 @@ public class MessageUtils { Optional link = linkGroup.apply(matcher); link.ifPresent(entity::setUrl); entity.setStart(matcher.start()); - entity.setEnd(matcher.end()); + entity.setEnd(matcher.end(endGroupId)); result.add(entity); } return result; -- cgit v1.2.3 From 7424474bb1c5e72a684544bb17498578e12f084e Mon Sep 17 00:00:00 2001 From: Vitaly Takmazov Date: Tue, 22 Jan 2019 20:36:11 +0300 Subject: Fix HTML escaping --- src/main/java/com/juick/util/MessageUtils.java | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/main/java/com/juick/util/MessageUtils.java') diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index 5669a454..84c29807 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -144,6 +144,9 @@ public class MessageUtils { } public static String formatMessage(String msg) { + msg = msg.replaceAll("&", "&"); + msg = msg.replaceAll("<", "<"); + msg = msg.replaceAll(">", ">"); // -- // — msg = msg.replaceAll("((?<=\\s)|(?<=\\A))\\-\\-?((?=\\s)|(?=\\Z))", "$1—$2"); -- cgit v1.2.3 From dd7b6d46a7d0efd7198415b6bcc3e86e1d921821 Mon Sep 17 00:00:00 2001 From: Vitaly Takmazov Date: Tue, 22 Jan 2019 20:41:12 +0300 Subject: fix quote escaping --- src/main/java/com/juick/util/MessageUtils.java | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/main/java/com/juick/util/MessageUtils.java') diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index 84c29807..5cc8bb97 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -144,6 +144,11 @@ public class MessageUtils { } public static String formatMessage(String msg) { + // > citate + msg = msg.replaceAll(citateRegex, "$1"); + msg = msg.replaceAll("", "\n"); + + msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); msg = msg.replaceAll(">", ">"); @@ -204,10 +209,6 @@ public class MessageUtils { m.appendTail(sb); msg = sb.toString(); - // > citate - msg = msg.replaceAll(citateRegex, "$1"); - msg = msg.replaceAll("", "\n"); - msg = msg.replaceAll("\n", "
\n"); return msg; } -- cgit v1.2.3 From 0b418caf401c53eb73721a324cd13041c185d0fb Mon Sep 17 00:00:00 2001 From: Vitaly Takmazov Date: Tue, 22 Jan 2019 20:52:52 +0300 Subject: Fix quote regex --- src/main/java/com/juick/util/MessageUtils.java | 10 ++++------ src/test/java/com/juick/MessageTest.java | 9 +++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'src/main/java/com/juick/util/MessageUtils.java') diff --git a/src/main/java/com/juick/util/MessageUtils.java b/src/main/java/com/juick/util/MessageUtils.java index 5cc8bb97..72c9bd04 100644 --- a/src/main/java/com/juick/util/MessageUtils.java +++ b/src/main/java/com/juick/util/MessageUtils.java @@ -87,7 +87,7 @@ public class MessageUtils { private final static String underlineRegex = "((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; - private final static String citateRegex = "(?:(?<=\\n)|(?<=\\A))\\> *(.*)?(\\n|(?=\\Z))"; + private final static String citateRegex = "(?:(?<=\\n)|(?<=\\A))> *(.*)?(\\n|(?=\\Z))"; public static List getEntities(Message msg) { @@ -144,14 +144,12 @@ public class MessageUtils { } public static String formatMessage(String msg) { - // > citate - msg = msg.replaceAll(citateRegex, "$1"); - msg = msg.replaceAll("
", "\n"); - - msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); msg = msg.replaceAll(">", ">"); + // > citate + msg = msg.replaceAll(citateRegex, "$1"); + msg = msg.replaceAll("", "\n"); // -- // — msg = msg.replaceAll("((?<=\\s)|(?<=\\A))\\-\\-?((?=\\s)|(?=\\Z))", "$1—$2"); diff --git a/src/test/java/com/juick/MessageTest.java b/src/test/java/com/juick/MessageTest.java index 2d12a3df..0b48efa1 100644 --- a/src/test/java/com/juick/MessageTest.java +++ b/src/test/java/com/juick/MessageTest.java @@ -206,4 +206,13 @@ public class MessageTest { Remark remark = new Remark(options); String parsed = remark.convertFragment(text); } + @Test + public void messageFormatTest() { + String msg = "> quote\nmessage"; + assertThat(MessageUtils.formatMessage(msg), is("quotemessage")); + String brokenComment = "