/* * Copyright (C) 2008-2020, Juick * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.juick.util; import com.juick.model.Message; import com.juick.model.Tag; import com.juick.model.User; import com.juick.model.Entity; import org.apache.commons.lang3.StringUtils; import org.springframework.web.util.UriComponentsBuilder; import java.net.URI; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.LinkedHashSet; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; /** * Created by aalexeev on 11/13/16. */ public class MessageUtils { private MessageUtils() { throw new IllegalStateException(); } public static String formatQuote(final String quote) { String result = quote; if (quote != null) { if (quote.length() > 50) { result = ">" + quote.substring(0, 47).replace('\n', ' ') + "...\n"; } else if (!quote.isEmpty()) { result = ">" + quote.replace('\n', ' ') + "\n"; } } return result; } private final static String urlWhiteSpacePrefix = "((?<=\\s)|(?<=\\A))"; private final static String urlRegex = "((?:ht|f)tps?://(?:www\\.)?([^\\/\\s\\n\\\"]+)/?[^\\]\\s\\n\\\"\\>]*)"; private final static String urlWithWhitespacesRegex = urlWhiteSpacePrefix + urlRegex; private final static Pattern regexLinks2 = Pattern.compile("((?<=\\s)|(?<=\\A))([\\[{])((?:ht|f)tps?://(?:www\\.)?([^/\\s\")!>]*)/?(?:[^]}](?<!>))*)([]}])"); private final static String replyNumberRegex = "((?<=\\s)|(?<=\\A))\\/(\\d+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String usernameRegex = "((?<=\\s)|(?<=\\A))@([\\w\\-]{2,16})((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static Pattern usernamePattern = Pattern.compile(usernameRegex); private final static String jidRegex = "((?<=\\s)|(?<=\\A))@([\\w\\-\\.]+@[\\w\\-\\.]+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static Pattern jidPattern = Pattern.compile(jidRegex); private final static String midRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)((?=\\s)|(?=\\Z)|(?=\\))|(?=\\.)|(?=\\,))"; private final static String ridRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))#(\\d+)/(\\d+)((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String textUrlRegex = "\\[([^]]+)]\\[((?:ht|f)tps?://[^]]+)]"; private final static String textUrlRegex2 = "\\[([^]]+)]\\(((?:ht|f)tps?://[^)]+)\\)"; private final static String pidginUrlRegex = "(\\S+)\\s<((?:ht|f)tps?://[^&]+)>"; private final static String boldRegex = "((?<=\\s)|(?<=\\A)|(?<=\\p{Punct}))\\*([^\\*\\n<>]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String italicRegex = "((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String underlineRegex = "((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String citateRegex = "(?:(?<=\\n)|(?<=\\A))(?:>|>) *(.*)?(\\r?\\n|(?=\\Z))"; public static List<Entity> getEntities(Message msg) { String txt = msg.getText(); // http://juick.com/last?page=2 List<Entity> result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)), 0)); // [link text][http://juick.com/last?page=2] result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // [link text](http://juick.com/last?page=2) result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // #12345 result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)), 0)); // #12345/65 result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))), 0)); // /12 result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))), 0)); // @username@jabber.org result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // @username result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // *bold* result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // /italic/ result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // _underline_ result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // > citate result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty(), 1)); return result; } public static String formatMessageCode(String msg) { msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); msg = msg.replaceAll(">", ">"); // http://juick.com/last?page=2 // <a href="http://juick.com/last?page=2" rel="nofollow">http://juick.com/last?page=2</a> msg = msg.replaceAll(urlWithWhitespacesRegex, "$1<a href=\"$2\" rel=\"nofollow\">$2</a>"); // (http://juick.com/last?page=2) // (<a href="http://juick.com/last?page=2" rel="nofollow">http://juick.com/last?page=2</a>) Matcher m = regexLinks2.matcher(msg); StringBuffer sb = new StringBuffer(); while (m.find()) { String url = m.group(3).replace(" ", "%20").replaceAll("\\s+", StringUtils.EMPTY); m.appendReplacement(sb, "$1$2<a href=\"" + url + "\" rel=\"nofollow\">" + url + "</a>$5"); } m.appendTail(sb); msg = sb.toString(); return "<pre>" + msg + "</pre>"; } public static String formatMessage(String msg) { return formatMessage(msg, false); } public static String formatMessage(String msg, boolean compatibleWithDurov) { msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); msg = msg.replaceAll(">", ">"); // -- // — if (!compatibleWithDurov) { msg = msg.replaceAll("((?<=\\s)|(?<=\\A))\\-\\-?((?=\\s)|(?=\\Z))", "$1—$2"); } // http://juick.com/last?page=2 // <a href="http://juick.com/last?page=2" rel="nofollow">juick.com</a> msg = msg.replaceAll(urlWithWhitespacesRegex, "$1<a href=\"$2\" rel=\"nofollow\">$3</a>"); // [link text][http://juick.com/last?page=2] // [link text](http://juick.com/last?page=2) // www.juick.com <http://juick.com/> // <a href="http://juick.com/last?page=2" rel="nofollow">link text</a> msg = msg.replaceAll(textUrlRegex, "<a href=\"$2\" rel=\"nofollow\">$1</a>"); msg = msg.replaceAll(textUrlRegex2, "<a href=\"$2\" rel=\"nofollow\">$1</a>"); msg = msg.replaceAll(pidginUrlRegex, "<a href=\"$2\" rel=\"nofollow\">$1</a>"); // #12345 // <a href="http://juick.com/12345">#12345</a> msg = msg.replaceAll(midRegex, "$1<a href=\"https://juick.com/m/$2\">#$2</a>$3"); // #12345/65 // <a href="http://juick.com/12345#65">#12345/65</a> msg = msg.replaceAll(ridRegex, "$1<a href=\"https://juick.com/m/$2#$3\">#$2/$3</a>$4"); // *bold* // <b>bold</b> msg = msg.replaceAll(boldRegex, "$1<b>$2</b>$3"); // /italic/ // <i>italic</i> msg = msg.replaceAll(italicRegex, "$1<i>$2</i>$3"); // _underline_ // <span class="u">underline</span> msg = msg.replaceAll(underlineRegex, "$1<u>$2</u>$3"); // /12 // <a href="#12">/12</a> msg = msg.replaceAll(replyNumberRegex, "$1<a href=\"#$2\">/$2</a>$3"); // @username@mastodon.social // <a href="http://juick.com/mention?username=username@mastodon.social/">@username@mastodon.social</a> if (compatibleWithDurov) { msg = msg.replaceAll(jidRegex, "$1<a href=\"http://juick.com/mention?username=$2\">@$2</a>$3"); } else { msg = msg.replaceAll(jidRegex, "$1<span class=\"h-card\"><a class=\"u-url\" href=\"http://juick.com/mention?username=$2\">@$2</a></span>$3"); } // @username // <a href="http://juick.com/username/">@username</a> if (compatibleWithDurov) { msg = msg.replaceAll(usernameRegex, "$1<a href=\"https://juick.com/$2/\">@$2</a>$3"); } else { msg = msg.replaceAll(usernameRegex, "$1<span class=\"h-card\"><a class=\"u-url\" href=\"https://juick.com/$2/\">@$2</a></span>$3"); } // (http://juick.com/last?page=2) // (<a href="http://juick.com/last?page=2" rel="nofollow">juick.com</a>) Matcher m = regexLinks2.matcher(msg); StringBuffer sb = new StringBuffer(); while (m.find()) { String url = m.group(3).replace(" ", "%20").replace("$", "%36").replaceAll("\\s+", StringUtils.EMPTY); m.appendReplacement(sb, "$1$2<a href=\"" + url + "\" rel=\"nofollow\">$4</a>$5"); } m.appendTail(sb); msg = sb.toString(); if (!compatibleWithDurov) { // > citate msg = msg.replaceAll(citateRegex, "<q>$1</q>"); msg = msg.replaceAll("</q><q>", "\n"); msg = msg.replaceAll("\n", "<br/>\n"); } return msg; } public static String formatHtml(Message jmsg) { StringBuilder sb = new StringBuilder(); boolean isReply = jmsg.getRid() > 0; String title = isReply ? "<b>Reply by @" : "<b>@"; String subtitle = isReply ? "<blockquote>" + jmsg.getReplyQuote() + "</blockquote>" : "<i>" + getTagsString(jmsg) + "</i>"; boolean isCode = jmsg.getTags().stream().anyMatch(t -> t.getName().equals("code")); sb.append(title).append(jmsg.getUser().getName()).append(":</b></br/>") .append(subtitle).append("<br/>") .append(isCode ? formatMessageCode(StringUtils.defaultString(jmsg.getText())) : formatMessage(StringUtils.defaultString(jmsg.getText()))).append("<br />"); if (StringUtils.isNotEmpty(jmsg.getAttachmentType())) { // FIXME: attachment does not serialized to xml if (jmsg.getAttachment() == null) { if (jmsg.getRid() > 0) { sb.append(String.format("<img src=\"http://i.juick.com/photos-1024/%d-%d.%s\" />", jmsg.getMid(), jmsg.getRid(), jmsg.getAttachmentType())); } else { sb.append(String.format("<img src=\"http://i.juick.com/photos-1024/%d.%s\" />", jmsg.getMid(), jmsg.getAttachmentType())); } } else { sb.append("<img src=\"").append(jmsg.getAttachment().getMedium().getUrl()).append("\" />"); } } return sb.toString(); } public static String getMessageHashTags(final Message jmsg) { StringBuilder hashtags = new StringBuilder(); for (Tag tag : jmsg.getTags()) { hashtags.append("#").append(tag).append(" "); } return hashtags.toString(); } public static String getMarkdownTags(final Message jmsg) { return jmsg.getTags().stream().map(t -> String.format("[%s](http://juick.com/tag/%s)", t.getName(), percentEncode(t.getName()))) .collect(Collectors.joining(", ")); } public static String getUserHtmlLink(final User user, final String webDomain) { if (user.getUri().toASCIIString().length() > 0) { return String.format("<a href=\"%s\">%s</a>", user.getUri(), user.getName()); } else { return String.format("<a href=\"https://%s/%s\">%s</a>", webDomain, user.getName(), user.getName()); } } // TODO: check if it is really needed public static String percentEncode(final String s) { return URLEncoder.encode(s, StandardCharsets.UTF_8).replace("+", "%20") .replace("*", "%2A").replace("%7E", "~"); } public static String formatMarkdownText(final Message msg) { return StringUtils.defaultString(msg.getText()) .replaceAll(replyNumberRegex, String.format("$1[/$2](https://juick.com/m/%d#$2)$3", msg.getMid())); } public static String attachmentUrl(final Message jmsg) { if (StringUtils.isEmpty(jmsg.getAttachmentType())) { return StringUtils.EMPTY; } // FIXME: attachment does not serialized to xml if (jmsg.getAttachment() == null) { if (jmsg.getRid() > 0) { return String.format("http://i.juick.com/photos-1024/%d-%d.%s", jmsg.getMid(), jmsg.getRid(), jmsg.getAttachmentType()); } else { return String.format("http://i.juick.com/photos-1024/%d.%s", jmsg.getMid(), jmsg.getAttachmentType()); } } else { return jmsg.getAttachment().getMedium().getUrl(); } } public static boolean replyStartsWithQuote(Message msg) { return msg.getRid() > 0 && StringUtils.defaultString(msg.getText()).startsWith(">"); } public static Set<Tag> parseTags(String strTags) { return StringUtils.isEmpty(strTags) ? Collections.emptySet() : Arrays.stream(strTags.split(" ")).map(Tag::new) .collect(Collectors.toCollection(LinkedHashSet::new)); } public static String getTagsString(Message msg) { StringBuilder builder = new StringBuilder(); Set<Tag> tags = msg.getTags(); if (!tags.isEmpty()) { for (Tag Tag : tags) builder.append(" *").append(Tag.getName()); if (msg.isFriendsOnly()) builder.append(" *friends"); if (msg.ReadOnly) builder.append(" *readonly"); } return builder.toString(); } public static boolean isPM(Message message) { return message.getMid() == 0; } public static boolean isReply(Message message) { return message.getRid() > 0; } public static String stripNonSafeUrls(String input) { // strip login urls try { Matcher urlMatcher = Pattern.compile(MessageUtils.urlRegex).matcher(input); while (urlMatcher.find()) { URI uri = URI.create(urlMatcher.group(0)); if (uri.getHost().equals("juick.com")) { UriComponentsBuilder uriComponentsBuilder = UriComponentsBuilder.fromUri(uri); uriComponentsBuilder.replaceQueryParam("hash"); input = input.replace(urlMatcher.group(0), uriComponentsBuilder.build().toUriString()); } } } catch (IllegalArgumentException | NullPointerException e) { return input; } return input; } private static List<String> collectMatches(Pattern pattern, String input) { Matcher matcher = pattern.matcher(input); List<String> result = new ArrayList<>(); while (matcher.find()) { result.add(matcher.group()); } return result; } public static List<String> getMentions(Message msg) { return collectMatches(usernamePattern, msg.getText()); } public static List<String> getGlobalMentions(Message msg) { return collectMatches(jidPattern, msg.getText()); } /** * * @param type Name of the entity * @param input data to find matches * @param patternText pattern to match * @param textGroup function which return text representation * @param linkGroup function which return link address * @param endGroupId group id used to set end of entity (e.g. do not count linebreak as part of quote entity) * @return list of entities */ private static List<Entity> entitiesForType(String type, String input, String patternText, Function<Matcher, String> textGroup, Function<Matcher, Optional<String>> linkGroup, int endGroupId) { List<Entity> result = new ArrayList<>(); Pattern pattern = Pattern.compile(patternText); Matcher matcher = pattern.matcher(input); while (matcher.find()) { Entity entity = new Entity(); entity.setType(type); entity.setText(textGroup.apply(matcher)); Optional<String> link = linkGroup.apply(matcher); link.ifPresent(entity::setUrl); entity.setStart(matcher.start()); entity.setEnd(matcher.end(endGroupId)); result.add(entity); } return result; } public static boolean isSensitive(Message msg) { return msg.getTags().stream().anyMatch((t) -> t.getName().equals("NSFW")); } }