/* * Copyright (C) 2008-2024, Juick * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ package com.juick.util; import com.juick.model.Entity; import com.juick.model.Message; import com.juick.model.Tag; import com.juick.model.User; import org.apache.commons.lang3.StringUtils; import org.springframework.web.util.UriComponentsBuilder; import java.net.URI; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; import java.util.*; import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors;
/**
 * Static helpers for working with message text: quoting, entity extraction,
 * text formatting, tag strings, attachment URLs and mention lookup.
 * The class cannot be instantiated; its private constructor always throws.
 *
 * Created by aalexeev on 11/13/16.
 *
 * Members that carry no comment of their own in this copy:
 * - getEntities(Message) calls entitiesForType once per markup kind over the message
 *   text (bare URLs, two bracketed link forms, message ids, reply ids, reply numbers,
 *   jid mentions, user mentions, bold, italic, underline, quote) and returns the
 *   results concatenated in that fixed order; the list is not sorted by position.
 * - formatMessageCode(String) and formatMessage(String, boolean) apply a sequence of
 *   replaceAll rewrites followed by a regexLinks2 find and appendReplacement pass.
 *   formatMessage(String) delegates with compatibleWithDurov set to false. When
 *   compatibleWithDurov is true the dash rewrite, the reply-number rewrite and the
 *   final line-feed rewrite are skipped.
 * - formatHtml(Message) appends a title, the user name, a subtitle (reply quote for
 *   replies, tag string otherwise), the formatted text (formatMessageCode when a tag
 *   named code is present, formatMessage otherwise) and, when an attachment type is
 *   set, an attachment fragment.
 * - getMessageHashTags(Message) emits a hash sign, the tag and a space for every tag;
 *   getMarkdownTags(Message) emits comma-separated markdown links to juick.com tag
 *   pages with the tag name percent-encoded.
 *
 * NOTE(review): this copy looks as if it went through an HTML or tag stripper. String
 * literals that would hold markup are empty or broken across line boundaries, the
 * single-character replaceAll calls at the top of formatMessageCode and formatMessage
 * map each character to itself as written, generic type arguments are missing, and the
 * regexLinks2 declaration runs into the tail of boldRegex. The constants textUrlRegex,
 * textUrlRegex2, pidginUrlRegex, midRegex, ridRegex, replyNumberRegex, jidRegex,
 * usernameRegex, usernamePattern and jidPattern are referenced but not declared in the
 * visible text. Restore the file from version control before changing any code.
 *
 * NOTE(review): formatMessage(String, boolean) replaces the dollar sign with %36 when
 * building the link URL; the percent-encoding of the dollar sign (0x24) is %24, and %36
 * decodes to the digit 6. The url local computed there is also unused by the
 * replacement string as it appears here. TODO confirm both against the real source.
 *
 * NOTE(review): getMessageHashTags appends the Tag object itself (its string form),
 * while getTagsString and getMarkdownTags use Tag.getName(). TODO confirm intent.
 *
 * NOTE(review): entitiesForType and stripNonSafeUrls compile their Pattern on every call.
 */
public class MessageUtils {
    /** Prevents instantiation; always throws IllegalStateException. */
    private MessageUtils() { throw new IllegalStateException(); }
    /**
     * Builds a plain-text quote of the given text.
     * Equivalent to formatQuote(quote, false).
     *
     * @param quote text to quote, may be null
     * @return the wrapped quote, or the input itself when it is null or empty
     */
    public static String formatQuote(final String quote) { return formatQuote(quote, false); }
    /**
     * Builds a short one-line quote of the given text.
     *
     * A null or empty quote is returned unchanged. A quote longer than 50 characters is
     * shortened with StringUtils.abbreviate (ellipsis marker, max width 47); in both
     * non-empty cases line feeds are then replaced by spaces and the text is wrapped in
     * a prefix and a suffix.
     *
     * NOTE(review): the HTML prefix and suffix literals appear to have lost their markup
     * in this copy; only the plain-text pair (greater-than sign, line feed) is intact.
     *
     * @param quote text to quote, may be null
     * @param isHtml selects the HTML prefix and suffix instead of the plain-text ones
     * @return the wrapped quote, or the input itself when it is null or empty
     */
    public static String formatQuote(final String quote, final boolean isHtml) { String result = quote; var prefix = isHtml ? "
" : ">"; var suffix = isHtml ? "
" : "\n"; if (quote != null) { if (quote.length() > 50) { result = prefix + StringUtils.abbreviate(quote, "…", 47).replace('\n', ' ') + suffix; } else if (!quote.isEmpty()) { result = prefix + quote.replace('\n', ' ') + suffix; } } return result; } private final static String urlWhiteSpacePrefix = "((?<=\\s)|(?<=\\A))"; private final static String urlRegex = "((?:(?:ht|f)tps?://(?:www\\.)?([^\\s()<>/?#]+)([^\\s()<>«»]*)?))"; private final static String urlWithWhitespacesRegex = urlWhiteSpacePrefix + urlRegex; private final static Pattern regexLinks2 = Pattern.compile("((?<=\\s)|(?<=\\A))([\\[{])((?:ht|f)tps?://(?:www\\.)?([^/\\s\")!>]*)/?(?:[^]}](?]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String italicRegex = "((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String underlineRegex = "((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))"; private final static String citateRegex = "(?:(?<=\\n)|(?<=\\A))(?:>|>) *(.*)?(\\r?\\n|(?=\\Z))"; public static List getEntities(Message msg) { String txt = msg.getText(); // http://juick.com/last?page=2 List result = new ArrayList<>(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)), 0)); // [link text][http://juick.com/last?page=2] result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // [link text](http://juick.com/last?page=2) result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0)); // #12345 result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)), 0)); // #12345/65 result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> 
Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))), 0)); // /12 result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))), 0)); // @username@jabber.org result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // @username result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0)); // *bold* result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // /italic/ result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // _underline_ result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0)); // > citate result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty(), 1)); return result; } /** Applies three single-character replaceAll calls (identity mappings as written here), rewrites URLs matched by urlWithWhitespacesRegex and regexLinks2 (spaces in the bracketed URL become %20, other whitespace is removed), then returns the text between a prefix and a suffix literal; NOTE(review): those literals look truncated in this copy. */ public static String formatMessageCode(String msg) { msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); msg = msg.replaceAll(">", ">"); // http://juick.com/last?page=2 // http://juick.com/last?page=2 msg = msg.replaceAll(urlWithWhitespacesRegex, "$1$2"); // (http://juick.com/last?page=2) // (http://juick.com/last?page=2) Matcher m = regexLinks2.matcher(msg); StringBuffer sb = new StringBuffer(); while (m.find()) { String url = m.group(3).replace(" ", "%20").replaceAll("\\s+", StringUtils.EMPTY); m.appendReplacement(sb, "$1$2" + url + "$5"); } m.appendTail(sb); msg = sb.toString(); return "
" + msg + "
"; } public static String formatMessage(String msg) { return formatMessage(msg, false); } public static String formatMessage(String msg, boolean compatibleWithDurov) { msg = msg.replaceAll("&", "&"); msg = msg.replaceAll("<", "<"); msg = msg.replaceAll(">", ">"); // -- // — if (!compatibleWithDurov) { msg = msg.replaceAll("((?<=\\s)|(?<=\\A))\\-\\-?((?=\\s)|(?=\\Z))", "$1—$2"); } // http://juick.com/last?page=2 // juick.com msg = msg.replaceAll(urlWithWhitespacesRegex, "$1$3"); // [link text][http://juick.com/last?page=2] // [link text](http://juick.com/last?page=2) // www.juick.com // link text msg = msg.replaceAll(textUrlRegex, "$1"); msg = msg.replaceAll(textUrlRegex2, "$1"); msg = msg.replaceAll(pidginUrlRegex, "$1"); // #12345 // #12345 msg = msg.replaceAll(midRegex, "$1#$2$3"); // #12345/65 // #12345/65 msg = msg.replaceAll(ridRegex, "$1#$2/$3$4"); // *bold* // bold msg = msg.replaceAll(boldRegex, "$1$2$3"); // /italic/ // italic msg = msg.replaceAll(italicRegex, "$1$2$3"); // _underline_ // underline msg = msg.replaceAll(underlineRegex, "$1$2$3"); // /12 // /12 if (!compatibleWithDurov) { msg = msg.replaceAll(replyNumberRegex, "$1/$2$3"); } // @username@mastodon.social // @username@mastodon.social if (compatibleWithDurov) { msg = msg.replaceAll(jidRegex, "$1@$2$3"); } else { msg = msg.replaceAll(jidRegex, "$1@$2$3"); } // @username // @username if (compatibleWithDurov) { msg = msg.replaceAll(usernameRegex, "$1@$2$3"); } else { msg = msg.replaceAll(usernameRegex, "$1@$2$3"); } // (http://juick.com/last?page=2) // (juick.com) Matcher m = regexLinks2.matcher(msg); StringBuffer sb = new StringBuffer(); while (m.find()) { String url = m.group(3).replace(" ", "%20").replace("$", "%36").replaceAll("\\s+", StringUtils.EMPTY); m.appendReplacement(sb, "$1$2$4$5"); } m.appendTail(sb); msg = sb.toString(); // > citate msg = msg.replaceAll(citateRegex, "
$1
"); msg = msg.replaceAll("
", "\n"); if (!compatibleWithDurov) { msg = msg.replaceAll("\n", "
\n"); } return msg; } public static String formatHtml(Message jmsg) { StringBuilder sb = new StringBuilder(); boolean isReply = jmsg.getRid() > 0; String title = isReply ? "Reply by @" : "@"; String subtitle = isReply ? "
" + jmsg.getReplyQuote() + "
" : "" + getTagsString(jmsg) + ""; boolean isCode = jmsg.getTags().stream().anyMatch(t -> t.getName().equals("code")); sb.append(title).append(jmsg.getUser().getName()).append(":

") .append(subtitle).append("
") .append(isCode ? formatMessageCode(StringUtils.defaultString(jmsg.getText())) : formatMessage(StringUtils.defaultString(jmsg.getText()))).append("
"); if (StringUtils.isNotEmpty(jmsg.getAttachmentType())) { // FIXME: attachment does not serialized to xml if (jmsg.getAttachment() == null) { if (jmsg.getRid() > 0) { sb.append(String.format("", jmsg.getMid(), jmsg.getRid(), jmsg.getAttachmentType())); } else { sb.append(String.format("", jmsg.getMid(), jmsg.getAttachmentType())); } } else { sb.append(""); } } return sb.toString(); } public static String getMessageHashTags(final Message jmsg) { StringBuilder hashtags = new StringBuilder(); for (Tag tag : jmsg.getTags()) { hashtags.append("#").append(tag).append(" "); } return hashtags.toString(); } public static String getMarkdownTags(final Message jmsg) { return jmsg.getTags().stream().map(t -> String.format("[%s](http://juick.com/tag/%s)", t.getName(), percentEncode(t.getName()))) .collect(Collectors.joining(", ")); } /** Formats a link for the user from user.getUri() and the user name when the URI string is non-empty, otherwise from webDomain and the user name; NOTE(review): the format strings here hold a single %s placeholder but receive two or three arguments, so markup was presumably lost in this copy (TODO confirm). */ public static String getUserHtmlLink(final User user, final String webDomain) { if (user.getUri().toASCIIString().length() > 0) { return String.format("%s", user.getUri(), user.getName()); } else { return String.format("%s", webDomain, user.getName(), user.getName()); } } // TODO: check if it is really needed /** URL-encodes s as UTF-8 with URLEncoder, then rewrites plus to %20, asterisk to %2A and %7E back to tilde. */ public static String percentEncode(final String s) { return URLEncoder.encode(s, StandardCharsets.UTF_8).replace("+", "%20") .replace("*", "%2A").replace("%7E", "~"); } /** Returns the empty string when the attachment type is empty; otherwise the medium attachment URL when an attachment object is present, else an i.juick.com photos-1024 URL built from the mid, the rid for replies, and the attachment type. */ public static String attachmentUrl(final Message jmsg) { if (StringUtils.isEmpty(jmsg.getAttachmentType())) { return StringUtils.EMPTY; } // FIXME: attachment does not serialized to xml if (jmsg.getAttachment() == null) { if (jmsg.getRid() > 0) { return String.format("http://i.juick.com/photos-1024/%d-%d.%s", jmsg.getMid(), jmsg.getRid(), jmsg.getAttachmentType()); } else { return String.format("http://i.juick.com/photos-1024/%d.%s", jmsg.getMid(), jmsg.getAttachmentType()); } } else { return jmsg.getAttachment().getMedium().getUrl(); } } /** True when the message is a reply (rid greater than 0) and its text, with null treated as empty, starts with a greater-than sign. */ public static boolean replyStartsWithQuote(Message msg) { return msg.getRid() > 0 && 
StringUtils.defaultString(msg.getText()).startsWith(">"); }
    /**
     * Parses a space-separated tag string into tags.
     *
     * The string is split on single space characters and every piece is passed to the
     * Tag constructor; the result keeps first-seen order (LinkedHashSet). Consecutive
     * spaces therefore produce pieces that are empty strings.
     *
     * @param strTags space-separated tag names, may be null or empty
     * @return the parsed tags, or an empty set when the input is null or empty
     */
    public static Set parseTags(String strTags) { return StringUtils.isEmpty(strTags) ? Collections.emptySet() : Arrays.stream(strTags.split(" ")).map(Tag::new) .collect(Collectors.toCollection(LinkedHashSet::new)); }
    /**
     * Renders the tags of a message as a string of space-asterisk-name items.
     *
     * When the message has at least one tag, each tag name is appended, followed by a
     * friends item if msg.isFriendsOnly() and a readonly item if msg.ReadOnly is set.
     * When the tag set is empty nothing is appended, including the two flag items.
     *
     * @param msg message whose tags are rendered
     * @return the tag string, empty when the message has no tags
     */
    public static String getTagsString(Message msg) { StringBuilder builder = new StringBuilder(); Set tags = msg.getTags(); if (!tags.isEmpty()) { for (Tag Tag : tags) builder.append(" *").append(Tag.getName()); if (msg.isFriendsOnly()) builder.append(" *friends"); if (msg.ReadOnly) builder.append(" *readonly"); } return builder.toString(); }
    /**
     * Tells whether the message is treated as a private message.
     *
     * @param message message to inspect
     * @return true when message.getMid() is 0
     */
    public static boolean isPM(Message message) { return message.getMid() == 0; }
    /**
     * Tells whether the message is a reply.
     *
     * @param message message to inspect
     * @return true when message.getRid() is greater than 0
     */
    public static boolean isReply(Message message) { return message.getRid() > 0; }
    /**
     * Removes the hash query parameter from links to juick.com found in the input.
     *
     * Every urlRegex match is parsed with URI.create. When the host equals juick.com the
     * URL is rebuilt through UriComponentsBuilder after calling replaceQueryParam for
     * hash with no values (which Spring documents as removing the parameter), and every
     * occurrence of the matched text in the input is replaced by the rebuilt URL.
     *
     * NOTE(review): an IllegalArgumentException or NullPointerException (for example
     * from URI.create on a malformed match, from a URI without a host, or from a null
     * input) ends processing and returns the input as rewritten so far, so juick.com
     * links after the failing match keep their hash parameter. TODO confirm that this
     * best-effort behavior is intended.
     *
     * @param input text that may contain URLs
     * @return the text with the hash parameter removed from the processed juick.com links
     */
    public static String stripNonSafeUrls(String input) { // strip login urls try { Matcher urlMatcher = Pattern.compile(MessageUtils.urlRegex).matcher(input); while (urlMatcher.find()) { URI uri = URI.create(urlMatcher.group(0)); if (uri.getHost().equals("juick.com")) { UriComponentsBuilder uriComponentsBuilder = UriComponentsBuilder.fromUri(uri); uriComponentsBuilder.replaceQueryParam("hash"); input = input.replace(urlMatcher.group(0), uriComponentsBuilder.build().toUriString()); } } } catch (IllegalArgumentException | NullPointerException e) { return input; } return input; } /** Returns every match of pattern in input, in order of occurrence; input is passed to Pattern.matcher unchecked, so a null input fails there. */ private static List collectMatches(Pattern pattern, String input) { Matcher matcher = pattern.matcher(input); List result = new ArrayList<>(); while (matcher.find()) { result.add(matcher.group()); } return result; } /** Returns the matches of usernamePattern (declared outside the visible text) in the message text. */ public static List getMentions(Message msg) { return collectMatches(usernamePattern, msg.getText()); } /** Returns the matches of jidPattern (declared outside the visible text) in the message text. */ public static List getGlobalMentions(Message msg) { return collectMatches(jidPattern, msg.getText()); } /** * * @param type Name of the entity * @param input data to find matches * @param patternText pattern to match * @param textGroup function which return text representation * @param linkGroup function which return link 
address * @param endGroupId group id used to set end of entity (e.g. do not count linebreak as part of quote entity) * @return list of entities */ private static List entitiesForType(String type, String input, String patternText, Function textGroup, Function> linkGroup, int endGroupId) { List result = new ArrayList<>(); Pattern pattern = Pattern.compile(patternText); Matcher matcher = pattern.matcher(input); while (matcher.find()) { Entity entity = new Entity(); entity.setType(type); entity.setText(textGroup.apply(matcher)); Optional link = linkGroup.apply(matcher); link.ifPresent(entity::setUrl); entity.setStart(matcher.start()); entity.setEnd(matcher.end(endGroupId)); result.add(entity); } return result; }
    /**
     * Tells whether the message carries the NSFW tag.
     *
     * @param msg message to inspect
     * @return true when any tag name equals NSFW (exact, case-sensitive comparison)
     */
    public static boolean isSensitive(Message msg) { return msg.getTags().stream().anyMatch((t) -> t.getName().equals("NSFW")); } }