/*
* Copyright (C) 2008-2024, Juick
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.juick.util;
import com.juick.model.Entity;
import com.juick.model.Message;
import com.juick.model.Tag;
import com.juick.model.User;
import org.apache.commons.lang3.StringUtils;
import org.springframework.web.util.UriComponentsBuilder;
import java.net.URI;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
/**
* Created by aalexeev on 11/13/16.
*/
public class MessageUtils {
// Private constructor that always throws: every member visible in this class is static,
// so no instance is ever meant to be created (not even from inside the class).
private MessageUtils() {
throw new IllegalStateException();
}
/**
 * Turns a piece of text into a single-line quote.
 *
 * @param quote text to quote, may be {@code null}
 * @return {@code null} or the empty string unchanged; otherwise {@code ">"} followed by the
 *         text (abbreviated with a trailing "…" to 47 characters when it is longer than 50),
 *         with every line feed replaced by a space, followed by a line feed
 */
public static String formatQuote(final String quote) {
    // Nothing to quote: hand back exactly what was passed in.
    if (quote == null || quote.isEmpty()) {
        return quote;
    }
    final String body = quote.length() > 50
            ? StringUtils.abbreviate(quote, "…", 47)
            : quote;
    return ">" + body.replace('\n', ' ') + "\n";
}
// Zero-width prefix (capturing group 1, always empty): the position must be preceded by
// whitespace or be the very start of the input.
private final static String urlWhiteSpacePrefix = "((?<=\\s)|(?<=\\A))";
// Bare URL with scheme http, https, ftp or ftps.
// Group 1: the whole URL. Group 2: the part after the scheme (an optional leading "www." is
// skipped) up to the first '/', '?' or '#'. Group 3: everything after that, up to whitespace,
// a parenthesis, an angle bracket or a guillemet.
private final static String urlRegex = "((?:(?:ht|f)tps?://(?:www\\.)?([^\\s()<>/?#]+)([^\\s()<>«»]*)?))";
// urlRegex anchored by urlWhiteSpacePrefix. The prefix contributes group 1, so the urlRegex
// groups are shifted by one here: 2 = whole URL, 3 = host part, 4 = remainder.
private final static String urlWithWhitespacesRegex =
urlWhiteSpacePrefix + urlRegex;
// NOTE(review): the declaration below is not valid Java as it stands: the Pattern.compile( call
// is never closed and the character class "[^]}](?]+" is malformed. Its tail looks like the
// end of a "*bold*" matcher. The names textUrlRegex, textUrlRegex2, pidginUrlRegex, midRegex,
// ridRegex, replyNumberRegex, jidRegex, usernameRegex, boldRegex, usernamePattern and
// jidPattern are used further down in this class but are not declared anywhere visible here.
// Presumably several declarations were fused into this one line; restore them from upstream.
private final static Pattern regexLinks2 = Pattern.compile("((?<=\\s)|(?<=\\A))([\\[{])((?:ht|f)tps?://(?:www\\.)?([^/\\s\")!>]*)/?(?:[^]}](?]+)\\*((?=\\s)|(?=\\Z)|(?=\\p{Punct}))";
// /italic/ : text between two slashes. Group 1 is the (empty) whitespace/start prefix, group 2
// the text (no '/', line feed, '<' or '>'), group 3 the (empty) lookahead requiring whitespace,
// end of input or a punctuation character.
private final static String italicRegex = "((?<=\\s)|(?<=\\A))/([^\\/\\n<>]+)/((?=\\s)|(?=\\Z)|(?=\\p{Punct}))";
// _underline_ : same structure as italicRegex, delimited by underscores.
private final static String underlineRegex = "((?<=\\s)|(?<=\\A))_([^\\_\\n<>]+)_((?=\\s)|(?=\\Z)|(?=\\p{Punct}))";
// Quoted line: '>' at the start of the input or right after a line feed, optional spaces,
// then group 1 = rest of the line and group 2 = the line terminator (empty at end of input).
// NOTE(review): both branches of "(?:>|>)" are identical; presumably one of them originally
// matched the escaped form of '>' - confirm against upstream.
private final static String citateRegex = "(?:(?<=\\n)|(?<=\\A))(?:>|>) *(.*)?(\\r?\\n|(?=\\Z))";
/**
 * Collects the markup entities found in the text of a message: links (bare URLs, the two
 * "[text]" link notations, message/reply references, user mentions), bold, italic,
 * underlined and quoted fragments.
 *
 * <p>Entities are grouped by kind in the order listed in the method body, not sorted by
 * position. A message without text yields an empty list.
 *
 * @param msg message to scan; its id is used to build links for "/12"-style reply references
 * @return a new mutable list of entities, possibly empty
 */
public static List<Entity> getEntities(Message msg) {
    // A Matcher cannot be created over null, so a missing text is treated as empty,
    // the same way the other text helpers in this class do.
    final String txt = StringUtils.defaultString(msg.getText());
    final List<Entity> result = new ArrayList<>();
    // http://juick.com/last?page=2
    result.addAll(entitiesForType("a", txt, urlWithWhitespacesRegex, matcher -> matcher.group(3), matcher -> Optional.of(matcher.group(2)), 0));
    // [link text][http://juick.com/last?page=2]
    result.addAll(entitiesForType("a", txt, textUrlRegex, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0));
    // [link text](http://juick.com/last?page=2)
    result.addAll(entitiesForType("a", txt, textUrlRegex2, matcher -> matcher.group(1), matcher -> Optional.of(matcher.group(2)), 0));
    // #12345
    result.addAll(entitiesForType("a", txt, midRegex, matcher -> String.format("#%s", matcher.group(2)), matcher -> Optional.of("https://juick.com/m/" + matcher.group(2)), 0));
    // #12345/65
    result.addAll(entitiesForType("a", txt, ridRegex, matcher -> String.format("#%s/%s", matcher.group(2), matcher.group(3)), matcher -> Optional.of(String.format("https://juick.com/m/%s#%s", matcher.group(2), matcher.group(3))), 0));
    // /12
    result.addAll(entitiesForType("a", txt, replyNumberRegex, matcher -> "/" + matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/m/%d#%s", msg.getMid(), matcher.group(2))), 0));
    // @username@jabber.org
    result.addAll(entitiesForType("a", txt, jidRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0));
    // @username
    result.addAll(entitiesForType("a", txt, usernameRegex, matcher -> matcher.group(2), matcher -> Optional.of(String.format("https://juick.com/%s", matcher.group(2))), 0));
    // *bold*
    result.addAll(entitiesForType("b", txt, boldRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0));
    // /italic/
    result.addAll(entitiesForType("i", txt, italicRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0));
    // _underline_
    result.addAll(entitiesForType("u", txt, underlineRegex, matcher -> matcher.group(2), matcher -> Optional.empty(), 0));
    // > citate (the entity ends at the end of group 1, i.e. before the line break)
    result.addAll(entitiesForType("q", txt, citateRegex, matcher -> matcher.group(1), matcher -> Optional.empty(), 1));
    return result;
}
/**
 * Formats the text of a message that is rendered as code (formatHtml calls this for messages
 * carrying the "code" tag): applies three character replacements, rewrites bare URLs and
 * bracketed URLs, and returns the text wrapped between two string literals.
 *
 * NOTE(review): the string literals in this method look damaged in this copy of the source
 * (self-replacements, a literal split across two lines); presumably they contained markup.
 * Confirm every literal against upstream before relying on this method.
 *
 * @param msg raw message text, must not be null
 * @return the formatted text
 */
public static String formatMessageCode(String msg) {
// NOTE(review): as written, each of these three calls replaces a character with itself.
msg = msg.replaceAll("&", "&");
msg = msg.replaceAll("<", "<");
msg = msg.replaceAll(">", ">");
// Bare URL, e.g. http://juick.com/last?page=2
// Replaced by group 1 (the empty prefix) followed by group 2 (the whole URL).
msg = msg.replaceAll(urlWithWhitespacesRegex, "$1$2");
// Bracketed URL, e.g. (http://juick.com/last?page=2)
// Handled with an explicit find/append loop because the URL is cleaned up before insertion.
Matcher m = regexLinks2.matcher(msg);
StringBuffer sb = new StringBuffer();
while (m.find()) {
// Spaces become %20, any other whitespace inside the URL is dropped.
String url = m.group(3).replace(" ", "%20").replaceAll("\\s+", StringUtils.EMPTY);
// NOTE(review): url is concatenated into the replacement template, so a '$' or '\' in it
// would be interpreted by appendReplacement - verify group 3 can never contain them.
m.appendReplacement(sb, "$1$2" + url + "$5");
}
m.appendTail(sb);
msg = sb.toString();
return "" + msg + "
";
}
/**
 * Formats message text with the default settings.
 *
 * @param msg raw message text
 * @return the result of {@code formatMessage(msg, false)}
 */
public static String formatMessage(String msg) {
    final boolean compatibleWithDurov = false;
    return formatMessage(msg, compatibleWithDurov);
}
/**
 * Formats message text by applying a fixed sequence of regex replacements: dashes, bare URLs,
 * the "[text]" link notations, message and reply references, bold/italic/underline markers,
 * user mentions, bracketed URLs and finally quotes and line breaks.
 *
 * NOTE(review): the replacement templates and several literals in this method look damaged in
 * this copy of the source (self-replacements, templates that only re-emit captured groups,
 * literals split across lines); presumably they contained markup. Confirm against upstream.
 *
 * @param msg                 raw message text, must not be null
 * @param compatibleWithDurov when true, the dash replacement and the final quote/line-break
 *                            step are skipped (presumably an output mode for a client with
 *                            restricted markup - confirm with callers)
 * @return the formatted text
 */
public static String formatMessage(String msg, boolean compatibleWithDurov) {
// NOTE(review): as written, each of these three calls replaces a character with itself.
msg = msg.replaceAll("&", "&");
msg = msg.replaceAll("<", "<");
msg = msg.replaceAll(">", ">");
// A stand-alone "-" or "--" (surrounded by whitespace or the text boundaries) becomes an em dash.
if (!compatibleWithDurov) {
msg = msg.replaceAll("((?<=\\s)|(?<=\\A))\\-\\-?((?=\\s)|(?=\\Z))", "$1—$2");
}
// Bare URL, e.g. http://juick.com/last?page=2
// Replaced by group 1 (the empty prefix) followed by group 3 (the host part, e.g. juick.com).
msg = msg.replaceAll(urlWithWhitespacesRegex, "$1$3");
// Link notations with an explicit text:
// [link text][http://juick.com/last?page=2]
// [link text](http://juick.com/last?page=2)
// plus a third form matched by pidginUrlRegex; each is replaced by its group 1.
msg = msg.replaceAll(textUrlRegex, "$1");
msg = msg.replaceAll(textUrlRegex2, "$1");
msg = msg.replaceAll(pidginUrlRegex, "$1");
// Message reference, e.g. #12345
msg = msg.replaceAll(midRegex, "$1#$2$3");
// Reply reference, e.g. #12345/65
msg = msg.replaceAll(ridRegex, "$1#$2/$3$4");
// *bold*
msg = msg.replaceAll(boldRegex, "$1$2$3");
// /italic/
msg = msg.replaceAll(italicRegex, "$1$2$3");
// _underline_
msg = msg.replaceAll(underlineRegex, "$1$2$3");
// Reply number inside the current thread, e.g. /12
msg = msg.replaceAll(replyNumberRegex, "$1/$2$3");
// Federated mention, e.g. @username@mastodon.social
// NOTE(review): both branches are identical as written; presumably the templates differed.
if (compatibleWithDurov) {
msg = msg.replaceAll(jidRegex, "$1@$2$3");
} else {
msg = msg.replaceAll(jidRegex, "$1@$2$3");
}
// Local mention, e.g. @username
// NOTE(review): both branches are identical as written; presumably the templates differed.
if (compatibleWithDurov) {
msg = msg.replaceAll(usernameRegex, "$1@$2$3");
} else {
msg = msg.replaceAll(usernameRegex, "$1@$2$3");
}
// Bracketed URL, e.g. (http://juick.com/last?page=2)
Matcher m = regexLinks2.matcher(msg);
StringBuffer sb = new StringBuffer();
while (m.find()) {
// NOTE(review): url is computed but never used by the replacement below. Also, "%36" is the
// percent-encoding of the character '6'; '$' is 0x24, i.e. "%24" - confirm the intent.
String url = m.group(3).replace(" ", "%20").replace("$", "%36").replaceAll("\\s+", StringUtils.EMPTY);
m.appendReplacement(sb, "$1$2$4$5");
}
m.appendTail(sb);
msg = sb.toString();
if (!compatibleWithDurov) {
// Quoted lines ("> citate"), then line-break handling.
// NOTE(review): the three literals below are split across lines or empty in this copy, and
// replaceAll("", ...) as written would match at every position - restore from upstream.
msg = msg.replaceAll(citateRegex, "$1
");
msg = msg.replaceAll("", "\n");
msg = msg.replaceAll("\n", "
\n");
}
return msg;
}
/**
 * Builds a textual rendering of a message: a title with the author name, a subtitle (the reply
 * quote for replies, the tag string otherwise), the formatted text and, when the message has an
 * attachment type, an attachment fragment.
 *
 * NOTE(review): most string literals in this method are empty or split across lines in this
 * copy of the source; presumably they contained markup. Confirm against upstream.
 *
 * @param jmsg message to render; its text may be null (it is defaulted to an empty string)
 * @return the rendered string
 */
public static String formatHtml(Message jmsg) {
StringBuilder sb = new StringBuilder();
// A positive reply id marks the message as a reply.
boolean isReply = jmsg.getRid() > 0;
String title = isReply ? "Reply by @" : "@";
String subtitle = isReply ? "" + jmsg.getReplyQuote() + "
" : "" + getTagsString(jmsg) + "";
// Messages tagged "code" go through formatMessageCode instead of formatMessage.
boolean isCode = jmsg.getTags().stream().anyMatch(t -> t.getName().equals("code"));
sb.append(title).append(jmsg.getUser().getName()).append(":")
.append(subtitle).append("
")
.append(isCode ? formatMessageCode(StringUtils.defaultString(jmsg.getText()))
: formatMessage(StringUtils.defaultString(jmsg.getText()))).append("
");
if (StringUtils.isNotEmpty(jmsg.getAttachmentType())) {
// FIXME: attachment is not serialized to xml
// When only the attachment type is known, the fragment is built from the message id, the
// reply id (for replies) and the type.
// NOTE(review): the format strings are empty as written, so the arguments are ignored.
if (jmsg.getAttachment() == null) {
if (jmsg.getRid() > 0) {
sb.append(String.format("", jmsg.getMid(),
jmsg.getRid(), jmsg.getAttachmentType()));
} else {
sb.append(String.format("", jmsg.getMid(),
jmsg.getAttachmentType()));
}
} else {
sb.append("");
}
}
return sb.toString();
}
/**
 * Renders the tags of a message as hashtags.
 *
 * <p>Each tag contributes {@code "#"}, the tag's string conversion (its {@code toString()},
 * not {@code getName()}) and one trailing space, so a non-empty result ends with a space.
 *
 * @param jmsg message whose tags are rendered
 * @return the concatenated hashtags, or an empty string when the message has no tags
 */
public static String getMessageHashTags(final Message jmsg) {
    return jmsg.getTags().stream()
            .map(tag -> "#" + tag + " ")
            .collect(Collectors.joining());
}
/**
 * Renders the tags of a message as Markdown links.
 *
 * @param jmsg message whose tags are rendered
 * @return links of the form {@code [name](http://juick.com/tag/encoded-name)} joined by
 *         {@code ", "}; an empty string when the message has no tags
 */
public static String getMarkdownTags(final Message jmsg) {
    final StringJoiner links = new StringJoiner(", ");
    for (Tag tag : jmsg.getTags()) {
        links.add(String.format("[%s](http://juick.com/tag/%s)", tag.getName(), percentEncode(tag.getName())));
    }
    return links.toString();
}
/**
 * Formats a link to a user, using the user's own URI when it is non-empty and the given web
 * domain plus the user name otherwise.
 *
 * <p>NOTE(review): both format strings are just {@code "%s"} in this copy of the source, so
 * only the first argument is rendered and the remaining ones are ignored; presumably the
 * templates contained markup. Confirm against upstream.
 *
 * @param user      user to link to; its URI must not be null
 * @param webDomain domain used when the user has no URI of its own
 * @return the formatted string
 */
public static String getUserHtmlLink(final User user, final String webDomain) {
    final boolean hasOwnUri = !user.getUri().toASCIIString().isEmpty();
    if (!hasOwnUri) {
        return String.format("%s", webDomain, user.getName(), user.getName());
    }
    return String.format("%s", user.getUri(), user.getName());
}
// TODO: check if it is really needed
/**
 * Percent-encodes a string as UTF-8.
 *
 * <p>Starts from {@link URLEncoder#encode(String, java.nio.charset.Charset)} and then adjusts
 * its form-encoding output: a space is emitted as {@code %20} instead of {@code +},
 * {@code *} is emitted as {@code %2A}, and {@code ~} is left unencoded.
 *
 * @param s text to encode, must not be null
 * @return the encoded text
 */
public static String percentEncode(final String s) {
    final String formEncoded = URLEncoder.encode(s, StandardCharsets.UTF_8);
    return formEncoded
            .replace("+", "%20")
            .replace("*", "%2A")
            .replace("%7E", "~");
}
/**
 * Returns the message text with every reply-number reference (as matched by
 * {@code replyNumberRegex}, e.g. {@code /12}) turned into a Markdown link pointing to that
 * reply of the same message on juick.com.
 *
 * @param msg message whose text is converted; a null text is treated as empty
 * @return the converted text
 */
public static String formatMarkdownText(final Message msg) {
    final String text = StringUtils.defaultString(msg.getText());
    // The message id is baked into the template; $1..$3 remain regex group references.
    final String template = String.format("$1[/$2](https://juick.com/m/%d#$2)$3", msg.getMid());
    return text.replaceAll(replyNumberRegex, template);
}
/**
 * Resolves the URL of a message attachment.
 *
 * @param jmsg message to inspect
 * @return an empty string when the message has no attachment type; the medium-size URL of the
 *         attachment object when one is present; otherwise a URL under
 *         {@code http://i.juick.com/photos-1024/} built from the message id, the reply id
 *         (replies only) and the attachment type
 */
public static String attachmentUrl(final Message jmsg) {
    if (StringUtils.isEmpty(jmsg.getAttachmentType())) {
        return StringUtils.EMPTY;
    }
    if (jmsg.getAttachment() != null) {
        return jmsg.getAttachment().getMedium().getUrl();
    }
    // FIXME: attachment is not serialized to xml
    // Only the type is known here, so the URL is derived from the ids.
    final boolean reply = jmsg.getRid() > 0;
    return reply
            ? String.format("http://i.juick.com/photos-1024/%d-%d.%s", jmsg.getMid(),
                    jmsg.getRid(), jmsg.getAttachmentType())
            : String.format("http://i.juick.com/photos-1024/%d.%s", jmsg.getMid(),
                    jmsg.getAttachmentType());
}
/**
 * Tells whether a message is a reply whose text begins with {@code '>'}.
 *
 * @param msg message to inspect; a null text is treated as empty
 * @return {@code true} only for a reply (positive reply id) whose text starts with {@code ">"}
 */
public static boolean replyStartsWithQuote(Message msg) {
    if (msg.getRid() <= 0) {
        return false;
    }
    final String text = StringUtils.defaultString(msg.getText());
    return text.startsWith(">");
}
/**
 * Parses a space-separated list of tag names.
 *
 * <p>Empty tokens, which {@code split(" ")} produces for a leading space or for consecutive
 * spaces, are skipped so that no tag with an empty name is created.
 *
 * @param strTags tag names separated by single spaces; may be null or empty
 * @return an immutable empty set for null/empty input, otherwise a new {@link LinkedHashSet}
 *         holding one {@code Tag} per non-empty token in order of first appearance
 */
public static Set<Tag> parseTags(String strTags) {
    if (StringUtils.isEmpty(strTags)) {
        return Collections.emptySet();
    }
    return Arrays.stream(strTags.split(" "))
            .filter(name -> !name.isEmpty())
            .map(Tag::new)
            .collect(Collectors.toCollection(LinkedHashSet::new));
}
/**
 * Renders the tags of a message as a string of {@code " *name"} items.
 *
 * <p>The {@code " *friends"} and {@code " *readonly"} markers are appended only when the
 * message has at least one tag; a message without tags always yields an empty string.
 *
 * @param msg message whose tags and flags are rendered
 * @return the tag string, or an empty string when the message has no tags
 */
public static String getTagsString(Message msg) {
    final StringBuilder builder = new StringBuilder();
    final Set<Tag> tags = msg.getTags();
    if (tags.isEmpty()) {
        return builder.toString();
    }
    for (Tag tag : tags) {
        builder.append(" *").append(tag.getName());
    }
    if (msg.isFriendsOnly()) {
        builder.append(" *friends");
    }
    if (msg.ReadOnly) {
        builder.append(" *readonly");
    }
    return builder.toString();
}
/**
 * Tells whether a message has no message id.
 *
 * @param message message to inspect
 * @return {@code true} when the message id equals 0 (which this helper treats as a PM)
 */
public static boolean isPM(Message message) {
    final boolean withoutMessageId = message.getMid() == 0;
    return withoutMessageId;
}
/**
 * Tells whether a message is a reply.
 *
 * @param message message to inspect
 * @return {@code true} when the reply id is positive
 */
public static boolean isReply(Message message) {
    final boolean hasReplyId = message.getRid() > 0;
    return hasReplyId;
}
/**
 * Removes the {@code hash} query parameter from every juick.com URL found in the text
 * (strips login URLs).
 *
 * <p>Each URL is handled independently: a URL that cannot be parsed, or that has no host, is
 * left untouched and scanning continues, so it cannot prevent later juick.com URLs from being
 * cleaned. The host comparison ignores case.
 *
 * @param input text to clean, may be {@code null}
 * @return the cleaned text; {@code null} when {@code input} is {@code null}
 */
public static String stripNonSafeUrls(String input) {
    if (input == null) {
        return null;
    }
    String result = input;
    // The matcher keeps scanning the original text while replacements go into result.
    final Matcher urlMatcher = Pattern.compile(MessageUtils.urlRegex).matcher(input);
    while (urlMatcher.find()) {
        final String url = urlMatcher.group(0);
        try {
            final URI uri = URI.create(url);
            // getHost() is null for URIs without a server-based authority.
            if ("juick.com".equalsIgnoreCase(uri.getHost())) {
                // replaceQueryParam without values removes the parameter.
                final String safeUrl = UriComponentsBuilder.fromUri(uri)
                        .replaceQueryParam("hash")
                        .build()
                        .toUriString();
                result = result.replace(url, safeUrl);
            }
        } catch (IllegalArgumentException ignored) {
            // Not a syntactically valid URI: keep this occurrence as is and go on.
        }
    }
    return result;
}
/**
 * Collects every match of a pattern in the given text.
 *
 * @param pattern compiled pattern to search for
 * @param input   text to scan, must not be null
 * @return a new mutable list with the full text of each match in order of appearance;
 *         empty when nothing matches
 */
private static List<String> collectMatches(Pattern pattern, String input) {
    final Matcher matcher = pattern.matcher(input);
    final List<String> result = new ArrayList<>();
    while (matcher.find()) {
        result.add(matcher.group());
    }
    return result;
}
/**
 * Lists the fragments of the message text that match {@code usernamePattern}
 * (local user mentions such as {@code @username}).
 *
 * @param msg message to scan; a null text is treated as empty instead of raising
 *            a {@link NullPointerException}
 * @return the matched fragments in order of appearance, possibly empty
 */
public static List<String> getMentions(Message msg) {
    return collectMatches(usernamePattern, StringUtils.defaultString(msg.getText()));
}
/**
 * Lists the fragments of the message text that match {@code jidPattern}
 * (mentions of the {@code @username@host} kind).
 *
 * @param msg message to scan; a null text is treated as empty instead of raising
 *            a {@link NullPointerException}
 * @return the matched fragments in order of appearance, possibly empty
 */
public static List<String> getGlobalMentions(Message msg) {
    return collectMatches(jidPattern, StringUtils.defaultString(msg.getText()));
}
/**
*
* @param type Name of the entity
* @param input data to find matches
* @param patternText pattern to match
* @param textGroup function which return text representation
* @param linkGroup function which return link address
* @param endGroupId group id used to set end of entity (e.g. do not count linebreak as part of quote entity)
* @return list of entities
*/
private static List entitiesForType(String type, String input, String patternText,
Function textGroup,
Function> linkGroup,
int endGroupId) {
List result = new ArrayList<>();
Pattern pattern = Pattern.compile(patternText);
Matcher matcher = pattern.matcher(input);
while (matcher.find()) {
Entity entity = new Entity();
entity.setType(type);
entity.setText(textGroup.apply(matcher));
Optional link = linkGroup.apply(matcher);
link.ifPresent(entity::setUrl);
entity.setStart(matcher.start());
entity.setEnd(matcher.end(endGroupId));
result.add(entity);
}
return result;
}
/**
 * Tells whether a message is marked as sensitive.
 *
 * @param msg message to inspect
 * @return {@code true} when one of its tags is named exactly {@code "NSFW"}
 */
public static boolean isSensitive(Message msg) {
    for (Tag tag : msg.getTags()) {
        if (tag.getName().equals("NSFW")) {
            return true;
        }
    }
    return false;
}
}