diff options
author | Vitaly Takmazov | 2020-06-03 17:32:55 +0300 |
---|---|---|
committer | Vitaly Takmazov | 2020-06-03 17:32:55 +0300 |
commit | b0bdce22447ff815512ae7612dd4ab4a5d9a409a (patch) | |
tree | 9c33f07584697130a316c6db404844207494a832 /src/main/java | |
parent | cafe4644cf82267397c21433c0c8965ff90c1073 (diff) |
sape: switch to xml output
Diffstat (limited to 'src/main/java')
-rw-r--r-- | src/main/java/com/juick/www/ad/SapeService.java | 4 | ||||
-rw-r--r-- | src/main/java/com/juick/www/ad/models/Page.java | 37 | ||||
-rw-r--r-- | src/main/java/com/juick/www/ad/models/Site.java | 87 | ||||
-rw-r--r-- | src/main/java/ru/sape/Sape.java | 6 | ||||
-rw-r--r-- | src/main/java/ru/sape/SapeConnection.java | 17 | ||||
-rw-r--r-- | src/main/java/ru/sape/SapePageLinks.java | 66 |
6 files changed, 155 insertions, 62 deletions
diff --git a/src/main/java/com/juick/www/ad/SapeService.java b/src/main/java/com/juick/www/ad/SapeService.java index 4ef4a213..3c35f320 100644 --- a/src/main/java/com/juick/www/ad/SapeService.java +++ b/src/main/java/com/juick/www/ad/SapeService.java @@ -60,8 +60,8 @@ public class SapeService { && visitor.isAnonymous(); model.addAttribute("showAdv", showAdv); if (showAdv) { - String links = sape.getPageLinks(requestURI, sapeCookie).render(); - model.addAttribute("links", links); + sape.getPageLinks(requestURI, sapeCookie) + .ifPresent(sapePageLinks -> model.addAttribute("links", sapePageLinks.render())); } } } diff --git a/src/main/java/com/juick/www/ad/models/Page.java b/src/main/java/com/juick/www/ad/models/Page.java new file mode 100644 index 00000000..d5bb8ae3 --- /dev/null +++ b/src/main/java/com/juick/www/ad/models/Page.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2008-2020, Juick + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package com.juick.www.ad.models; + +import java.util.List; + +public class Page { + private final String uri; + private final List<String> links; + + public Page(String uri, List<String> links) { + this.uri = uri; + this.links = links; + } + + public String getUri() { + return uri; + } + public List<String> getLinks() { + return links; + } +} diff --git a/src/main/java/com/juick/www/ad/models/Site.java b/src/main/java/com/juick/www/ad/models/Site.java new file mode 100644 index 00000000..fbe1c033 --- /dev/null +++ b/src/main/java/com/juick/www/ad/models/Site.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2008-2020, Juick + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as + * published by the Free Software Foundation, either version 3 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +package com.juick.www.ad.models; + +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +public class Site { + private final String siteUrl; + private final String delimiter; + private final List<Page> pages; + private final String code; + + public Site(String siteUrl, String code, String delimiter, List<Page> pages) { + this.siteUrl = siteUrl; + this.code = code; + this.delimiter = delimiter; + this.pages = pages; + } + + public String getSiteUrl() { + return siteUrl; + } + public String getDelimiter() { + return delimiter; + } + public List<Page> getPages() { + return pages; + } + + public String getCode() { + return code; + } + + public static Site fromXMLData(String data) throws ParserConfigurationException, IOException, SAXException { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + DocumentBuilder builder = factory.newDocumentBuilder(); + Document document = builder.parse(IOUtils.toInputStream(data, StandardCharsets.UTF_8)); + Element sape = document.getDocumentElement(); + String code = StringUtils.EMPTY; + String siteUrl = sape.getAttribute("site_url"); + String delimiter = sape.getAttribute("delimiter"); + NodeList pageNodes = sape.getElementsByTagName("page"); + List<Page> pages = new ArrayList<>(pageNodes.getLength()); + for (int i = 0; i < pageNodes.getLength(); i++) { + Element pageElement = (Element) pageNodes.item(i); + String uri = pageElement.getAttribute("uri"); + if (uri.equals("*")) { + code = pageElement.getTextContent(); + } + NodeList linkNodes = pageElement.getElementsByTagName("link"); + List<String> links = new ArrayList<>(linkNodes.getLength()); + for (int j = 0; j < linkNodes.getLength(); j++) { + links.add(linkNodes.item(j).getTextContent()); + } + pages.add(new Page(uri, links)); + } + return new Site(siteUrl, code, delimiter, pages); + } +} diff --git a/src/main/java/ru/sape/Sape.java b/src/main/java/ru/sape/Sape.java index a94bcc62..6b28be25 100644 --- a/src/main/java/ru/sape/Sape.java +++ b/src/main/java/ru/sape/Sape.java @@ -4,6 +4,7 @@ package ru.sape; import java.net.URI; +import java.util.Optional; public class Sape { @@ -19,7 +20,8 @@ public class Sape { } public boolean debug = false; - public SapePageLinks getPageLinks(URI requestUri, String cookie) { - return new SapePageLinks(sapePageLinkConnection, sapeUser, requestUri, cookie, debug); + public Optional<SapePageLinks> getPageLinks(URI requestUri, String cookie) { + return sapePageLinkConnection.getData() + .map(site -> new SapePageLinks(site, sapeUser, requestUri, cookie, debug)); } } diff --git a/src/main/java/ru/sape/SapeConnection.java b/src/main/java/ru/sape/SapeConnection.java index ee5a5e5c..cec308bf 100644 --- a/src/main/java/ru/sape/SapeConnection.java +++ b/src/main/java/ru/sape/SapeConnection.java @@ -1,8 +1,8 @@ package ru.sape; -import com.github.ooxi.phparser.SerializedPhpParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.juick.www.ad.models.Site; import java.io.BufferedReader; import java.io.IOException; @@ -66,17 +66,16 @@ public class SapeConnection { } } } - Map<String, Object> cached; + Site cachedSite; long cacheUpdated; - @SuppressWarnings("unchecked") - public Map<String, Object> getData() { + public Optional<Site> getData() { if (cacheLifeTime <= (System.currentTimeMillis() - cacheUpdated) / 1000) { for (String server : serverList) { String data; try { - data = fetchRemoteFile(server, dispenserPath + "&charset=UTF-8"); + data = fetchRemoteFile(server, dispenserPath + "&charset=UTF-8&as_xml=true"); } catch (IOException e1) { continue; } @@ -88,7 +87,7 @@ public class SapeConnection { } try { - cached = (Map<String, Object>) new SerializedPhpParser(data).parse(); + cachedSite = Site.fromXMLData(data); } catch (Exception e) { logger.error("Can't parse Sape data", e); continue; @@ -96,14 +95,14 @@ public class SapeConnection { cacheUpdated = System.currentTimeMillis(); - return cached; + return Optional.of(cachedSite); } logger.error("Unable to fetch Sape data"); - return Collections.emptyMap(); + return Optional.empty(); } - return cached; + return Optional.of(cachedSite); } } diff --git a/src/main/java/ru/sape/SapePageLinks.java b/src/main/java/ru/sape/SapePageLinks.java index 77715aea..52def3f6 100644 --- a/src/main/java/ru/sape/SapePageLinks.java +++ b/src/main/java/ru/sape/SapePageLinks.java @@ -1,71 +1,39 @@ package ru.sape; +import com.juick.www.ad.models.Site; import org.apache.commons.lang3.StringUtils; import java.net.URI; -import java.util.*; +import java.util.Collections; +import java.util.List; +import java.util.stream.Collectors; public class SapePageLinks { - private boolean showCode; + private final boolean showCode; + private final String linkDelimiter; + private final List<String> pageLinks; - @SuppressWarnings("unchecked") - public SapePageLinks(SapeConnection sapeConnection, String sapeUser, URI request, String sapeCookie, boolean showCode) { + public SapePageLinks(Site site, String sapeUser, URI request, String sapeCookie, boolean showPageCode) { String req = StringUtils.isNotEmpty(request.getQuery()) ? request.getPath() + "?" + request.getQuery() : request.getPath(); - if (sapeUser.equals(sapeCookie)) { - showCode = true; - } - - Map<String, Object> data = sapeConnection.getData(); - if (data.containsKey("__sape_delimiter__")) { - linkDelimiter = (String) data.get("__sape_delimiter__"); - } - - if (data.containsKey(req)) { - pageLinks = new ArrayList<>(((Map<Object, String>) data.get(req)).values()); - } + linkDelimiter = site.getDelimiter(); - if (data.containsKey("__sape_new_url__")) { - if (showCode) { - Object newUrl = data.get("__sape_new_url__"); + this.showCode = showPageCode || sapeUser.equals(sapeCookie); - if (newUrl instanceof Map) { - pageLinks = new ArrayList<>(((Map<Object, String>) newUrl).values()); - } else { - pageLinks = new ArrayList<>(Collections.singletonList((String) newUrl)); - } - } + if (showCode) { + pageLinks = Collections.singletonList(site.getCode()); + } else { + pageLinks = site.getPages().stream().filter(page -> page.getUri().equals(req)) + .flatMap(page -> page.getLinks().stream()).collect(Collectors.toList()); } - - this.showCode = showCode; } - private String linkDelimiter = "."; - private List<String> pageLinks = new ArrayList<>(); - public String render() { - return render(-1); - } - public String render(int count) { + public String render() { StringBuilder s = new StringBuilder(); - - if (count < 0) { - count = pageLinks.size(); - } - - for (Iterator<String> i = pageLinks.iterator(); i.hasNext() && count > 0; count--) { - if (s.length() > 0) { - s.append(linkDelimiter); - } - - String l = i.next(); - - s.append(l); - - i.remove(); - } + s.append(String.join(linkDelimiter, pageLinks)); if (showCode) { s.insert(0, "<sape_noindex>"); |