RssFeedFactory.java
package org.ferris.resiste.console.rss;
import com.rometools.rome.feed.synd.SyndEnclosure;
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import com.rometools.rome.io.XmlReader;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
import org.ferris.resiste.console.lang.StringUtils;
import org.ferris.resiste.console.retry.ExceptionRetry;
import org.jdom2.Element;
import org.slf4j.Logger;
/**
* RssFeedFactory
*
* @author Michael Remijan mjremijan@yahoo.com @mjremijan
*/
@ApplicationScoped
public class RssFeedFactory {
@Inject
protected Logger log;
@ExceptionRetry
public RssFeed build(RssUrl feedUrl) throws IOException, FeedException {
log.debug(String.format("ENTER %s", feedUrl));
String rawXml = "RAW_XML";
try {
RssConnection connection
= feedUrl.openConnection();
rawXml
= new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8")).lines().collect(Collectors.joining("\n"));
com.rometools.rome.feed.synd.SyndFeed romeFeed
= new SyndFeedInput().build(new XmlReader(new ByteArrayInputStream(rawXml.getBytes("UTF-8"))));
List<com.rometools.rome.feed.synd.SyndEntry> romeEntries
= romeFeed.getEntries();
RssFeed feed = new RssFeed();
feed.setId(feedUrl.getId());
feed.setLink(romeFeed.getLink());
feed.setTitle(romeFeed.getTitle());
feed.setEntries(
romeEntries.stream()
.map(re -> {
RssEntry e = new RssEntry();
// FeedId
e.setFeedId(feedUrl.getId());
// EntryId
e.setEntryId(re.getUri());
// Title
e.setTitle(re.getTitle());
// Author
e.setAuthor(re.getAuthor());
// Link
e.setLink(re.getLink());
{
if (!e.getLink().startsWith("http")) {
e.setLink(feed.getLink() + e.getLink());
}
}
// Published date
e.setPublishedDate(re.getPublishedDate());
// Enclosures
Optional<List<SyndEnclosure>> enclosures
= Optional.ofNullable(re.getEnclosures());
// Foreign markup
Optional<List<Element>> foreignMarkups
= Optional.ofNullable(re.getForeignMarkup());
// Images
{
List<RssImage> images = new LinkedList<>();
enclosures.ifPresent(
se -> se.stream().filter(a -> a.getType().toLowerCase().startsWith("image")).forEach(
b -> images.add(new RssImage(b.getType(), b.getUrl())))
);
List<RssImage> thumbnails = new LinkedList<>();
foreignMarkups.ifPresent(
fm -> fm.stream()
.filter(el -> el.getName().toLowerCase().equals("thumbnail"))
.map(el -> el.getAttribute("url"))
.filter(at -> at != null)
.map(at -> StringUtils.trimToNull(at.getValue()))
.filter(s -> s != null)
.forEach(s -> thumbnails.add(new RssImage("image/thumbnail", s)))
);
images.removeIf(img -> {
// Get image url without extension...i.e no ".jpg"
// https://cdn.mos.cms.futurecdn.net/5DUh8HXQKMsKtHTxKwXXgZ.jpg
final String imgUrl
= img.getUrl().substring(0, img.getUrl().lastIndexOf("."));
// See if any thumbnail starts with the same url.
long count
= thumbnails.stream().filter(th -> th.getUrl().startsWith(imgUrl)).count();
// Remove image url and keep thumbnail if so.
return count > 0;
});
images.forEach(img -> e.addImage(img));
thumbnails.forEach(img -> e.addImage(img));
}
// Other media files
enclosures.ifPresent(
se -> se.stream().filter(a -> !a.getType().toLowerCase().startsWith("image")).forEach(
b -> e.addMediaFile(new RssMediaFile(b.getType(), b.getUrl())))
);
// Content
StringBuilder sp = new StringBuilder("");
if (re.getContents() != null && !re.getContents().isEmpty()) {
re.getContents().stream()
.forEach(sc -> sp.append(sc.getValue()));
} else if (re.getDescription() != null) {
sp.append(re.getDescription().getValue());
}
e.setContents(sp.toString());
return e;
}).collect(Collectors.toCollection(LinkedList::new))
);
return feed;
} catch (FeedException e) {
log.error(String.format("Error parsing RSS feed \"%s\"", feedUrl.toString()));
log.error(String.format("%nRAW_XML%n%s", rawXml));
throw new FeedException(
String.format("URL=\"%s\", RAW_XML=\"%s\"", feedUrl.toString(), rawXml),
e);
}
}
}