// RssUrlFactory.java

package org.ferris.resiste.console.rss;

import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.enterprise.context.ApplicationScoped;
import javax.inject.Inject;
import org.ferris.resiste.console.lang.StringUtils;
import org.slf4j.Logger;

/**
 * Turns a single line of comma-separated feed configuration into an
 * {@link RssUrl}.
 *
 * <p>Accepted line format:
 * <pre>
 *   id,url
 *   id,url,regex[[user-pattern]]
 * </pre>
 * Blank lines and lines starting with {@code #} produce no result.
 *
 * @author Michael Remijan mjremijan@yahoo.com @mjremijan
 */
@ApplicationScoped
public class RssUrlFactory {

    /**
     * Format of one feed line, compiled once because {@link Pattern} is
     * immutable and safe to share.
     *
     * <ul>
     *   <li>group 1 - {@code ([^,]+)}: the ID, one or more non-comma characters</li>
     *   <li>group 2 - {@code ([^,]+)}: the URL, one or more non-comma characters</li>
     *   <li>group 3 - optional {@code ,regex[[...]]} suffix, with optional
     *       whitespace after the comma and after the closing brackets</li>
     *   <li>group 4 - {@code (.+)}: the user-defined regex between
     *       {@code [[} and {@code ]]} (greedy, so it extends to the last
     *       {@code ]]} on the line)</li>
     * </ul>
     *
     * The {@code MULTILINE} flag is retained from the original definition;
     * the pattern is only used with {@link Matcher#matches()}, which requires
     * the entire input to match.
     */
    private static final Pattern FEED_LINE_PATTERN
        = Pattern.compile("^([^,]+),([^,]+)(,\\s*regex\\[\\[(.+)\\]\\]\\s*)?$", Pattern.MULTILINE);

    @Inject
    protected Logger log;

    /**
     * Parses one line of feed data.
     *
     * @param commaSeparatedFeedData the raw line; it is first passed through
     *        {@code StringUtils.trimToEmpty} (presumably null-safe - confirm
     *        against that helper)
     * @return an empty {@link Optional} when the trimmed line is empty or
     *         starts with {@code #}; otherwise an {@link Optional} holding a
     *         new {@link RssUrl} built from the trimmed ID, the trimmed URL
     *         and the optional compiled user pattern
     * @throws RuntimeException if the line does not match the expected
     *         format, if the ID or URL is empty after trimming, or if the
     *         user-defined regex does not compile
     */
    public Optional<RssUrl> parse(String commaSeparatedFeedData) {
        // Parameterized logging: same rendered message, no eager formatting.
        log.info("ENTER \"{}\"", commaSeparatedFeedData);

        final String line = StringUtils.trimToEmpty(commaSeparatedFeedData);

        // Nothing to parse: blank line or comment line.
        if (line.isEmpty() || line.startsWith("#")) {
            return Optional.empty();
        }

        final Matcher m = FEED_LINE_PATTERN.matcher(line);
        if (!m.matches()) {
            throw new RuntimeException(
                String.format("Line \"%s\" does not match regex pattern \"%s\"", line, FEED_LINE_PATTERN.pattern())
            );
        }

        // Groups 1 and 2 are mandatory in the pattern, so they are never
        // null after a successful match; they may still be whitespace-only.
        final String id = m.group(1).trim();
        if (id.isEmpty()) {
            throw new RuntimeException(
                String.format("ID trimmed to empty: \"%s\"", line)
            );
        }

        final String url = m.group(2).trim();
        if (url.isEmpty()) {
            throw new RuntimeException(
                String.format("URL trimmed to empty: \"%s\"", line)
            );
        }

        // Group 4 is null when the optional regex[[...]] suffix is absent.
        final Optional<Pattern> pattern = compileUserPattern(m.group(4));

        return Optional.of(new RssUrl(id, url, pattern));
    }

    /**
     * Compiles the user-supplied regex, if one was given.
     *
     * @param userPattern the text captured between {@code [[} and {@code ]]},
     *        or {@code null} when the line had no regex suffix
     * @return the compiled pattern, or an empty {@link Optional} for
     *         {@code null} input
     * @throws RuntimeException if the regex fails to compile; the original
     *         failure is attached as the cause
     */
    private static Optional<Pattern> compileUserPattern(String userPattern) {
        if (userPattern == null) {
            return Optional.empty();
        }
        try {
            return Optional.of(Pattern.compile(userPattern));
        } catch (RuntimeException e) {
            // Keep the cause so the syntax error position/description
            // is not lost.
            throw new RuntimeException(
                String.format("User defined regex pattern \"%s\" failed to compile", userPattern),
                e
            );
        }
    }
}