Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

How to get rid of unnecessary(?) code - adjusting to DRY principle

I was with the similiar topic some time ago. I'm looking at my app and I think it has a lot of unnecessary code. What I mean is I have service that is responsible for scraping data from different categories of books from two bookstores. Right now I have 5 categories so I have 5 methods, but what if I'm gonna add some new categories? I will have to add more methods... and I think it is not a good option. Right now it looks like this:

Controller

@GetMapping("/romances")
    public Map<Bookstore, List<Book>> get15RomanticBooks() {
        return categorizedBookService.get15BooksFromRomanceCategory();
    }

    @GetMapping("/biographies")
    public Map<Bookstore, List<Book>> get15BiographiesBooks() {
        return categorizedBookService.get15BooksFromBiographiesCategory();
    }

    @GetMapping("/guides")
    public Map<Bookstore, List<Book>> get15GuidesBooks() {
        return categorizedBookService.get15BooksFromGuidesCategory();
    }

    @GetMapping("/fantasy")
    public Map<Bookstore, List<Book>> get15FantasyBooks() {
        return categorizedBookService.get15BooksFromFantasyCategory();
    }

and here I was thinking about

@GetMapping("/{category}")
public Map<......> get 15BooksFromCategory(@PathVariable CategoryType category)
{...}

I think this is the best way, but it is harder with service.

Service for it looks like this:

package bookstore.scraper.book.scrapingtypeservice;

import bookstore.scraper.enums.Bookstore;
import bookstore.scraper.book.Book;
import bookstore.scraper.fetcher.empik.EmpikFetchingBookService;
import bookstore.scraper.fetcher.merlin.MerlinFetchingBookService;
import bookstore.scraper.urlproperties.EmpikUrlProperties;
import bookstore.scraper.urlproperties.MerlinUrlProperties;
import bookstore.scraper.utilities.JSoupConnector;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.EnumMap;
import java.util.List;
import java.util.Map;

@Service
@Slf4j
public class CategorizedBookService {

    private final EmpikFetchingBookService empikBookService;
    private final MerlinFetchingBookService merlinFetchingBookService;
    private final EmpikUrlProperties empikUrlProperties;
    private final MerlinUrlProperties merlinUrlProperties;
    private final JSoupConnector jSoupConnector;

    @Autowired
    public CategorizedBookService(EmpikFetchingBookService empikBookService, MerlinFetchingBookService merlinFetchingBookService, EmpikUrlProperties empikUrlProperties, MerlinUrlProperties merlinUrlProperties, JSoupConnector jSoupConnector) {
        this.empikBookService = empikBookService;
        this.merlinFetchingBookService = merlinFetchingBookService;
        this.empikUrlProperties = empikUrlProperties;
        this.merlinUrlProperties = merlinUrlProperties;
        this.jSoupConnector = jSoupConnector;
    }

    public Map<Bookstore, List<Book>> get15BooksFromRomanceCategory() {
        return get15BooksFrom(empikUrlProperties.getEmpik().getRomances(), merlinUrlProperties.getMerlin().getRomances());
    }

    public Map<Bookstore, List<Book>> get15BooksFromFantasyCategory() {
        return get15BooksFrom(empikUrlProperties.getEmpik().getFantasy(), merlinUrlProperties.getMerlin().getFantasy());
    }

    public Map<Bookstore, List<Book>> get15BooksFromCrimeCategory() {
        return get15BooksFrom(empikUrlProperties.getEmpik().getCrime(), merlinUrlProperties.getMerlin().getCrime());
    }

    public Map<Bookstore, List<Book>> get15BooksFromGuidesCategory() {
        return get15BooksFrom(empikUrlProperties.getEmpik().getGuides(), merlinUrlProperties.getMerlin().getGuides());
    }

    public Map<Bookstore, List<Book>> get15BooksFromBiographiesCategory() {
        return get15BooksFrom(empikUrlProperties.getEmpik().getBiographies(), merlinUrlProperties.getMerlin().getBiographies());
    }

    private Map<Bookstore, List<Book>> get15BooksFrom(String bookStoreEmpikURL, String bookStoreMerlinURL) {
        Map<Bookstore, List<Book>> bookstoreWith15CategorizedBooks = new EnumMap<>(Bookstore.class);

        bookstoreWith15CategorizedBooks.put(Bookstore.EMPIK, empikBookService
                .get15BooksFromCategory(jSoupConnector.connect(bookStoreEmpikURL)));
        bookstoreWith15CategorizedBooks.put(Bookstore.MERLIN, merlinFetchingBookService
                .get15BooksFromCategory(jSoupConnector.connect(bookStoreMerlinURL)));

        return bookstoreWith15CategorizedBooks;
    }
}

I have to pass 2 different links depend on which category was called. Is there any way to do this?

EmpikBookService/merlinFetchingBookService are services that use Jsoup to scrape data.

package bookstore.scraper.fetcher.empik;

import bookstore.scraper.book.Book;
import bookstore.scraper.urlproperties.EmpikUrlProperties;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.List;
import java.util.stream.IntStream;

@Service
public class EmpikFetchingBookService {

    private static final int FIRST_PART_PRICE = 0;
    private static final int SECOND_PART_PRICE = 1;

    private static final int BESTSELLERS_NUMBER_TO_FETCH = 5;
    private static final int CATEGORIZED_BOOKS_NUMBER_TO_FETCH = 15;
    private static final String DIV_PRODUCT_WRAPPER = "div.productWrapper";
    private static final String DATA_PRODUCT_ID = "data-product-id";

    private final EmpikUrlProperties empikUrlProperties;

    @Autowired
    public EmpikFetchingBookService(EmpikUrlProperties empikUrlProperties) {
        this.empikUrlProperties = empikUrlProperties;
    }

    public Book getMostPreciseEmpikBook(Document document) {
        String author = document.select("div.smartAuthorWrapper.ta-product-smartauthor").select("a").first().text();
        String price = convertEmpikPriceWithPossibleDiscountToActualPrice(document.select("div.price.ta-price-tile").first().text());
        String title = document.select(DIV_PRODUCT_WRAPPER).select("strong").first().text();
        String productID = document.select(DIV_PRODUCT_WRAPPER).select("a").first().attr(DATA_PRODUCT_ID);
        String bookUrl = createBookURL(title, productID);

        return Book.builder()
                .author(author)
                .price(price)
                .title(title)
                .productID(productID)
                .bookURL(bookUrl).build();
    }

    public List<Book> get5BestSellersEmpik(Document document) {
        List<Element> siteElements = document.select(DIV_PRODUCT_WRAPPER);
        List<Book> empikBestSellers = new ArrayList<>();

        IntStream.range(0, BESTSELLERS_NUMBER_TO_FETCH)
                .forEach(iteratedElement -> {

                    String author = siteElements.get(iteratedElement).select("div.smartAuthorWrapper.ta-product-smartauthor").select("a").first().text();
                    String price = convertEmpikPriceWithPossibleDiscountToActualPrice(siteElements.get(iteratedElement).select("div.price.ta-price-tile").first().text());
                    String title = siteElements.get(iteratedElement).select("strong").first().ownText();
                    String productID = siteElements.get(iteratedElement).select(DIV_PRODUCT_WRAPPER).select("a").first().attr(DATA_PRODUCT_ID);
                    String bookUrl = createBookURL(title, productID);

                    empikBestSellers.add(Book.builder()
                            .author(author)
                            .price(price)
                            .title(title)
                            .productID(productID)
                            .bookURL(bookUrl)
                            .build());
                });
        return empikBestSellers;
    }

    public List<Book> get15BooksFromCategory(Document document) {
        List<Book> books = new ArrayList<>();
        List<Element> siteElements = document.select("div.productBox__info");

        IntStream.range(0, CATEGORIZED_BOOKS_NUMBER_TO_FETCH)
                .forEach(iteratedElement -> {

                    String author = executeFetchingAuthorProcess(siteElements, iteratedElement);
                    String price = convertEmpikPriceWithPossibleDiscountToActualPrice(siteElements.get(iteratedElement).select("div.productBox__price").first().text());
                    String title = siteElements.get(iteratedElement).select("span").first().ownText();
                    String productID = siteElements.get(iteratedElement).select("a").first().attr(DATA_PRODUCT_ID);
                    String bookUrl = createBookURL(title, productID);

                    books.add(Book.builder()
                            .author(author)
                            .price(price)
                            .title(title)
                            .productID(productID)
                            .bookURL(bookUrl)
                            .build());
                });

        return books;
    }

    private String convertEmpikPriceWithPossibleDiscountToActualPrice(String price) {
        String[] splittedElements = price.split("\\s+");
        return splittedElements[FIRST_PART_PRICE] + splittedElements[SECOND_PART_PRICE];
    }

    private String createBookURL(String title, String productID) {
        return String.format(empikUrlProperties.getEmpik().getConcreteBook(), title, productID);
    }

    //method is required as on empik site, sometimes occurs null for author and we need to change code for fetching
    private static String executeFetchingAuthorProcess(List<Element> siteElements, int i) {
        String author;
        Element authorElements = siteElements.get(i).select("span > a").first();
        if (authorElements != null)
            author = authorElements.ownText();
        else
            author = siteElements.get(i).select("> span > span").first().text();
        return author;
    }
}
like image 484
mara122 Avatar asked Aug 06 '19 20:08

mara122


Video Answer


2 Answers

(1) The name get15BooksFromCategory(CategoryType) is not right: you are hardcoding a number of books to be returned into the method name.

Today you return 15, tomorrow you would need to return 20, on Sundays you may need to return 5, for Andrews you may need to return 50. You get the point.

Consider these signatures.

getAllBooksFromCategory(CategoryType);
getNBooksFromCategory(CategoryType, Integer);

(2) Get rid of these fields in the service.

private final EmpikUrlProperties empikUrlProperties;
private final MerlinUrlProperties merlinUrlProperties;
private final JSoupConnector jSoupConnector;

The first two are parts of EmpikFetchingBookService and MerlinFetchingBookService, respectively. JSoupConnector is a more low-level abstraction and it shouldn't appear at this level. It may reside in a common parent of those book services or be a separate JSoupService that the common parent is dependent on.

(3) Ideally, you should end up with a very simple service which has a single responsibility - to collect books from its sources.

 class BookService {
      private List<BookServiceSource> sources;

      public Map<String, List<Book>> getBooksByCategory(Category category) {
          return sources.stream()
              .collect(Collectors.toMap(BookServiceSource::getName, 
                  source -> source.getBooksByCategory(category)));
      }
 }

BookServiceSource has a similar interface as BookService does. However, MerlinSource, as a subclass of BookServiceSource, doesn't the delegate the job to others. Instead, it prepares an URL and gives it to the JSoupService.

The responsibility of a BookServiceSource is to prepare request parameters and transform the result returned from the JSoupService into a List<Book>. Since each bookstore has a different DOM, you need to know how a particular DOM can be mapped into your structure.

interface BookServiceSource {
    String getName();
    List<Book> getBooksByCategory(Category category);
}

class MerlinSource implements BookServiceSource {
    private JSoupService service;
    private MerlinUrlProperties properties;

    @Override
    public String getName() {
      return "merlin";
    }

    @Override
    public List<Book> getBooksByCategory(Category category) {
      // at this point, we have both 
      // JSoupService (to make a real request) and 
      // MerlinUrlProperties (to prepare everything for that request)
    }
}

Think of the MerlinUrlProperties as a utility that can give a mapping between the category and a URL to books from that category.

MerlinUrlProperties could be a Map itself if it contains nothing but a bunch of methods that return URLs. The point is you don't have to define a new method for a new category and force everybody who uses your API to change themselves in order to include a new part of the API. With a Map or enum, the interface would be more stable.

Map<String, String> categoryToMarlinURL = new HashMap<>();

categoryToMarlinURL.put("horror", "marlin.com/horror");
categoryToMarlinURL.put("drama", "marlin.com/drama");

You have everything you need:

  • the category (category),
  • the URL to that category (categoryToMarlinURL.get(category)),
  • the service that makes requests (jSoupService.connect(categoryToMarlinURL.get(category))).
like image 66
Andrew Tobilko Avatar answered Nov 01 '22 10:11

Andrew Tobilko


Implement the Chain Of Responsibility pattern and allow services to fetch and put the results to the result Map object. Also let the Spring to make some magic with autowiring Services by providing some common interface

public interface FetchingService {
    public Map<Bookstore, List<Book>> fetchAndAddToResult(Map<Bookstore, List<Book>> result, CategoryType category);
}

@Service
public class EmpikFetchingBookService implements FetchingService  {

    // ...

    @Override
    public Map<Bookstore, List<Book>> fetchAndAddToResult(Map<Bookstore, List<Book>> result, CategoryType category) {
        result.put(Bookstore.EMPIK, getListOfBooks(category));
        return result;
    }
}

@Service
public class MerlinFetchingBookService implements FetchingService  {

    // ...

    @Override
    public Map<Bookstore, List<Book>> fetchAndAddToResult(Map<Bookstore, List<Book>> result, CategoryType category) {
        result.put(Bookstore.MERLIN, getListOfBooks(category));
        return result;
    }
}

@Service
@Slf4j
public class CategorizedBookService {
    private final List<FetchingService> services;
    //JSoup connector and Properties move to FetchingServices because it is part of those services implementation

    @Autowired
    public CategorizedBookService(List<FetchingService> services) {
        this.services = services;
    }

    public Map<Bookstore, List<Book>> get15BooksByCategory(CategoryType category) {
        Map<Bookstore, List<Book>> result = new HashMap<>();
        for(FetchingService service : services) {
            result = service.fetchAndAddToResult(result, category);
        }
        return result;
    }
}
like image 25
m.antkowicz Avatar answered Nov 01 '22 10:11

m.antkowicz