Recipes for webscraping
Methods
Sorry, the docs for this are broken until nbdev fixes the @typedispatch
problem upstream.
Example: downloading Pokémon pictures
Catch 'em all (generations I-VII; only generation I is enabled in the code below)
From: https://en.wikipedia.org/wiki/List_of_generation_I_Pok%C3%A9mon
from itertools import chain
import nest_asyncio
import pandas as pd
nest_asyncio.apply()
# Roman-numeral generation labels used to build the Wikipedia list URLs.
# Only generation I is enabled; 'II', 'III', 'IV', 'V', 'VI' and 'VII' can be
# appended to this list to scrape the remaining generations as well.
gens = ['I']
sources = [f'https://en.wikipedia.org/wiki/List_of_generation_{gen}_Pok%C3%A9mon' for gen in gens]

# Collect the names page by page into a set so duplicates are dropped.
unique_names = set()
for source in sources:
    tables = pd.read_html(source)
    # Use the second table parsed from the page, skip its last row and keep
    # only the first column.
    # NOTE(review): presumably this is the Pokémon list table with a trailing
    # footer row -- confirm against the live page layout.
    first_column = tables[1].iloc[:-1, 0]
    unique_names.update(first_column.tolist())
pokemon_names = list(unique_names)

# Download images for every collected name into the data directory.
save_path = Path('../../data/pokemon')
downloader = ImgDownloader(save_path=save_path)
downloader.get_imgs(pokemon_names)
# IPython shell escape: list the contents of the download directory
# (`{save_path}` is interpolated from the Python variable of that name).
!ls {save_path}
# NOTE(review): presumably displays some of the images downloaded for
# 'Charmander' -- confirm against ImgDownloader.show_samples. The trailing
# semicolon keeps the notebook from echoing the call's return value.
downloader.show_samples('Charmander');