如果是开源项目的网站,可以向 Algolia DocSearch 官方提交申请,由官方爬虫每周抓取一次。
如果选择自托管(自己运行爬虫),配置如下:
api
.env
APPLICATION_ID=5SE6G4PQJP
API_KEY=1f79esda19349sddf1dsfsdf7a84f57c70c86
config
config.json
{
"index_name": "ycycxz.com",
"start_urls": ["https://ycycxz.com/"],
"sitemap_urls": ["https://ycycxz.com/sitemap.xml"],
"sitemap_alternate_links": true,
"stop_urls": ["/assets", "/*/tags"],
"selectors": {
"lvl0": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true
},
"lvl1": "header h1",
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
},
"strip_chars": " .,;:#",
"custom_settings": {
"separatorsToIndex": "_",
"attributesForFaceting": ["language", "version", "type", "docusaurus_tag"],
"attributesToRetrieve": [
"hierarchy",
"content",
"anchor",
"url",
"url_without_anchor",
"type"
]
}
}
docker
需要先安装 jq 和 Docker(jq 用于把 config.json 转成字符串,通过 CONFIG 环境变量传给爬虫容器)
docker run -it --env-file .env -e "CONFIG=$(cat config.json | jq -r tostring)" algolia/docsearch-scraper