评论

收藏

[JavaScript] Node.js使用cheerio解析html

开发技术 开发技术 发布于:2021-07-13 17:45 | 阅读数:501 | 评论:0

  cheerio 的语法与 jQuery 类似，可以在 Node.js 中解析并操作 HTML。
  文档

  • 中文文档: https://github.com/cheeriojs/cheerio/wiki/Chinese-README
  安装
npm i cheerio
  代码实例
const cheerio = require("cheerio");
// Parse the fragment with XML mode enabled and entity decoding disabled.
const parseOptions = { xmlMode: true, decodeEntities: false };
const doc = cheerio.load('<h2 class="title">Hello world</h2>', parseOptions);

// Replace the heading text, then add an extra class to every <h2>.
doc("h2.title").text("Hello there!");
doc("h2").addClass("welcome");

// Expected output: <h2 class="title welcome">Hello there!</h2>
const serialized = doc.xml();
console.log(serialized);
  项目实战
import cheerio from "cheerio";
/**
 * Rewrites external image links in a piece of markup to links on this site.
 *
 * Every `<img>` whose `src` does not contain `process.env.VUE_APP_BASE_URL`
 * is handed to `saveImage`; when that resolves to a truthy value, the `src`
 * attribute is replaced with it. Images are processed one after another.
 *
 * NOTE(review): `saveImage` is not defined in this snippet; it is assumed to
 * resolve to the new image URL, or to a falsy value when nothing was saved.
 *
 * @param {string} html - Markup to process; parsed with cheerio in XML mode.
 * @returns {Promise<string>} The serialized markup after the replacements.
 */
export async function replaceImage(html) {
  const doc = cheerio.load(html, {
    xmlMode: true,
    decodeEntities: false,
  });
  const baseUrl = process.env.VUE_APP_BASE_URL;

  // cheerio's `.each()` does not wait for promises, so gather the wrapped
  // elements first and await inside a plain loop.
  const images = doc("img")
    .toArray()
    .map((node) => doc(node));

  for (const image of images) {
    const src = image.attr("src");
    if (!src) {
      continue;
    }
    // Guard the type: an unset base URL would otherwise be coerced to the
    // string "undefined" and matched against the image URL.
    const isLocal = typeof baseUrl === "string" && src.includes(baseUrl);
    if (isLocal) {
      continue;
    }
    // Swap `saveImage` for your own upload/mirroring routine.
    const imageSrc = await saveImage(src);
    if (imageSrc) {
      image.attr("src", imageSrc);
    }
  }
  return doc.xml();
}
/**
 * Collects the image links found in a piece of markup.
 *
 * @param {*} html - Markup to scan; parsed with cheerio in XML mode.
 * @returns {Array} The non-empty `src` values of all `<img>` elements, in
 *   document order.
 */
export function extractImages(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const $ = cheerio.load(html, parseOptions);

  return $("img")
    .toArray()
    .map((node) => $(node).attr("src"))
    // Drop images with a missing or empty `src`.
    .filter((link) => Boolean(link));
}
  参考
https://www.npmjs.com/package/cheerio

  
关注下面的标签,发现更多相似文章