Arce 发表于 2021-7-13 17:45:56

Node.js使用cheerio解析html

  cheerio语法类似jQuery
  doc

- doc-zh: https://github.com/cheeriojs/cheerio/wiki/Chinese-README
  安装
npm i cheerio
  代码实例

const cheerio = require("cheerio");

// Parse the fragment in XML mode and leave character entities undecoded.
const loadOptions = { xmlMode: true, decodeEntities: false };
const doc = cheerio.load('<h2 class="title">Hello world</h2>', loadOptions);

// Replace the heading text, then add a second class to every <h2>.
doc("h2.title").text("Hello there!");
doc("h2").addClass("welcome");

// Serialize the modified document back to markup.
const markup = doc.xml();
console.log(markup);
// <h2 class="title welcome">Hello there!</h2>

  项目实战
import cheerio from "cheerio";

/**
 * Rewrite external image links in a markup string so they point at this site.
 *
 * Every <img> whose `src` does not contain `process.env.VUE_APP_BASE_URL` is
 * handed to `saveImage()`; when that call yields a truthy value, the value
 * replaces the element's `src`. Images are processed one at a time, in
 * document order.
 *
 * NOTE(review): `saveImage` is not defined in this snippet — it is presumably
 * an async helper that stores the remote image and resolves to its new URL.
 * Substitute your own implementation.
 *
 * @param {string} html - Markup to process (parsed in XML mode, entities left undecoded).
 * @returns {Promise<string>} The serialized markup with image sources rewritten.
 */
export async function replaceImage(html) {
  const doc = cheerio.load(html, {
    xmlMode: true,
    decodeEntities: false,
  });

  const baseUrl = process.env.VUE_APP_BASE_URL;

  // .each() does not wait for promises, so collect the wrapped elements into
  // a plain array first and await inside a for...of loop.
  const images = doc("img")
    .toArray()
    .map((node) => doc(node));

  for (const image of images) {
    const src = image.attr("src");
    if (!src) {
      continue;
    }

    // Only test for the base URL when it is actually configured: passing
    // undefined to a string search would look for the text "undefined".
    const isLocal = typeof baseUrl === "string" && src.includes(baseUrl);
    if (isLocal) {
      continue;
    }

    // Replace saveImage with your own image-mirroring routine.
    const imageSrc = await saveImage(src);
    if (imageSrc) {
      image.attr("src", imageSrc);
    }
  }

  return doc.xml();
}

/**
 * Collect the image links found in a markup string.
 *
 * @param {*} html - Markup to scan (parsed in XML mode, entities left undecoded).
 * @returns {string[]} The `src` value of every <img> that has a non-empty one,
 *   in document order.
 */
export function extractImages(html) {
  const parseOptions = { xmlMode: true, decodeEntities: false };
  const $ = cheerio.load(html, parseOptions);

  return $("img")
    .toArray()
    .map((node) => $(node).attr("src"))
    // Drop images whose src is missing or empty.
    .filter((src) => Boolean(src));
}

  参考
https://www.npmjs.com/package/cheerio

  
文档来源：51CTO技术博客 https://blog.51cto.com/u_13567403/3039813
页: [1]
查看完整版本: Node.js使用cheerio解析html