working
This commit is contained in:
parent
1eb7d4c1eb
commit
35c3f4c423
10 changed files with 353 additions and 738 deletions
39
src/html.rs
39
src/html.rs
|
|
@ -1,33 +1,14 @@
|
|||
use html_parser::Dom;
|
||||
use reqwest::blocking::get;
|
||||
use std::error::Error;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
/// Lädt die HTML-Seite von der angegebenen URL herunter und konvertiert sie in JSON
|
||||
pub fn fetch_and_parse_html_to_json(url: &str) -> Result<String, Box<dyn Error>> {
|
||||
// HTML herunterladen
|
||||
let response = get(url)?;
|
||||
if !response.status().is_success() {
|
||||
return Err(format!("Fehler beim Abrufen der URL {}: {}", url, response.status()).into());
|
||||
pub fn fetch_pre_blocks(url: &str) -> anyhow::Result<Vec<String>> {
|
||||
let body = reqwest::blocking::get(url)?.text()?;
|
||||
let document = Html::parse_document(&body);
|
||||
let selector = Selector::parse("pre").unwrap();
|
||||
|
||||
let mut results = Vec::new();
|
||||
for element in document.select(&selector) {
|
||||
results.push(element.inner_html());
|
||||
}
|
||||
|
||||
let body = response.text()?;
|
||||
|
||||
// HTML parsen
|
||||
let dom = Dom::parse(&body)?;
|
||||
|
||||
// In JSON konvertieren
|
||||
let json = dom.to_json_pretty()?;
|
||||
Ok(json)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_fetch_and_parse() {
|
||||
let url = "https://www.linuxfromscratch.org/~thomas/multilib-m32/chapter02/hostreqs.html";
|
||||
let json = fetch_and_parse_html_to_json(url).expect("Fehler beim Parsen");
|
||||
assert!(json.contains("Host System Requirements"));
|
||||
}
|
||||
Ok(results)
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue