Add metadata generator and CLI integration
parent 205ab25d41
commit c19c5c21ab
6 changed files with 291 additions and 1 deletion

README.md (11 lines changed)
@@ -129,6 +129,17 @@ cargo run --bin metadata_indexer -- --base-dir . refresh
Passing `--books mlfs,blfs` restricts the refresh to specific books, and
`--force` bypasses the local cache.

To materialise a Rust module from harvested metadata:

```bash
cargo run --bin metadata_indexer -- \
  --base-dir . generate \
  --metadata ai/metadata/packages/mlfs/binutils-pass-1.json \
  --output target/generated/by_name
```

Add `--overwrite` to regenerate an existing module directory.

## 📚 Documentation

- [Architecture Overview](docs/ARCHITECTURE.md) – high-level tour of the crate
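For orientation, the file passed via `--metadata` is a harvested package record whose field names mirror the deserialisation structs added in `src/pkgs/generator.rs` later in this commit. A minimal sketch in Rust; every value is an illustrative placeholder, not real harvest output:

```rust
// Sketch only: field names follow generator.rs's deserialisation structs
// (HarvestedPackage and friends); the values below are placeholders.
use serde_json::json;

fn example_metadata() -> serde_json::Value {
    json!({
        "package": { "id": "mlfs/example-pkg", "name": "example-pkg", "version": "1.0" },
        "source": {
            "urls": [ { "url": "https://example.invalid/example-pkg-1.0.tar.xz" } ],
            "checksums": [ { "alg": "md5", "value": "0123456789abcdef0123456789abcdef" } ]
        },
        "build": [ { "phase": "build", "commands": ["./configure", "make", "make install"] } ],
        "optimizations": { "enable_lto": false, "enable_pgo": false }
    })
}
```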
@@ -5,7 +5,9 @@
first when revisiting the project.
- `metadata_indexer` now supports a `refresh` command that pulls jhalfs
  `wget-list`/`md5sums` manifests into `ai/metadata/cache/` and the `harvest`
  command automatically draws URLs and checksums from those manifests.
  command automatically draws URLs and checksums from those manifests. A
  `generate` subcommand consumes harvested metadata and scaffolds Rust modules
  under `src/pkgs/by_name` (or a custom output directory).
- AI state lives under `ai/`:
  - `ai/personas.json`, `ai/tasks.json`, `ai/bugs.json` track personas,
    outstanding work, and known issues.
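To make "scaffolds Rust modules under `src/pkgs/by_name`" concrete, a short sketch using the public helper this commit adds to compute the target directory without generating anything (crate imported as `package_management`, as in the indexer binary):

```rust
// Sketch only: shows where a harvested record would be scaffolded.
use anyhow::Result;
use package_management::pkgs::generator;

fn main() -> Result<()> {
    let dir = generator::module_directory(
        "ai/metadata/packages/mlfs/binutils-pass-1.json",
        "src/pkgs/by_name",
    )?;
    // Prints src/pkgs/by_name/bi/binutils_pass_1 for this metadata id.
    println!("{}", dir.display());
    Ok(())
}
```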
@@ -21,6 +21,7 @@ This document explains the workflow and the supporting assets.
| `index` | Re-runs validation and regenerates `index.json`. Use `--compact` to write a single-line JSON payload. |
| `harvest` | Fetches a book page, scrapes build instructions, and emits a draft metadata record (to stdout with `--dry-run` or into `ai/metadata/packages/`). Falls back to jhalfs manifests when inline source links are absent. |
| `refresh` | Updates cached jhalfs manifests (`wget-list`, `md5sums`) under `ai/metadata/cache/`. Supports `--books` filtering and `--force` to bypass the cache. |
| `generate` | Translates harvested metadata into Rust modules under `src/pkgs/by_name` (or a specified directory), using the scaffolder to create `PackageDefinition` wrappers. |
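The `generate` step in the table above is also reachable without the CLI; a minimal sketch that calls the public generator API introduced in this commit (crate name `package_management`, as imported by the indexer binary):

```rust
// Sketch only: drives the generator directly rather than through the
// `metadata_indexer generate` wrapper shown in the table above.
use anyhow::Result;
use package_management::pkgs::generator;

fn main() -> Result<()> {
    // Same inputs as the CLI: a harvested metadata file and the
    // `by_name` root that modules are scaffolded into.
    let module_path = generator::generate_module(
        "ai/metadata/packages/mlfs/binutils-pass-1.json",
        "src/pkgs/by_name",
    )?;
    println!("Generated module at {}", module_path.display());
    Ok(())
}
```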
### Harvesting flow
@@ -13,6 +13,8 @@ use serde_json::{Value, json};
use sha2::{Digest, Sha256};
use walkdir::WalkDir;

use package_management::pkgs::generator;

#[derive(Parser)]
#[command(
    name = "metadata-indexer",
@@ -64,6 +66,18 @@ enum Command {
        #[arg(long)]
        dry_run: bool,
    },
    /// Generate Rust modules from harvested metadata
    Generate {
        /// Path to the harvested metadata JSON file
        #[arg(long)]
        metadata: PathBuf,
        /// Output directory (should be the `by_name` root)
        #[arg(long, default_value = "src/pkgs/by_name")]
        output: PathBuf,
        /// Remove existing module directory before regeneration
        #[arg(long)]
        overwrite: bool,
    },
}

fn main() -> Result<()> {
@@ -221,6 +235,31 @@ fn main() -> Result<()> {
                println!("No manifests refreshed (check warnings above).");
            }
        }
        Command::Generate {
            metadata,
            output,
            overwrite,
        } => {
            if overwrite {
                match generator::module_directory(&metadata, &output) {
                    Ok(dir) if dir.exists() => {
                        fs::remove_dir_all(&dir).with_context(|| {
                            format!("removing existing module {}", dir.display())
                        })?;
                    }
                    Ok(_) => {}
                    Err(err) => {
                        eprintln!(
                            "warning: could not determine existing module directory: {}",
                            err
                        );
                    }
                }
            }

            let module_path = generator::generate_module(&metadata, &output)?;
            println!("Generated module at {}", module_path.display());
        }
    }

    Ok(())
src/pkgs/generator.rs (236 lines, new file)

@@ -0,0 +1,236 @@
use std::collections::HashSet;
use std::fs;
use std::path::{Path, PathBuf};

use anyhow::{Context, Result, anyhow};
use serde::Deserialize;

use crate::pkgs::scaffolder::{self, ScaffoldRequest};

#[derive(Debug, Deserialize)]
struct HarvestedPackage {
    package: HarvestedMetadata,
    source: HarvestedSource,
    #[serde(default)]
    build: Vec<CommandPhase>,
    #[serde(default)]
    dependencies: Option<HarvestedDependencies>,
    optimizations: HarvestedOptimisations,
}

#[derive(Debug, Deserialize)]
struct HarvestedMetadata {
    id: String,
    name: String,
    version: String,
    #[serde(default)]
    stage: Option<String>,
    #[serde(default)]
    variant: Option<String>,
    #[serde(default)]
    notes: Option<String>,
}

#[derive(Debug, Deserialize)]
struct HarvestedSource {
    #[serde(default)]
    archive: Option<String>,
    #[serde(default)]
    urls: Vec<HarvestedUrl>,
    #[serde(default)]
    checksums: Vec<HarvestedChecksum>,
}

#[derive(Debug, Deserialize)]
struct HarvestedUrl {
    url: String,
}

#[derive(Debug, Deserialize)]
struct HarvestedChecksum {
    alg: String,
    value: String,
}

#[derive(Debug, Deserialize)]
struct HarvestedOptimisations {
    enable_lto: bool,
    enable_pgo: bool,
    #[serde(default)]
    cflags: Vec<String>,
    #[serde(default)]
    ldflags: Vec<String>,
    #[serde(default)]
    profdata: Option<String>,
}

#[derive(Debug, Deserialize)]
struct CommandPhase {
    #[serde(default)]
    phase: Option<String>,
    #[serde(default)]
    commands: Vec<String>,
    #[serde(default)]
    cwd: Option<String>,
    #[serde(default)]
    requires_root: Option<bool>,
    #[serde(default)]
    notes: Option<String>,
}

#[derive(Debug, Deserialize)]
struct HarvestedDependencies {
    #[serde(default)]
    build: Vec<String>,
    #[serde(default)]
    runtime: Vec<String>,
}

/// Generate a Rust module from harvested metadata, returning the path to the generated file.
pub fn generate_module(
    metadata_path: impl AsRef<Path>,
    base_dir: impl AsRef<Path>,
) -> Result<PathBuf> {
    let harvested = parse_metadata(metadata_path.as_ref())?;
    let request = build_request(&harvested)?;
    let result = scaffolder::scaffold_package(base_dir.as_ref(), request)?;
    Ok(result.module_path)
}

/// Compute the directory for a module derived from the given metadata.
pub fn module_directory(
    metadata_path: impl AsRef<Path>,
    base_dir: impl AsRef<Path>,
) -> Result<PathBuf> {
    let harvested = parse_metadata(metadata_path.as_ref())?;
    let slug = module_override_from_id(&harvested.package.id).ok_or_else(|| {
        anyhow!(
            "unable to derive module slug from id '{}'",
            harvested.package.id
        )
    })?;
    let module = sanitize_module_name(&slug);
    let dir = base_dir
        .as_ref()
        .join(prefix_from_module(&module))
        .join(module);
    Ok(dir)
}

fn build_request(pkg: &HarvestedPackage) -> Result<ScaffoldRequest> {
    let slug = module_override_from_id(&pkg.package.id)
        .ok_or_else(|| anyhow!("unable to derive module slug from id '{}'", pkg.package.id))?;

    let mut build_commands = Vec::new();
    let mut install_commands = Vec::new();
    for command in flatten_commands(&pkg.build) {
        if command.contains("make install") {
            install_commands.push(command);
        } else {
            build_commands.push(command);
        }
    }

    let mut dependencies = HashSet::new();
    if let Some(deps) = &pkg.dependencies {
        for dep in &deps.build {
            dependencies.insert(dep.clone());
        }
        for dep in &deps.runtime {
            dependencies.insert(dep.clone());
        }
    }
    let mut dependencies: Vec<String> = dependencies.into_iter().collect();
    dependencies.sort();

    let request = ScaffoldRequest {
        name: pkg.package.name.clone(),
        version: pkg.package.version.clone(),
        source: pkg.source.urls.first().map(|u| u.url.clone()),
        md5: pkg
            .source
            .checksums
            .iter()
            .find(|c| c.alg.eq_ignore_ascii_case("md5"))
            .map(|c| c.value.clone()),
        configure_args: Vec::new(),
        build_commands,
        install_commands,
        dependencies,
        enable_lto: pkg.optimizations.enable_lto,
        enable_pgo: pkg.optimizations.enable_pgo,
        cflags: pkg.optimizations.cflags.clone(),
        ldflags: pkg.optimizations.ldflags.clone(),
        profdata: pkg.optimizations.profdata.clone(),
        stage: pkg.package.stage.clone(),
        variant: pkg.package.variant.clone(),
        notes: pkg.package.notes.clone(),
        module_override: Some(slug),
    };

    Ok(request)
}

fn flatten_commands(phases: &[CommandPhase]) -> Vec<String> {
    phases
        .iter()
        .flat_map(|phase| phase.commands.iter().cloned())
        .collect()
}

fn module_override_from_id(id: &str) -> Option<String> {
    let slug = match id.split_once('/') {
        Some((_, slug)) => slug,
        None => id,
    };
    Some(
        slug.replace('.', "_")
            .replace('/', "_")
            .replace('-', "_")
            .replace(' ', "_")
            .to_lowercase(),
    )
}

fn parse_metadata(path: &Path) -> Result<HarvestedPackage> {
    let metadata = fs::read_to_string(path)
        .with_context(|| format!("reading metadata file {}", path.display()))?;
    let harvested: HarvestedPackage = serde_json::from_str(&metadata)
        .with_context(|| format!("parsing harvested metadata from {}", path.display()))?;
    Ok(harvested)
}

fn sanitize_module_name(name: &str) -> String {
    let mut out = String::new();
    for ch in name.chars() {
        if ch.is_ascii_alphanumeric() {
            out.push(ch.to_ascii_lowercase());
        } else if ch == '_' || ch == '+' || ch == '-' {
            out.push('_');
        } else {
            out.push('_');
        }
    }
    if out.is_empty() {
        out.push_str("pkg");
    }
    if out
        .chars()
        .next()
        .map(|c| c.is_ascii_digit())
        .unwrap_or(false)
    {
        out.insert(0, 'p');
    }
    out
}

fn prefix_from_module(module: &str) -> String {
    let mut chars = module.chars();
    let first = chars.next().unwrap_or('p');
    let second = chars.next().unwrap_or('k');
    let mut s = String::new();
    s.push(first);
    s.push(second);
    s
}
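As a reading aid only (not part of this commit), the slug and prefix helpers above compose as in the following sketch, which would type-check if appended inside `src/pkgs/generator.rs`:

```rust
// Sketch only: illustrates how an id like "mlfs/binutils-pass-1" is turned
// into a module name and a two-letter shard prefix under `by_name`.
#[cfg(test)]
mod slug_sketch {
    use super::*;

    #[test]
    fn binutils_pass_1_lands_under_bi() {
        let slug = module_override_from_id("mlfs/binutils-pass-1").unwrap();
        assert_eq!(slug, "binutils_pass_1");
        assert_eq!(sanitize_module_name(&slug), "binutils_pass_1");
        // module_directory() joins this prefix and name onto the base dir,
        // so the module ends up at `<by_name>/bi/binutils_pass_1/`.
        assert_eq!(prefix_from_module("binutils_pass_1"), "bi");
    }
}
```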
@@ -1,4 +1,5 @@
pub mod by_name;
pub mod generator;
pub mod mlfs;
pub mod package;
pub mod scaffolder;