Skip to content

Commit

Permalink
Download command for models
Browse files Browse the repository at this point in the history
Models are still not being published, so the download currently fails. It
will hopefully work in the future.
  • Loading branch information
ZJaume committed Aug 1, 2024
1 parent cebce9f commit e5cff56
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 6 deletions.
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ strum = { version = "0.25", features = ["derive"] }
strum_macros = "0.25"
wyhash2 = "0.2.1"
pyo3 = { version = "0.22", features = ["gil-refs"] }
reqwest = { version = "0.12", features = ["stream"] }
tokio = { version = "1", features = ["io-util", "rt-multi-thread", "signal"] }
futures-util = "0.3"

[dev-dependencies]
test-log = "0.2.15"
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ dynamic = ["version"]
[project.scripts]
heli = "heli_otr:cli_run"
heli-convert = "heli_otr:cli_convert"
heli-download = "heli_otr:cli_download"

[tool.maturin]
features = ["pyo3/extension-module"]
37 changes: 31 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,13 @@ use crate::identifier::Identifier;
pub mod languagemodel;
pub mod identifier;
pub mod lang;
mod utils;

// File names of the binary word and character models; shared by the code
// that loads the models and the code that downloads them.
const WORDMODEL_FILE: &str = "wordmodel.bin";
const CHARMODEL_FILE: &str = "charmodel.bin";

// Call python interpreter and obtain python path of our module
pub fn pythonpath() -> PyResult<String> {
pub fn module_path() -> PyResult<String> {
let mut path = String::new();
Python::with_gil(|py| {
// Instead of hardcoding the module name, obtain it from the crate name at compile time
Expand All @@ -33,13 +36,13 @@ pub fn pythonpath() -> PyResult<String> {
}

pub fn load_models(modelpath: &str) -> (Model, Model) {
let grampath = format!("{modelpath}/charmodel.bin");
let grampath = format!("{modelpath}/{CHARMODEL_FILE}");
let char_handle = thread::spawn(move || {
let path = Path::new(&grampath);
Model::from_bin(path)
});

let wordpath = format!("{modelpath}/wordmodel.bin");
let wordpath = format!("{modelpath}/{WORDMODEL_FILE}");
let word_handle = thread::spawn(move || {
let path = Path::new(&wordpath);
Model::from_bin(path)
Expand All @@ -60,7 +63,7 @@ pub struct PyIdentifier {
impl PyIdentifier {
#[new]
fn new() -> Self {
let modulepath = pythonpath().expect("Error loading python module path");
let modulepath = module_path().expect("Error loading python module path");
let (charmodel, wordmodel) = load_models(&modulepath);
let identifier = Identifier::new(
Arc::new(charmodel),
Expand All @@ -86,7 +89,7 @@ impl PyIdentifier {
#[pyfunction]
pub fn cli_run() -> PyResult<()> {
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
let modulepath = pythonpath().expect("Error loading python module path");
let modulepath = module_path().expect("Error loading python module path");
let (charmodel, wordmodel) = load_models(&modulepath);
let mut identifier = Identifier::new(
Arc::new(charmodel),
Expand All @@ -101,10 +104,31 @@ pub fn cli_run() -> PyResult<()> {
Ok(())
}

#[pyfunction]
pub fn cli_download() -> PyResult<()> {
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
let modulepath = module_path().expect("Error loading python module path");
let url = format!(
"https://github.com/ZJaume/heli-otr/releases/download/v{}",
env!("CARGO_PKG_VERSION"));

utils::download_file(
&format!("{url}/{WORDMODEL_FILE}"),
&format!("{modulepath}/{WORDMODEL_FILE}")
).unwrap();
utils::download_file(
&format!("{url}/{CHARMODEL_FILE}"),
&format!("{modulepath}/{CHARMODEL_FILE}")
).unwrap();
info!("Finished");

Ok(())
}

#[pyfunction]
pub fn cli_convert() -> PyResult<()> {
env_logger::Builder::from_env(Env::default().default_filter_or("info")).init();
let modulepath = pythonpath().expect("Error loading python module path");
let modulepath = module_path().expect("Error loading python module path");
debug!("Module path found: {}", modulepath);
let modelpath = Path::new("./LanguageModels");

Expand All @@ -127,6 +151,7 @@ pub fn cli_convert() -> PyResult<()> {
fn heli_otr(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(cli_run))?;
m.add_wrapped(wrap_pyfunction!(cli_convert))?;
m.add_wrapped(wrap_pyfunction!(cli_download))?;
m.add_class::<PyIdentifier>()?;
// m.add_class::<PyLang>()?;

Expand Down
48 changes: 48 additions & 0 deletions src/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use std::process::exit;

use log::{info, debug, error};
use tokio::io::AsyncWriteExt;
use tokio::runtime::Runtime;
use futures_util::StreamExt;
use reqwest;


// Run a tokio task that listens for ctrl+c
async fn run_cancel_handler() {
tokio::spawn(async move {
match tokio::signal::ctrl_c().await {
Ok(_) => {
info!("Received Ctrl+C, terminating immediately.");
exit(1);
}
Err(e) => error!("Error listening for SIGINT: {}", e),
};
});
}

/// Download the resource at `url` and write it to `filepath`.
///
/// A new tokio runtime is created for the call and blocked on until the
/// transfer has finished. A Ctrl+C listener is started first so the download
/// can be interrupted. The output file is only created after the server has
/// answered with a success status.
///
/// # Errors
/// Returns an error if the runtime cannot be created, the request fails, the
/// server answers with a non-success HTTP status, or the output file cannot
/// be created, written or flushed.
pub fn download_file(url: &str, filepath: &str) -> Result<(), Box<dyn std::error::Error>> {
    let runtime = Runtime::new()?;
    runtime.block_on(async {
        info!("Downloading file from '{url}'");
        run_cancel_handler().await;
        // Create a download stream
        let response = reqwest::get(url).await?;
        let status = response.status();
        debug!("Response status: {}", status);
        if !status.is_success() {
            error!("Could not download file, HTTP status code: {status}");
            // Report the failure through the Result instead of terminating
            // the process from inside a helper; the caller decides what to do
            return Err(Box::<dyn std::error::Error>::from(format!(
                "Could not download file, HTTP status code: {status}"
            )));
        }

        let mut response_stream = response.bytes_stream();
        let mut outfile = tokio::fs::File::create(filepath).await?;

        debug!("Writing file to '{filepath}'");
        // Asynchronously write to the file every chunk of bytes that comes from the stream
        while let Some(bytes) = response_stream.next().await {
            outfile.write_all(&bytes?).await?;
        }
        // tokio's File may still have a write in flight after write_all
        // returns; flushing waits for it so a late I/O error is reported
        // here rather than lost when the file is dropped
        outfile.flush().await?;
        Ok(())
    })
}

0 comments on commit e5cff56

Please sign in to comment.