lsp_core/systems/
lov.rs

1use std::collections::HashMap;
2
3use bevy_ecs::{prelude::*, world::CommandQueue};
4use hashbrown::HashSet;
5use lsp_types::{TextDocumentItem, Url};
6use serde::Deserialize;
7use sophia_api::{
8    prelude::{Any, Dataset},
9    quad::Quad,
10    term::{matcher::TermMatcher, Term as _},
11};
12use tracing::{debug, error, info, instrument, span};
13
14use crate::{
15    prelude::*,
16    util::{
17        fs::Fs,
18        ns::{owl, rdfs},
19    },
20};
21
/// One published version of a vocabulary, as returned by the LOV
/// `vocabulary/info` API (only the fields we need are deserialized).
#[derive(Deserialize, Debug)]
struct Version {
    /// Direct download URL of this version's file; absent for some versions.
    #[serde(rename = "fileURL")]
    file_url: Option<String>,
    /// Publication timestamp; used to select the most recent version.
    issued: chrono::DateTime<chrono::Utc>,
}
28
/// Top-level LOV API response: the list of published versions of a vocabulary.
#[derive(Deserialize, Debug)]
struct Vocab {
    versions: Vec<Version>,
}
33
34// Do we check whether or not the namespace url and the prefix url are the same?
35async fn extract_file_url(prefix: &str, client: &impl Client) -> Option<String> {
36    let url = format!(
37        "https://lov.linkeddata.es/dataset/lov/api/v2/vocabulary/info?vocab={}",
38        prefix
39    );
40    match client.fetch(&url, &std::collections::HashMap::new()).await {
41        Ok(resp) if resp.status == 200 => match serde_json::from_str::<Vocab>(&resp.body) {
42            Ok(x) => {
43                let versions: Vec<_> = x.versions.iter().flat_map(|x| &x.file_url).collect();
44                debug!(
45                    "Found lov response ({} versions) {:?}",
46                    x.versions.len(),
47                    versions
48                );
49                x.versions
50                    .into_iter()
51                    .flat_map(|x| x.file_url.map(|url| (url, x.issued)))
52                    .max_by_key(|x| x.1)
53                    .map(|x| x.0)
54            }
55            Err(e) => {
56                error!("Deserialize failed ({}) {:?}", url, e);
57                None
58            }
59        },
60        Ok(resp) => {
61            error!("Fetch ({}) failed status {}", url, resp.status);
62            None
63        }
64        Err(e) => {
65            error!("Fetch ({}) failed {:?}", url, e);
66            None
67        }
68    }
69}
70
71pub fn open_imports<C: Client + Resource>(
72    query: Query<(&Triples, &RopeC), Changed<Triples>>,
73    mut opened: Local<HashSet<String>>,
74    sender: Res<CommandSender>,
75    fs: Res<Fs>,
76    client: Res<C>,
77) {
78    for (triples, _) in &query {
79        for object in triples
80            .quads_matching(Any, [owl::imports], Any, Any)
81            .flatten()
82            .flat_map(|s| s.o().iri())
83            .flat_map(|s| Url::parse(s.as_str()))
84        {
85            if opened.contains(object.as_str()) {
86                continue;
87            }
88            opened.insert(object.as_str().to_string());
89
90            let fs = fs.clone();
91            let sender = sender.clone();
92            let fut = async move {
93                if let Some(content) = fs.0.read_file(&object).await {
94                    spawn_document(object, content, &sender.0, |_, _| {});
95
96                    let mut command_queue = CommandQueue::default();
97                    command_queue.push(move |world: &mut World| {
98                        world.run_schedule(SaveLabel);
99                    });
100                    let _ = sender.unbounded_send(command_queue);
101                } else {
102                    info!("No content found for {}", object);
103                }
104            };
105            client.spawn(fut);
106        }
107    }
108}
109
110/// First of al, fetch the lov dataset information at url https://lov.linkeddata.es/dataset/lov/api/v2/vocabulary/info?vocab=${prefix}
111/// Next, extract that json object into an object and find the latest dataset
112pub fn fetch_lov_properties<C: Client + Resource>(
113    sender: Res<CommandSender>,
114    query: Query<
115        &Prefixes,
116        (
117            Or<((Changed<Prefixes>, With<Open>), Changed<Open>)>,
118            // Without<Dirty>,
119        ),
120    >,
121    mut prefixes: Local<HashSet<String>>,
122    client: Res<C>,
123    fs: Res<Fs>,
124) {
125    for prefs in &query {
126        for prefix in prefs.0.iter() {
127            if !prefixes.contains(prefix.url.as_str()) {
128                prefixes.insert(prefix.url.to_string());
129                if let Some(url) = fs.0.lov_url(prefix.url.as_str(), &prefix.prefix) {
130                    info!("Other virtual url {}", url);
131                    if let Some(local) = lov::LOCAL_PREFIXES
132                        .iter()
133                        .find(|x| x.location == prefix.url.as_str())
134                    {
135                        debug!("Local lov");
136                        local_lov::<C>(local, url, &sender, fs.clone());
137                    } else {
138                        debug!("Remove lov");
139                        let sender = sender.0.clone();
140                        let c = client.as_ref().clone();
141                        client.spawn(fetch_lov(prefix.clone(), url, c, sender, fs.clone()));
142                    }
143                } else {
144                    debug!("Failed to find url");
145                }
146            } else {
147                debug!("Prefixes is already present {}", prefix.url);
148            }
149        }
150    }
151}
152
153type Sender = futures::channel::mpsc::UnboundedSender<CommandQueue>;
154fn spawn_document(
155    url: Url,
156    content: String,
157    sender: &Sender,
158    extra: impl FnOnce(Entity, &mut World) -> () + Send + Sync + 'static,
159) {
160    let mut command_queue = CommandQueue::default();
161    let item = TextDocumentItem {
162        version: 1,
163        uri: url.clone(),
164        language_id: String::from("turtle"),
165        text: String::new(),
166    };
167
168    let spawn = spawn_or_insert(
169        url.clone(),
170        (
171            RopeC(ropey::Rope::from_str(&content)),
172            Source(content.clone()),
173            Label(url.clone()), // this might crash
174            Wrapped(item),
175            Types(HashMap::new()),
176        ),
177        Some("turtle".into()),
178        (),
179    );
180
181    command_queue.push(move |world: &mut World| {
182        let span = span!(tracing::Level::INFO, "span lov");
183        let _enter = span.enter();
184        let e = spawn(world);
185
186        extra(e, world);
187
188        world.run_schedule(ParseLabel);
189        drop(_enter);
190    });
191
192    let _ = sender.unbounded_send(command_queue);
193}
194
195fn extra_from_lov<C: Client + Resource>(
196    from: FromPrefix,
197    content: String,
198    url: Url,
199    fs: Fs,
200) -> impl FnOnce(Entity, &mut World) + Send + Sync + 'static {
201    move |e, world| {
202        world.entity_mut(e).insert(from);
203
204        let client = world.resource::<C>();
205        client.spawn(async move {
206            fs.0.write_file(&url, &content).await;
207        });
208    }
209}
210
211async fn fetch_lov<C: Client + Resource>(prefix: Prefix, label: Url, c: C, sender: Sender, fs: Fs) {
212    if let Some(url) = extract_file_url(&prefix.prefix, &c).await {
213        match c.fetch(&url, &std::collections::HashMap::new()).await {
214            Ok(resp) if resp.status == 200 => {
215                let extra =
216                    extra_from_lov::<C>(FromPrefix(prefix), resp.body.clone(), label.clone(), fs);
217                spawn_document(label, resp.body, &sender, extra);
218            }
219            Ok(resp) => {
220                error!("Fetch ({}) failed status {}", url, resp.status);
221            }
222            Err(e) => {
223                error!("Fetch ({}) failed {:?}", url, e);
224            }
225        }
226    }
227}
228
229fn local_lov<C: Client + Resource>(
230    local: &lov::LocalPrefix,
231    label: Url,
232    sender: &Res<CommandSender>,
233    fs: Fs,
234) {
235    info!("Using local {}", local.name);
236
237    let from = FromPrefix(Prefix {
238        prefix: local.name.to_string(),
239        url: Url::parse(&local.location).unwrap(),
240    });
241
242    let extra = extra_from_lov::<C>(from, local.content.to_string(), label.clone(), fs);
243    spawn_document(label, local.content.to_string(), &sender.0, extra);
244}
245
/// Marker component for documents whose triples feed the `OntologyExtractor`.
#[derive(Component)]
pub struct OntologyExtract;
248
249#[instrument(skip(commands))]
250pub fn init_onology_extractor(mut commands: Commands, fs: Res<Fs>) {
251    for local in lov::LOCAL_PREFIXES
252        .iter()
253        .filter(|x| ["rdf", "rdfs", "owl"].iter().any(|y| *y == x.name))
254    {
255        let url = fs.0.lov_url(&local.location, &local.name).unwrap();
256        info!("Virtual url {}", url.to_string());
257
258        // let url = lsp_types::Url::from_str(local.location).unwrap();
259        let item = TextDocumentItem {
260            version: 1,
261            uri: url.clone(),
262            language_id: String::from("turtle"),
263            text: String::new(),
264        };
265
266        let spawn = spawn_or_insert(
267            url.clone(),
268            (
269                Source(local.content.to_string()),
270                RopeC(ropey::Rope::from_str(local.content)),
271                Label(url),
272                Wrapped(item),
273                Types(HashMap::new()),
274            ),
275            Some("turtle".into()),
276            OntologyExtract,
277        );
278
279        info!("Init onology {}", local.name);
280        commands.push(move |world: &mut World| {
281            info!("Spawned");
282            spawn(world);
283        });
284    }
285}
286
287#[instrument(skip(query, extractor))]
288pub fn check_added_ontology_extract(
289    query: Query<(&Triples, &Label), (Added<Triples>, With<OntologyExtract>)>,
290    mut extractor: ResMut<OntologyExtractor>,
291) {
292    let mut changed = false;
293    for (triples, label) in &query {
294        info!("Added triples from {}", label.as_str());
295        extractor.quads.extend(triples.0.iter().cloned());
296        changed = true;
297    }
298    if changed {
299        extractor.extract();
300    }
301}
302
/// Accumulates quads from ontology documents and derives from them the sets
/// of known property and class terms (expanded via `rdfs:subClassOf`).
#[derive(Debug, Resource)]
pub struct OntologyExtractor {
    /// All quads gathered so far from `OntologyExtract` documents.
    quads: Vec<MyQuad<'static>>,
    /// Derived property terms (seeded with `rdf:Property` in `new`).
    properties: Vec<MyTerm<'static>>,
    /// Derived class terms (seeded with `rdfs:Class` in `new`).
    classes: Vec<MyTerm<'static>>,
}
309
/// Term matcher that matches everything EXCEPT the given terms — used to find
/// only terms that are not already known (despite the field name, it is an
/// exclusion list, not limited to properties).
struct LocalMatcher<'a> {
    properties: &'a [MyTerm<'static>],
}
313
314impl TermMatcher for LocalMatcher<'_> {
315    type Term = MyTerm<'static>;
316
317    fn matches<T2: sophia_api::prelude::Term + ?Sized>(&self, term: &T2) -> bool {
318        for p in self.properties {
319            if term.eq(p) {
320                return false;
321            }
322        }
323
324        true
325    }
326}
327
328impl OntologyExtractor {
329    pub fn new() -> Self {
330        Self {
331            quads: vec![],
332            classes: vec![MyTerm::<'static>::named_node(
333                "http://www.w3.org/2000/01/rdf-schema#Class",
334                0..1,
335            )],
336            properties: vec![MyTerm::<'static>::named_node(
337                "http://www.w3.org/1999/02/22-rdf-syntax-ns#Property",
338                0..1,
339            )],
340        }
341    }
342
343    pub fn properties<'a>(&'a self) -> &'a [MyTerm<'static>] {
344        &self.properties[..]
345    }
346
347    pub fn classes<'a>(&'a self) -> &'a [MyTerm<'static>] {
348        &self.classes[..]
349    }
350
351    fn extract_step(quads: &Vec<MyQuad<'static>>, items: &mut Vec<MyTerm<'static>>) -> bool {
352        let new_items: Vec<_> = quads
353            .quads_matching(
354                LocalMatcher { properties: &items },
355                [rdfs::subClassOf],
356                &items[..],
357                Any,
358            )
359            .flatten()
360            .map(|x| x.to_s().to_owned())
361            .collect();
362
363        let added = !new_items.is_empty();
364        items.extend(new_items);
365        added
366    }
367
368    fn extract(&mut self) {
369        loop {
370            if !OntologyExtractor::extract_step(&self.quads, &mut self.properties) {
371                break;
372            }
373        }
374
375        loop {
376            if !OntologyExtractor::extract_step(&self.quads, &mut self.classes) {
377                break;
378            }
379        }
380    }
381}
382
383// #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
384// pub enum EntryState {
385//     Ready,
386//     Transit,
387// }
388//
389// #[derive(Debug, Clone, Serialize, Deserialize)]
390// pub struct LovEntry {
391//     prefix: String,
392//     url: String,
393//     num: usize,
394//     state: EntryState,
395// }
396//
397// impl LovEntry {
398//     fn name(&self) -> String {
399//         format!("{}-{}.ttl", self.num, self.prefix)
400//     }
401//     pub fn url(&self, cache: &Cache) -> Option<Url> {
402//         self.file_url(cache).or_else(|| self.remote_url())
403//     }
404//
405//     #[cfg(not(target_arch = "wasm32"))]
406//     fn file_url(&self, cache: &Cache) -> Option<Url> {
407//         let p = cache.path()?;
408//         let url = p.join(self.name());
409//         Url::from_file_path(url).ok()
410//     }
411//
412//     #[cfg(target_arch = "wasm32")]
413//     fn file_url(&self, cache: &Cache) -> Option<Url> {
414//         None
415//     }
416//
417//     fn remote_url(&self) -> Option<Url> {
418//         Url::from_str(&self.url).ok()
419//     }
420//
421//     pub fn save(&mut self, cache: &Cache, content: &str) -> Option<()> {
422//         self.state = EntryState::Ready;
423//         cache.write_file(&self.name(), content)
424//     }
425// }
426//
427// #[derive(Debug, Clone, Serialize, Deserialize, Resource)]
428// pub struct LovHelper {
429//     entries: Vec<LovEntry>,
430// }
431//
432// impl LovHelper {
433//     fn try_from_cache(cache: &Cache) -> Option<Self> {
434//         let c = cache.get_file("index.json")?;
435//         info!("Found index file! {}", c);
436//         serde_json::from_str(&c).ok()
437//     }
438//
439//     pub fn from_cache(cache: &Cache) -> Self {
440//         Self::try_from_cache(cache).unwrap_or_else(|| Self {
441//             entries: Vec::new(),
442//         })
443//     }
444//
445//     pub fn save(mut self, cache: &Cache) -> Option<()> {
446//         self.entries = self
447//             .entries
448//             .into_iter()
449//             .filter(|x| x.state == EntryState::Ready)
450//             .collect();
451//         let st = serde_json::to_string(&self).ok()?;
452//         info!("Save index file! {}", st);
453//         cache.write_file("index.json", &st)
454//     }
455//
456//     pub fn has_entry_mut(&mut self, prefix: &Prefix) -> Option<&mut LovEntry> {
457//         self.entries
458//             .iter_mut()
459//             .find(|e| e.prefix == prefix.prefix && e.url == prefix.url.as_str())
460//     }
461//
462//     pub fn has_entry(&self, prefix: &Prefix) -> Option<&LovEntry> {
463//         self.entries
464//             .iter()
465//             .find(|e| e.prefix == prefix.prefix && e.url == prefix.url.as_str())
466//     }
467//
468//     pub fn create_entry(&mut self, prefix: &Prefix) -> &LovEntry {
469//         debug!("Create entry for {:?}", prefix);
470//         if let Some(e) = self.entries.iter().enumerate().find_map(|(i, e)| {
471//             (e.prefix == prefix.prefix && e.url == prefix.url.as_str()).then_some(i)
472//         }) {
473//             return &self.entries[e];
474//         }
475//         let c = self
476//             .entries
477//             .iter()
478//             .filter(|x| x.prefix == prefix.prefix)
479//             .count();
480//         let entry = LovEntry {
481//             prefix: prefix.prefix.to_string(),
482//             url: prefix.url.to_string(),
483//             num: c,
484//             state: EntryState::Transit,
485//         };
486//         self.entries.push(entry);
487//         self.entries.last().unwrap()
488//     }
489//
490//     pub fn save_prefix(&mut self, cache: &Cache, prefix: &Prefix, content: &str) -> Option<()> {
491//         let e = self
492//             .entries
493//             .iter_mut()
494//             .find(|e| (e.prefix == prefix.prefix && e.url == prefix.url.as_str()))?;
495//         e.save(cache, content)
496//     }
497// }
498
/// Component linking a spawned document entity back to the prefix it was
/// fetched or derived from.
#[derive(Debug, Clone, Component)]
pub struct FromPrefix(pub Prefix);