Skip to content

Graphs, nodes and edges reworked. #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,7 @@
go.work
*.idea
*.DS_Store

# intermediate saved files
*.json
*.pt
Binary file added __pycache__/text_embedding_model.cpython-312.pyc
Binary file not shown.
51,541 changes: 51,541 additions & 0 deletions citation_network.ttl

Large diffs are not rendered by default.

614 changes: 614 additions & 0 deletions citation_network_tiny.ttl

Large diffs are not rendered by default.

63 changes: 63 additions & 0 deletions citation_networks/citation_graph_creation.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package citation

/*
This file builds a graph from previously extracted data that was stored as a
map[string]map[string]interface{} and exported to a JSON file.
It returns the completed graph.
*/

import (
"encoding/json"
"fmt"
"os"

"github.com/jmCodeCraft/go-network/model"
)

// Create_graph reads the JSON file file_name, which must contain an object
// mapping node IDs to attribute maps, and builds a graph from it.
//
// Every top-level key becomes a node carrying its attribute map. For each ID
// listed under a node's "neighbors" attribute an edge is added with that node
// as First_node and the listed node as Second_node; the "neighbors" attribute
// is then removed from the node's attributes.
//
// It returns nil, after printing the error, when the file cannot be opened or
// its content cannot be decoded.
func Create_graph(file_name string) *model.NewGraph {
	file, err := os.Open(file_name)
	if err != nil {
		fmt.Println("Error opening file:", err)
		return nil
	}
	defer file.Close()

	// Node ID -> attribute map, exactly as stored in the JSON file.
	var all_atribute_map map[string]map[string]interface{}
	if err := json.NewDecoder(file).Decode(&all_atribute_map); err != nil {
		fmt.Println("Error decoding JSON:", err)
		return nil
	}

	g := model.NewGraph{
		Nodes: map[string]model.NewNode{},
		Edges: map[int]model.NewEdge{},
	}

	for key, value := range all_atribute_map {
		g.AddNode(model.NewNode{
			ID:         key,
			Attributes: value,
		})
	}

	for _, node := range g.Nodes {
		// encoding/json decodes a JSON array into []interface{}, never into
		// []string, so both representations have to be accepted here.
		var neighbors []string
		switch list := node.Attributes["neighbors"].(type) {
		case []string:
			neighbors = list
		case []interface{}:
			neighbors = make([]string, 0, len(list))
			for _, item := range list {
				if id, ok := item.(string); ok {
					neighbors = append(neighbors, id)
				}
			}
		}

		for _, neighbor := range neighbors {
			g.AddEdge(model.NewEdge{
				First_node: node,
				// A neighbor ID that is not a key of g.Nodes yields the
				// zero-value node from the map lookup.
				Second_node: g.Nodes[neighbor],
				Attributes:  map[string]interface{}{},
			})
		}

		// The adjacency list is now represented by edges; drop it from the
		// node's attributes.
		delete(node.Attributes, "neighbors")
	}

	fmt.Println("Graph creation successful!")

	return &g
}
204 changes: 204 additions & 0 deletions citation_networks/data_extraction.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package citation

/*
This file extracts data for citation network from turtle (.ttl) file. It not only extracts connections between different
citations, but also extracts some of the key elements from each link. It extracts keywords and text which is transformed
and saved as an embedding.
*/

import (
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"os"
"os/exec"
"sort"
"strconv"
"strings"

"github.com/nvkp/turtle"
)

// Triple holds the subject, predicate and object of a single statement.
// The `turtle` struct tags name the component each field is filled from when
// Turtle data is unmarshaled into a []Triple.
type Triple struct {
Subject string `turtle:"subject"`
Predicate string `turtle:"predicate"`
Object string `turtle:"object"`
}

// OnPage performs an HTTP GET on link and returns the response body decoded
// as a JSON object.
//
// Any failure (request error, non-2xx status, unreadable body or a body that
// is not a JSON object) terminates the program through log.Fatal, matching
// the fail-fast handling used by the rest of this function.
func OnPage(link string) map[string]interface{} {
	res, err := http.Get(link)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	// http.Get reports only transport-level problems as errors; an error
	// response such as 404 or 429 still arrives here with a body, which must
	// not be mistaken for the requested document.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		log.Fatalf("Error fetching %s: unexpected status %s", link, res.Status)
	}

	content, err := io.ReadAll(res.Body)
	if err != nil {
		log.Fatal(err)
	}

	var result map[string]interface{}
	if err := json.Unmarshal(content, &result); err != nil {
		log.Fatal("Error parsing JSON:", err)
	}

	return result
}

// reconstructText rebuilds running text from an inverted index that maps each
// word to the list of positions at which it occurs.
//
// Words are emitted in ascending position order and joined with single
// spaces. An entry whose value is not a list, or a position that is not a
// float64, is skipped rather than triggering a type-assertion panic. A nil or
// empty index yields the empty string.
func reconstructText(wordPositions map[string]interface{}) string {
	// Invert the index: position -> word occupying that position.
	wordAt := make(map[float64]string, len(wordPositions))
	for word, raw := range wordPositions {
		positions, ok := raw.([]interface{})
		if !ok {
			continue
		}
		for _, p := range positions {
			if pos, ok := p.(float64); ok {
				wordAt[pos] = word
			}
		}
	}

	// Map iteration order is random, so the positions are sorted explicitly.
	order := make([]float64, 0, len(wordAt))
	for pos := range wordAt {
		order = append(order, pos)
	}
	sort.Float64s(order)

	words := make([]string, 0, len(order))
	for _, pos := range order {
		words = append(words, wordAt[pos])
	}

	return strings.Join(words, " ")
}

// GetKeywords returns the "display_name" string of every object listed under
// api_map["keywords"], in list order.
//
// List elements that are not objects, and objects whose "display_name" is not
// a string, are ignored. When "keywords" is absent or is not a list a notice
// is printed and nil is returned; nil is also returned when no name is found.
func GetKeywords(api_map map[string]interface{}) []string {
	entries, ok := api_map["keywords"].([]interface{})
	if !ok {
		fmt.Println("keywords is missing or is not a list")
		return nil
	}

	// Deliberately left nil when nothing is collected, so the result for an
	// empty list is the same as before.
	var words []string
	for _, entry := range entries {
		keyword, ok := entry.(map[string]interface{})
		if !ok {
			continue
		}
		if name, ok := keyword["display_name"].(string); ok {
			words = append(words, name)
		}
	}
	return words
}

// GetText returns the embedding of the abstract stored in api_map.
//
// The abstract is rebuilt from api_map["abstract_inverted_index"] (the empty
// string is used when that field is absent or is not an object) and passed as
// the single argument to `python text_embedding_model.py`, resolved relative
// to the working directory. The script's standard output is parsed as a list
// of floats: brackets, commas and whitespace all act as separators.
//
// A failing script, output without any number, or an unparsable token
// terminates the program through log.Fatal.
func GetText(api_map map[string]interface{}) []float64 {
	var text string
	if inverted, ok := api_map["abstract_inverted_index"].(map[string]interface{}); ok {
		text = reconstructText(inverted)
	}

	output, err := exec.Command("python", "text_embedding_model.py", text).Output()
	if err != nil {
		// Output captures the script's stderr in ExitError; include it so the
		// cause of the failure is visible.
		if exitErr, ok := err.(*exec.ExitError); ok && len(exitErr.Stderr) > 0 {
			log.Fatalf("Error getting text embedding: %v: %s", err, exitErr.Stderr)
		}
		log.Fatal("Error getting text embedding: ", err)
	}

	// Tokenize on every bracket, comma and whitespace character instead of the
	// exact separator ", ", so the parse does not depend on how the script
	// spaces or wraps its output.
	fields := strings.FieldsFunc(string(output), func(r rune) bool {
		return strings.ContainsRune("[], \t\r\n", r)
	})
	if len(fields) == 0 {
		log.Fatal("Error getting text embedding: script produced no values")
	}

	result := make([]float64, 0, len(fields))
	for _, field := range fields {
		value, err := strconv.ParseFloat(field, 64)
		if err != nil {
			log.Fatal("Error transformint string to float64: ", err)
		}
		result = append(result, value)
	}
	return result
}

// GetNodeAttributes assembles the attribute map of one node from api_map:
// "keywords" holds the result of GetKeywords and "text_embedding" the result
// of GetText.
func GetNodeAttributes(api_map map[string]interface{}) map[string]interface{} {
	keywords := GetKeywords(api_map)
	embedding := GetText(api_map)

	return map[string]interface{}{
		"keywords":       keywords,
		"text_embedding": embedding,
	}
}

// Extract reads the Turtle file file_name, collects attributes for every
// subject and object that appears in its triples, and writes the result as
// indented JSON.
//
// For each distinct subject/object ID the "https://semopenalex.org/work"
// prefix is rewritten to "https://api.openalex.org/works", the resulting URL
// is fetched with OnPage, and the node's attributes are derived with
// GetNodeAttributes. Each triple additionally appends its object to the
// subject's "neighbors" list.
//
// The output is always written to "citation_network_tiny_extracted_data.json"
// in the working directory, whatever file_name is. The function returns that
// name on success and "" (after printing the error) on failure.
func Extract(file_name string) string {
	file, err := os.Open(file_name) //"citation_network_tiny.ttl"
	if err != nil {
		fmt.Println("Error opening file:", err)
		return ""
	}
	defer file.Close()

	byteFile, err := io.ReadAll(file)
	if err != nil {
		fmt.Println("Error converting file:", err)
		return ""
	}

	var triples []Triple
	if err := turtle.Unmarshal(byteFile, &triples); err != nil {
		fmt.Println("Error unmarshaling Turtle data:", err)
		return ""
	}

	allAttributes := make(map[string]map[string]interface{})

	// ensureNode returns the attribute map for id, fetching and storing it the
	// first time the ID is seen so that every work is requested only once.
	ensureNode := func(id string) map[string]interface{} {
		if attrs, ok := allAttributes[id]; ok {
			return attrs
		}
		apiURL := strings.Replace(id, "https://semopenalex.org/work", "https://api.openalex.org/works", 1)
		attrs := GetNodeAttributes(OnPage(apiURL))
		allAttributes[id] = attrs
		return attrs
	}

	for _, triple := range triples {
		subject := ensureNode(triple.Subject)
		ensureNode(triple.Object)

		// A subject without a "neighbors" entry yields a nil slice here, which
		// append turns into a fresh one-element list.
		neighbors, _ := subject["neighbors"].([]string)
		subject["neighbors"] = append(neighbors, triple.Object)
	}

	newfile_name := "citation_network_tiny_extracted_data.json"

	newfile, err := os.Create(newfile_name)
	if err != nil {
		fmt.Println("Error creating file:", err)
		return ""
	}

	encoder := json.NewEncoder(newfile)
	encoder.SetIndent("", " ")
	if err := encoder.Encode(allAttributes); err != nil {
		newfile.Close()
		fmt.Println("Error encoding JSON:", err)
		return ""
	}

	// Close explicitly: a write error can surface only at close time, and
	// success must not be reported for a file that was not fully written.
	if err := newfile.Close(); err != nil {
		fmt.Println("Error closing file:", err)
		return ""
	}

	fmt.Println("Data extraction succesful!")
	return newfile_name
}
15 changes: 14 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
module github.com/jmCodeCraft/go-network

go 1.21.1
go 1.23

toolchain go1.23.2

require (
github.com/jinzhu/copier v0.4.0
github.com/mroth/weightedrand v1.0.0
)

require github.com/nvkp/turtle v1.2.1

require (
github.com/PuerkitoBio/goquery v1.10.0 // indirect
github.com/andybalholm/cascadia v1.3.2 // indirect
github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042 // indirect
github.com/sashabaranov/go-openai v1.35.6 // indirect
github.com/yalue/onnxruntime_go v1.13.0 // indirect
golang.org/x/net v0.29.0 // indirect
)
48 changes: 48 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,4 +1,52 @@
github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4=
github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/jinzhu/copier v0.4.0 h1:w3ciUoD19shMCRargcpm0cm91ytaBhDvuRpz1ODO/U8=
github.com/jinzhu/copier v0.4.0/go.mod h1:DfbEm0FYsaqBcKcFuvmOZb218JkPGtvSHsKg8S8hyyg=
github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042 h1:Vzdm5hdlLdpJOKK+hKtkV5u7xGZmNW6aUBjGcTfwx84=
github.com/knakk/rdf v0.0.0-20190304171630-8521bf4c5042/go.mod h1:fYE0718xXI13XMYLc6iHtvXudfyCGMsZ9hxSM1Ommpg=
github.com/mroth/weightedrand v1.0.0 h1:V8JeHChvl2MP1sAoXq4brElOcza+jxLkRuwvtQu8L3E=
github.com/mroth/weightedrand v1.0.0/go.mod h1:3p2SIcC8al1YMzGhAIoXD+r9olo/g/cdJgAD905gyNE=
github.com/nvkp/turtle v1.2.1 h1:OEDikZt2pnjIwIBrMuusjfPdSZKIXfuo8QDriMFNiVg=
github.com/nvkp/turtle v1.2.1/go.mod h1:Xfd8dQgz1QvPI6Tpie7FESiadCjRhCkd0ypDmJOWxxI=
github.com/sashabaranov/go-openai v1.35.6 h1:oi0rwCvyxMxgFALDGnyqFTyCJm6n72OnEG3sybIFR0g=
github.com/sashabaranov/go-openai v1.35.6/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
github.com/yalue/onnxruntime_go v1.13.0 h1:5HDXHon3EukQMyYA7yPMed/raWaDE/gjwLOwnVoiwy8=
github.com/yalue/onnxruntime_go v1.13.0/go.mod h1:b4X26A8pekNb1ACJ58wAXgNKeUCGEAQ9dmACut9Sm/4=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo=
golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Loading