-
Notifications
You must be signed in to change notification settings - Fork 0
/
doi2bib
executable file
·111 lines (99 loc) · 3.64 KB
/
doi2bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
#!/usr/bin/env python
# This script takes a DOI as a command line argument or a redirected file
# with a list of DOIs and uses doi.org to generate a .bib file which is
# output to the screen.
#
# Timothy Crone ([email protected])
#imports
import subprocess, re, sys
from select import select
from unicode_dict import unicode_dict
# read input dois
#
# DOIs come from exactly one of two places: a single command line argument,
# or (when the argument count is anything else) a list on stdin with one DOI
# per line.  Blank lines and lines starting with '#' are ignored.
dois = []
if len(sys.argv) == 2:
    arg = sys.argv[1].strip()
    # an empty argument would otherwise be looked up as an empty DOI
    if arg:
        dois.append(arg)
else:
    # Poll stdin briefly instead of reading it unconditionally, so that a
    # run with nothing piped in falls through to the usage message below
    # rather than blocking on the terminal.
    timeout = 0.1
    rlist, _, _ = select([sys.stdin], [], [], timeout)
    if rlist:
        for line in sys.stdin:
            line = line.strip()
            # skip blank lines and comment lines (leading whitespace allowed)
            if line and not line.startswith('#'):
                dois.append(line)

# error check dois
if not dois:
    # The usage text goes to stderr so it can never end up inside a
    # redirected .bib file, and the exit status reports the failure.
    sys.stderr.write("Pass a DOI as a single argument, or pipe in a file with a list of DOIs.\n")
    raise SystemExit(1)
# get value function
def get_value(bib, key):
    """Return the value of field *key* from a raw BibTeX entry string.

    The value is the text between the first ``key={`` in *bib* and the
    next ``}, `` after it.  Every character of the value outside the
    printable ASCII range (space through ``~``) is replaced by its
    ``unicode_dict`` entry; a character with no entry is left unchanged
    instead of discarding the whole field.

    Returns an empty string when *bib* has no ``key={`` field.

    NOTE(review): the match is a plain substring search, so ``title={``
    would also match inside e.g. ``booktitle={`` if that came first --
    confirm against the field order the server actually returns.
    """
    marker = '%s={' % key
    if marker not in bib:
        return ''
    value = bib.split(marker, 1)[1].split('}, ', 1)[0]
    # Deleting the printable ASCII characters leaves exactly the
    # characters that need a replacement.
    for char in re.sub('[ -~]', '', value):
        try:
            replacement = unicode_dict[char]
        except KeyError:
            # no known replacement: keep the character as it is
            continue
        value = value.replace(char, replacement)
    return value
# loop through dois
def _fetch_bib(doi):
    """Ask dx.doi.org (via curl) for the BibTeX rendering of *doi*.

    Returns the decoded response with surrounding whitespace and its last
    character removed (presumably the entry's closing brace), or '' when
    curl cannot be started.
    """
    #better:
    #curl -LH "Accept: text/bibliography" "https://api.crossref.org/v1/works/http://dx.doi.org/10.1029/2001JB000918.xml"
    # The command is an argument list run without a shell, so characters
    # in a DOI taken from argv or stdin are never interpreted by a shell.
    cmd = ['curl', '-s', '-LH', 'Accept: text/bibliography; style=bibtex',
           'http://dx.doi.org/%s' % doi]
    try:
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    except OSError as err:
        sys.stderr.write('Could not run curl: %s\n' % err)
        return ''
    return p.communicate()[0].decode("utf-8").strip()[:-1]


def _entry_type(bib):
    """Return the capitalized text between '@' and the first '{', or False."""
    try:
        return re.sub('}$', '', bib.split('{', 1)[0].split('@')[1]).capitalize()
    except IndexError:
        # no '@' before the first '{': not a BibTeX entry
        return False


def _author_key(author):
    """Reduce the raw author field to the first name part used in the key."""
    simple = author.split(', ', 1)[0].replace('}', '').replace('{', '')
    # drop backslash sequences: a backslash plus the character after it
    simple = re.sub(r'\\.', '', simple)
    simple = simple.replace(' ', '')
    if simple.isupper():
        simple = simple.lower().capitalize()
    return simple


def _reorder_authors(author):
    """Rewrite each 'Last, First' in an ' and '-separated list as 'First Last'."""
    names = []
    for person in author.split(' and '):
        last, comma, first = person.partition(',')
        if comma:
            names.append(first.strip() + ' ' + last.strip())
        else:
            # nothing to swap ('et al.' or a name without a comma); keep it
            # instead of failing on the missing second part
            names.append(person.strip())
    return ' and '.join(names)


def _format_entry(bib, requested_doi):
    """Return the reformatted entry text for raw entry *bib*.

    Returns '' when *bib* does not contain a recognizable entry type.
    *requested_doi* is used for the doi field when the entry has no DOI
    field of its own.
    """
    # extract components of entry
    entry = _entry_type(bib)
    if not entry:
        return ''
    author = get_value(bib, 'author')
    journal = get_value(bib, 'journal').replace('&', r'{\&}')
    year = get_value(bib, 'year')
    volume = get_value(bib, 'volume')
    number = get_value(bib, 'number')
    pages = get_value(bib, 'pages')
    doi = get_value(bib, 'DOI') or requested_doi
    publisher = get_value(bib, 'publisher').replace('&', r'{\&}')
    title = get_value(bib, 'title').replace('&', r'{\&}')

    # simplify author for key (must use the raw, not yet reordered, field)
    author_simple = _author_key(author)

    # reorder author
    if author:
        author = _reorder_authors(author)

    # remove periods from author initials, then collapse the runs of
    # spaces that this leaves behind
    author = re.sub(' {2,}', ' ', author.replace('.', ' ')).strip()

    # enclose consecutive and title caps in braces
    if publisher:
        publisher = re.sub('([A-Z]{2,})', r'{\1}', publisher)
    if title:
        # the first character is left alone; every later run of capitals
        # is wrapped in braces
        title = title[0] + re.sub('([A-Z]{1,})', r'{\1}', title[1:])

    # assemble entry
    lines = [
        '@%s{%s%s,\n' % (entry, author_simple, year),
        ' author = {%s},\n' % author,
        ' title = {%s},\n' % title,
        ' journal = {%s},\n' % journal,
        ' year = {%s},\n' % year,
        ' volume = {%s},\n' % volume,
        ' number = {%s},\n' % number,
    ]
    # the pages field is omitted when it contains "n/a"
    if "n/a" not in pages:
        lines.append(' pages = {%s},\n' % pages)
    lines.append(' doi = {%s},\n' % doi)
    lines.append(' publisher = {%s},\n' % publisher)
    lines.append('}\n')
    lines.append('\n')
    return ''.join(lines)


for doi in dois:
    text = _format_entry(_fetch_bib(doi), doi)
    if text:
        # print entry
        sys.stdout.write(text)
    else:
        # report on stderr so the .bib output on stdout stays clean
        sys.stderr.write('No BibTeX entry could be retrieved for DOI %s\n' % doi)