diff --git a/INSTALL b/INSTALL
index f1ad87d6ea..e584746094 100644
--- a/INSTALL
+++ b/INSTALL
@@ -83,6 +83,9 @@ Contents
natively in UTF-8 mode by setting "default-character-set=utf8"
in various parts of your "my.cnf" file, such as in the
"[mysql]" part and elsewhere; but this is not really required.
+ Note also that you may encounter problems when MySQL is run in
+ "strict mode"; you may want to set "sql_mode" in your "my.cnf" so
+ that strict modes (such as `STRICT_ALL_TABLES`) are not enabled.
+
+
diff --git a/modules/bibformat/etc/format_templates/People_HTML_detailed.bft b/modules/bibformat/etc/format_templates/People_HTML_detailed.bft
index d7c6d3e197..ecf191c2d1 100644
--- a/modules/bibformat/etc/format_templates/People_HTML_detailed.bft
+++ b/modules/bibformat/etc/format_templates/People_HTML_detailed.bft
@@ -1,14 +1,132 @@
" + title + ": " + content + "
" def escape_values(bfo): """ diff --git a/modules/bibformat/lib/elements/bfe_authority_description.py b/modules/bibformat/lib/elements/bfe_authority_description.py new file mode 100644 index 0000000000..0847f230a9 --- /dev/null +++ b/modules/bibformat/lib/elements/bfe_authority_description.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +# +# This file is part of Invenio. +# Copyright (C) 2015, 2016 CERN. +# +# Invenio is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# Invenio is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Invenio; if not, write to the Free Software Foundation, Inc., +# 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +"""BibFormat element - Prints the author's description information. + +Description contains "Home Institute (short) - Department/Group". +Example: CERN - IT/OIS +""" + + +def format_element(bfo, icon="yes"): + """Return description for the record. + + :param string icon: display icon for the element if 'yes' + """ + result = "" + description = bfo.fields("371__h") + + if description: + result = description[0] + if icon.lower() == "yes": + icon_class = "fa fa-home" + result = " {1}".format(icon_class, result) + + return result + + +def escape_values(bfo): + """Escape return value of element. + + Called by BibFormat in order to check if output of this element + should be escaped. 
+ """ + return 0 diff --git a/modules/bibformat/lib/elements/bfe_authority_links.py b/modules/bibformat/lib/elements/bfe_authority_links.py index 1e5893a8af..b01fe36fdd 100644 --- a/modules/bibformat/lib/elements/bfe_authority_links.py +++ b/modules/bibformat/lib/elements/bfe_authority_links.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of Invenio. -# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. +# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2015, 2016 CERN. # # Invenio is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -19,8 +19,6 @@ """BibFormat element - Prints the control number of an Authority Record. """ -from invenio.config import CFG_SITE_URL, CFG_SITE_NAME - from invenio.bibauthority_config import \ CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_NAME, \ CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD, \ @@ -28,12 +26,81 @@ from invenio.bibauthority_engine import \ get_low_level_recIDs_from_control_no, \ get_dependent_records_for_control_no +from invenio.config import CFG_CACHEDIR +from invenio.viafutils import get_wikipedia_link, get_wiki_link_from_record -from invenio.viafutils import get_wikipedia_link,get_wiki_link_from_record +import json +import urllib2 +import os.path __revision__ = "$Id$" -def format_element(bfo): + +def get_inspire_name_by_inspire_id( + inspire_id, + json_file=os.path.join(CFG_CACHEDIR, "inspirehep-names-mapping.json")): + """Get INSPIRE-HEP-name given the INSPIRE-ID. + + :param string inspire_id: INSPIRE-ID + :param file json_file: INSPIRE-HEP mapping + """ + inspire_name = None + try: + with open(json_file) as f: + try: + inspire_mapping = json.load(f) + inspire_name = inspire_mapping.get(inspire_id) + except ValueError: + pass + except EnvironmentError: + pass + return inspire_name + + +def get_inspire_profile(inspire_id, val, use_inspirehepname=False): + """Get HTML element for INSPIRE-HEP Profile. 
+ + :param bool use_inspirehepname: use INSPIRE-HEP search query if + False, otherwise call get_inspire_name_by_inspire_id + """ + result = None + + if use_inspirehepname: + inspirehep_name = get_inspire_name_by_inspire_id(inspire_id) + if inspirehep_name: + url = "https://inspirehep.net/author/profile/{0}".format( + inspirehep_name) + else: + url = ("http://inspirehep.net/search?cc=HepNames&p=035__a%3A{0}&of=hd" + .format(inspire_id)) + + result = "{1}".format(url, val) + return result + + +def get_cern_phonebook(cern_id, val): + """Get HTML element for CERN Phonebook.""" + phonebook_url = ( + "https://phonebook.cern.ch/phonebook/#personDetails/?id={0}" + .format(cern_id)) + return "{1}".format(phonebook_url, val) + + +def get_cern_profile(cern_id, val): + """Get HTML element for CERN Profile.""" + html_element = None + cern_profile_url = "http://profiles.web.cern.ch/{0}".format(cern_id) + req = urllib2.Request(cern_profile_url) + + try: + urllib2.urlopen(req) + html_element = "{1}".format(cern_profile_url, val) + except urllib2.HTTPError: + pass + return html_element + + +def format_element(bfo, print_title="yes"): """ Prints the control number of an author authority record in HTML. By default prints brief version. 
@@ -47,7 +114,6 @@ def format_element(bfo): control_nos = [d['a'] for d in bfo.fields('035__') if d['a'] is not None] control_nos = filter(None, control_nos) # fastest way to remove empty ""s - style = "style='width:auto;height:20px;margin-right:10px'" links_formatted = [] for control_no in control_nos: from urllib import quote @@ -60,35 +126,51 @@ def format_element(bfo): link_to_wikipedia = get_wikipedia_link(viaf_id) ## Wikipedia link with wiki icon if link_to_wikipedia: - image_element = image_pattern % { "text": "Wikipedia link", "image": "wikipedia.png", "external_article": link_to_wikipedia} + image_element = image_pattern % { "text": "Wikipedia", "image": "wikipedia.png", "external_article": link_to_wikipedia} links_formatted.append(image_element) ## VIAF link image_element = image_pattern \ - % { "text" : "VIAF cluster link","image": "viaf.png", "external_article": str("http://viaf.org/viaf/"+viaf_id) } + % { "text" : "VIAF cluster","image": "viaf.png", "external_article": str("http://viaf.org/viaf/"+viaf_id) } links_formatted.append(image_element) ## Library of congress link + if (control_no.find("|(DLC)") != -1): dlc_id = control_no.split("|(DLC)")[1].replace(" ","") link_to_lccn = "http://lccn.loc.gov/"+ dlc_id - image_element = image_pattern % { "text": "Library of Congress link", "image": "library_of_congress.png", "external_article" : link_to_lccn } + image_element = image_pattern % { "text": "Library of Congress", "image": "library_of_congress.png", "external_article" : link_to_lccn } links_formatted.append(image_element) + if (control_no.find("|(INSPIRE)") != -1): + inspire_profile = get_inspire_profile( + control_no.split("|(INSPIRE)")[1], + _("INSPIRE-HEP Profile")) + (links_formatted.append(inspire_profile) + if inspire_profile is not None else None) + if (control_no.find("|(SzGeCERN)") != -1): + cern_id = control_no.split("|(SzGeCERN)")[1] + links_formatted.append( + get_cern_phonebook(cern_id, _("CERN Phonebook"))) + html_element = 
get_cern_profile(cern_id, _("CERN Profile")) + if html_element: + links_formatted.append(html_element) + result = "" if links_formatted: + result = "" + title + ": " + content + "
" - else: - return None def escape_values(bfo): - """ + """Escape return value of element. + Called by BibFormat in order to check if output of this element should be escaped. """ diff --git a/modules/bibformat/lib/elements/bfe_authority_publications.py b/modules/bibformat/lib/elements/bfe_authority_publications.py index 79a0d2233e..b92ab4ea3d 100644 --- a/modules/bibformat/lib/elements/bfe_authority_publications.py +++ b/modules/bibformat/lib/elements/bfe_authority_publications.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of Invenio. -# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 CERN. +# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2015, 2016 CERN. # # Invenio is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as @@ -19,95 +19,104 @@ """BibFormat element - Prints the control number of an Authority Record. """ +from invenio.bibauthority_config import ( + CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_NAME, + CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD, + CFG_BIBAUTHORITY_RECORD_AUTHOR_CONTROL_NUMBER_FIELDS as control_number_fields) +from invenio.bibauthority_engine import ( + get_low_level_recIDs_from_control_no, + get_dependent_records_for_control_no) from invenio.config import CFG_SITE_URL, CFG_SITE_NAME +from invenio.search_engine import get_fieldvalues, perform_request_search -from invenio.bibauthority_config import \ - CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_NAME, \ - CFG_BIBAUTHORITY_RECORD_CONTROL_NUMBER_FIELD, \ - CFG_BIBAUTHORITY_RECORD_AUTHOR_CONTROL_NUMBER_FIELDS as control_number_fields, \ - CFG_BIBAUTHORITY_AUTHORITY_COLLECTION_IDENTIFIER as authority_identifier -from invenio.bibauthority_engine import \ - get_low_level_recIDs_from_control_no, \ - get_dependent_records_for_control_no - -from invenio.search_engine import get_fieldvalues CFG_BIBAUTHORITY_PUBLICATION_VIEW_LIMIT = 10 __revision__ = "$Id$" -def format_element(bfo): + +def 
get_record_ids_for_authority_ids(authority_ids, author_full_name): + """Return list of record ids for a given authority ids (control numbers). + + If no record ids have been found for the given ids, do a search based + on the author's full name, stored in the field `100__a`. + + :param list authority_id: authority ids (also known as control numbers) + :param str author_full_name: if no record ids have been found for the given + authority_id, search by author_full_name + + :return: list of record ids, or empty list, if no record ids have been found + """ + record_ids = [] + for authority_id in authority_ids: + record_ids.extend(get_dependent_records_for_control_no(authority_id)) + record_ids.extend(get_dependent_records_for_control_no( + authority_id.replace("AUTHOR|(SzGeCERN)", "CERN-"))) + record_ids.extend(get_dependent_records_for_control_no( + authority_id.replace("AUTHOR|(SzGeCERN)", "CCID-"))) + if not record_ids: + record_ids.extend(get_dependent_records_for_control_no( + authority_id.replace("AUTHOR|(INSPIRE)", ""))) + + if not record_ids: + # No record ids for the given authority ids have been found + # Search for record ids using the author's full name + record_ids.extend(perform_request_search( + p="author:\"{0}\"".format(author_full_name))) + + # Remove possible duplicates + return list(set(record_ids)) + + +def format_element(bfo, print_title="yes"): """ Prints the control number of an author authority record in HTML. By default prints brief version. 
- @param brief: whether the 'brief' rather than the 'detailed' format @type brief: 'yes' or 'no' """ - from invenio.messages import gettext_set_language _ = gettext_set_language(bfo.lang) # load the right message language - control_nos = [d['a'] for d in bfo.fields('035__') if d.get('a')] - authority_type = [d['a'] for d in bfo.fields('980__') if d.get('a') and d.get('a')!=authority_identifier] - if authority_type and type(authority_type) is list: - authority_type = authority_type[0] - + control_nos.append("AUTHOR|(CDS){0}".format(bfo.control_field("001"))) - previous_recIDs = [] - parameters = [] - count = 0 publications_formatted = [] - recids_added = set() - ## for every control number that this author has, find all the connected records for each one - for control_no in control_nos: - for control_number_field in control_number_fields.get(authority_type,[]): - parameters.append(control_number_field + ":" + control_no.replace(" ","")) - recIDs = [x for x in get_dependent_records_for_control_no(control_no) if x not in previous_recIDs] - length = len(recIDs) or None - from urllib import quote - # if we have dependent records, provide a link to them - if length: - prefix_pattern = "" - postfix = "" - url_str = '' - # print as many of the author's publications as the CFG_BIBAUTHORITY_PUBLICATION_VIEW_LIMIT allows - for i in range(length if length" + title + ": " + content + "
" - else: - return "" + result = ("Invenio Access Control Engine can be called from within your Python programs via both a regular Python API and CLI. -In addition the you get an explanation of the program flow. +In addition to the above features, you also get an explanation of the program flow. Contents: 1. Regular API diff --git a/modules/websearch/lib/search_engine.py b/modules/websearch/lib/search_engine.py index ab15175be8..eb78a0cc09 100644 --- a/modules/websearch/lib/search_engine.py +++ b/modules/websearch/lib/search_engine.py @@ -97,7 +97,8 @@ InvenioWebSearchReferstoLimitError, \ InvenioWebSearchCitedbyLimitError, \ CFG_WEBSEARCH_IDXPAIRS_FIELDS,\ - CFG_WEBSEARCH_IDXPAIRS_EXACT_SEARCH + CFG_WEBSEARCH_IDXPAIRS_EXACT_SEARCH, \ + CFG_WEBSEARCH_BLACKLISTED_FORMATS from invenio.search_engine_utils import (get_fieldvalues, get_fieldvalues_alephseq_like, record_exists) @@ -1711,7 +1712,7 @@ def get_synonym_terms(term, kbr_name, match_type, use_memoise=False): return dterms.keys() -def wash_output_format(ouput_format): +def wash_output_format(ouput_format, verbose=False, req=None): """Wash output format FORMAT. Currently only prevents input like 'of=9' for backwards-compatible format that prints certain fields only. (for this task, 'of=tm' is preferred)""" @@ -1719,6 +1720,12 @@ def wash_output_format(ouput_format): # asked to print MARC tags, but not enough digits, # so let's switch back to HTML brief default return 'hb' + elif ouput_format in CFG_WEBSEARCH_BLACKLISTED_FORMATS: + if verbose: + write_warning("Selected format is not available through perform_request_search", req=req) + # Returning an empty list seems dangerous because you wouldn't know + # right away that the list is not supposed to be empty. 
+ return 'hb' else: return ouput_format @@ -5714,7 +5721,7 @@ def prs_wash_arguments(req=None, cc=CFG_SITE_NAME, c=None, p="", f="", rg=CFG_WE """ # wash output format: - of = wash_output_format(of) + of = wash_output_format(of, verbose=verbose, req=req) # wash all arguments requiring special care p = wash_pattern(p) diff --git a/modules/websearch/lib/search_engine_config.py b/modules/websearch/lib/search_engine_config.py index 17d8daf1da..2bc848a427 100644 --- a/modules/websearch/lib/search_engine_config.py +++ b/modules/websearch/lib/search_engine_config.py @@ -50,6 +50,11 @@ # interfaces (0=simple, 1=advanced, 2=add-to-search): CFG_WEBSEARCH_ENABLED_SEARCH_INTERFACES = [0,1,2] +# CFG_WEBSEARCH_BLACKLISTED_FORMATS -- list of formats that will be refused +# by perform_request_search: +# * recstruct is an internal format thus should not be exposed +CFG_WEBSEARCH_BLACKLISTED_FORMATS = ["recstruct", "wapaff", "wapdat"] + class InvenioWebSearchUnknownCollectionError(Exception): """Exception for bad collection.""" diff --git a/modules/websearch/lib/websearch_external_collections_getter_unit_tests.py b/modules/websearch/lib/websearch_external_collections_getter_unit_tests.py index 8d55a05730..5ac036d9b7 100644 --- a/modules/websearch/lib/websearch_external_collections_getter_unit_tests.py +++ b/modules/websearch/lib/websearch_external_collections_getter_unit_tests.py @@ -51,7 +51,7 @@ def test_async_download(self): ## - test 1 bad IP: 1.2.3.4 ## Return the list of errors. 
checks = [ - {'url': 'http://invenio-software.org', 'content': 'About Invenio'}, + {'url': 'http://invenio-software.org', 'content': 'Invenio'}, {'url': 'http://rjfreijoiregjreoijgoirg.fr'}, {'url': 'http://1.2.3.4/'}] diff --git a/modules/websubmit/lib/functions/Stamp_Uploaded_Files.py b/modules/websubmit/lib/functions/Stamp_Uploaded_Files.py index e8b5b42147..40c9cee7e6 100644 --- a/modules/websubmit/lib/functions/Stamp_Uploaded_Files.py +++ b/modules/websubmit/lib/functions/Stamp_Uploaded_Files.py @@ -302,8 +302,8 @@ def visit_for_stamping(visit_for_stamping_arguments, dirname, filenames): 'file_stamper_options' members. @param dirname: (string) - the path to the directory in which the files are to be stamped. - @param filenames: (list) - the names of each file in dirname. An - attempt will be made to stamp each of these files. + @param filenames: (list) - the names of each file and subdirectory in + dirname. An attempt will be made to stamp each of the files. @Exceptions Raised: + InvenioWebSubmitFunctionWarning; + InvenioWebSubmitFunctionError; @@ -345,6 +345,10 @@ def visit_for_stamping(visit_for_stamping_arguments, dirname, filenames): path_to_subject_file = "%s/%s" % (dirname, file_to_stamp) file_stamper_options['input-file'] = path_to_subject_file + if not os.path.isfile(path_to_subject_file): + # If it's not a file, we can't stamp it. Continue with next file + continue + ## Just before attempting to stamp the file, log the dictionary of ## options (file_stamper_options) that will be passed to websubmit- ## file-stamper: