{"id":31859,"date":"2025-02-10T10:39:34","date_gmt":"2025-02-10T09:39:34","guid":{"rendered":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m-2\/"},"modified":"2025-02-10T10:56:45","modified_gmt":"2025-02-10T09:56:45","slug":"oai-clarino-uib-no-jos1m","status":"publish","type":"language-resource","link":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/","title":{"rendered":"Training Corpus jos1M"},"content":{"rendered":"<p><?xml version=\"1.0\" encoding=\"utf-8\"?><br \/>\n<record><\/p>\n<header><identifier>oai:clarino.uib.no:Jos1M<\/identifier><datestamp>2018-03-06T10:18:36Z<\/datestamp><setSpec\/><\/header>\n<p><metadata><CMD xmlns=\"http:\/\/www.clarin.eu\/cmd\/\"><Header><MdCreator>Cheikh Bamba Dione<\/MdCreator><MdCreationDate>2017-03-28<\/MdCreationDate><MdSelfLink>hdl:11495\/DE48-E976-8C42-5<\/MdSelfLink><MdProfile>clarin.eu:cr1:p_1407745711925<\/MdProfile><MdCollectionDisplayName>Clarino Bergen Centre &#8211; INESS<\/MdCollectionDisplayName><\/Header><Resources><ResourceProxyList\/><JournalFileProxyList\/><ResourceRelationList\/><IsPartOfList\/><\/Resources><Components><corpusProfile><resourceCommonInfo ComponentId=\"clarin.eu:cr1:c_1396012485126\"><resourceType>corpus<\/resourceType><identificationInfo ComponentId=\"clarin.eu:cr1:c_1396012485125\"><resourceName xml:lang=\"sl\">Training Corpus jos1M<\/resourceName><description xml:lang=\"sl\">The jos1M corpus contains 1 million words of sampled paragraphs from the FidaPLUS corpus. It is meant to serve as a training corpus for word-level tagging of Slovene. This silver-standard corpus is annotated for morphosyntactic descriptions (fine grained PoS tags) and lemmas, with about one fourth of the most problematic annotations hand-validated. The corpus is available in source TEI P5 XML and in the simpler and smaller vertical format, used by various concordancers.<\/description><resourceShortName>jos1M<\/resourceShortName><url>http:\/\/clarino.uib.no\/iness\/landing-page?resource=jos1M&amp;view=short<\/url><url>http:\/\/clarino.uib.no\/iness\/landing-page?resource=jos1M<\/url><PID>hdl:11495\/DC84-BF60-3823-5<\/PID><\/identificationInfo><distributionInfo ComponentId=\"clarin.eu:cr1:c_1396012485124\"><licenceInfo ComponentId=\"clarin.eu:cr1:c_1396012485158\"><userCategory>Public<\/userCategory><licence ComponentId=\"clarin.eu:cr1:c_1447674760330\"><licenceFamily>Creative Commons (CC)<\/licenceFamily><licenceName>Creative_Commons-BY-NC (CC-BY-NC)<\/licenceName><licenceURL>http:\/\/creativecommons.org\/licenses\/by-nc\/4.0\/<\/licenceURL><conditionsOfUse>BY<\/conditionsOfUse><conditionsOfUse>NC<\/conditionsOfUse><\/licence><\/licenceInfo><\/distributionInfo><contact><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>person<\/actorType><role>author<\/role><personInfo ComponentId=\"clarin.eu:cr1:c_1396012485192\"><surname>Krek<\/surname><givenName>Simon<\/givenName><affiliation><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName xml:lang=\"en\">\u201cJo\u017eef Stefan\u201d Institute<\/organizationName><\/organizationInfo><\/affiliation><\/personInfo><\/actorInfo><\/contact><metadataInfo ComponentId=\"clarin.eu:cr1:c_1407745711922\"><metadataCreationDate>2017-03-28<\/metadataCreationDate><metadataLastDateUpdated>2018-03-06<\/metadataLastDateUpdated><metadataCreator><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>person<\/actorType><personInfo ComponentId=\"clarin.eu:cr1:c_1396012485192\"><surname xml:lang=\"no\">Dione<\/surname><givenName xml:lang=\"no\">Cheikh Bamba<\/givenName><sex>male<\/sex><position>Researcher (Ph.D)<\/position><affiliation><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName xml:lang=\"en\">University of Bergen<\/organizationName><organizationName xml:lang=\"no\">Universitetet i Bergen<\/organizationName><organizationShortName xml:lang=\"no\">UiB<\/organizationShortName><organizationShortName xml:lang=\"en\">UoB<\/organizationShortName><departmentName xml:lang=\"en\">Department of Linguistic, Literary and Aesthetic Studies<\/departmentName><\/organizationInfo><\/affiliation><\/personInfo><communicationInfo ComponentId=\"clarin.eu:cr1:c_1352813745460\"><email>clarin@uib.no<\/email><email>iness@uib.no<\/email><\/communicationInfo><\/actorInfo><\/metadataCreator><\/metadataInfo><resourceCreationInfo ComponentId=\"clarin.eu:cr1:c_1407745711921\"><resourceCreator><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>person<\/actorType><personInfo ComponentId=\"clarin.eu:cr1:c_1396012485192\"><surname>Erjavec, Toma\u017e<\/surname><affiliation><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName>Jo\u017eef Stefan Institute<\/organizationName><\/organizationInfo><\/affiliation><\/personInfo><\/actorInfo><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>person<\/actorType><personInfo ComponentId=\"clarin.eu:cr1:c_1396012485192\"><surname>Krek, Simon<\/surname><affiliation><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName>Jo\u017eef Stefan Institute<\/organizationName><\/organizationInfo><\/affiliation><\/personInfo><\/actorInfo><\/resourceCreator><\/resourceCreationInfo><\/resourceCommonInfo><corpusInfo ComponentId=\"clarin.eu:cr1:c_1407745711878\"><corpusType>Written Corpus<\/corpusType><corpusPartInfo ComponentId=\"clarin.eu:cr1:c_1407745711885\"><mediaType>text<\/mediaType><\/corpusPartInfo><corpusPartGeneralInfo ComponentId=\"clarin.eu:cr1:c_1407745711882\"><annotationInfo ComponentId=\"clarin.eu:cr1:c_1407745711924\"><annotationType>morphosyntacticAnnotation-posTagging<\/annotationType><annotationType>lemmatization<\/annotationType><\/annotationInfo><\/corpusPartGeneralInfo><\/corpusInfo><\/corpusProfile><\/Components><\/CMD><\/metadata><\/record><\/p>\n","protected":false},"template":"","categories":[],"tags":[],"language-resource-type":[7572],"language-resource-origin":[7565],"class_list":["post-31859","language-resource","type-language-resource","status-publish","hentry"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v27.1 (Yoast SEO v27.1.1) - https:\/\/yoast.com\/product\/yoast-seo-premium-wordpress\/ -->\n<title>Training Corpus jos1M - Spr\u00e5kbanken<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/\" \/>\n<meta property=\"og:locale\" content=\"nb_NO\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Training Corpus jos1M\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/\" \/>\n<meta property=\"og:site_name\" content=\"Spr\u00e5kbanken\" \/>\n<meta property=\"article:modified_time\" content=\"2025-02-10T09:56:45+00:00\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Ansl. lesetid\" \/>\n\t<meta name=\"twitter:data1\" content=\"1 minutt\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/\",\"url\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/\",\"name\":\"Training Corpus jos1M - Spr\u00e5kbanken\",\"isPartOf\":{\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/#website\"},\"datePublished\":\"2025-02-10T09:39:34+00:00\",\"dateModified\":\"2025-02-10T09:56:45+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/#breadcrumb\"},\"inLanguage\":\"nb-NO\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/www.nb.no\/sprakbanken\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Resources from the resource bank\",\"item\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Training Corpus jos1M\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/#website\",\"url\":\"https:\/\/www.nb.no\/sprakbanken\/\",\"name\":\"Spr\u00e5kbanken\",\"description\":\"\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.nb.no\/sprakbanken\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"nb-NO\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"Training Corpus jos1M - Spr\u00e5kbanken","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/","og_locale":"nb_NO","og_type":"article","og_title":"Training Corpus jos1M","og_url":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/","og_site_name":"Spr\u00e5kbanken","article_modified_time":"2025-02-10T09:56:45+00:00","twitter_card":"summary_large_image","twitter_misc":{"Ansl. lesetid":"1 minutt"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/","url":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/","name":"Training Corpus jos1M - Spr\u00e5kbanken","isPartOf":{"@id":"https:\/\/www.nb.no\/sprakbanken\/#website"},"datePublished":"2025-02-10T09:39:34+00:00","dateModified":"2025-02-10T09:56:45+00:00","breadcrumb":{"@id":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/#breadcrumb"},"inLanguage":"nb-NO","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-clarino-uib-no-jos1m\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/www.nb.no\/sprakbanken\/"},{"@type":"ListItem","position":2,"name":"Resources from the resource bank","item":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/"},{"@type":"ListItem","position":3,"name":"Training Corpus jos1M"}]},{"@type":"WebSite","@id":"https:\/\/www.nb.no\/sprakbanken\/#website","url":"https:\/\/www.nb.no\/sprakbanken\/","name":"Spr\u00e5kbanken","description":"","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.nb.no\/sprakbanken\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"nb-NO"}]}},"lang":"en","translations":{"nb":31856,"en":31859},"pll_sync_post":[],"_links":{"self":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource\/31859","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource"}],"about":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/types\/language-resource"}],"wp:attachment":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/media?parent=31859"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/categories?post=31859"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/tags?post=31859"},{"taxonomy":"language-resource-type","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource-type?post=31859"},{"taxonomy":"language-resource-origin","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource-origin?post=31859"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}