{"id":31818,"date":"2025-02-10T10:39:29","date_gmt":"2025-02-10T09:39:29","guid":{"rendered":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101-2\/"},"modified":"2025-02-10T10:56:17","modified_gmt":"2025-02-10T09:56:17","slug":"oai-nb-no-sbr-101","status":"publish","type":"language-resource","link":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/","title":{"rendered":"Synthetic text images for North, South, Lule and Inare S\u00e1mi"},"content":{"rendered":"<p><?xml version=\"1.0\" encoding=\"utf-8\"?><br \/>\n<record><\/p>\n<header><identifier>oai:nb.no:sbr-101<\/identifier><datestamp>2025-01-28<\/datestamp><\/header>\n<p><metadata><CMD xmlns=\"http:\/\/www.clarin.eu\/cmd\/\"><Header><MdCreator>Arne Martinus Lindstad<\/MdCreator><MdCreationDate>2025-01-28<\/MdCreationDate><MdSelfLink>https:\/\/www.nb.no\/sprakbanken\/oai?verb=GetRecord&amp;identifier=oai:nb.no:sbr-101&amp;metadataPrefix=cmdi<\/MdSelfLink><MdProfile>clarin.eu:cr1:p_1562754657363<\/MdProfile><MdCollectionDisplayName>Spr\u00e5kbanken NB<\/MdCollectionDisplayName><\/Header><Resources><ResourceProxyList><ResourceProxy id=\"sami_img_ocr\"><ResourceType mimetype=\"application\/zip\">Resource<\/ResourceType><ResourceRef>https:\/\/www.nb.no\/sbfil\/samisk_ocr\/syntetisk_data\/parquet_files.zip<\/ResourceRef><\/ResourceProxy><ResourceProxy id=\"sami_img_ocr_nob\"><ResourceType mimetype=\"application\/pdf\">Resource<\/ResourceType><ResourceRef>https:\/\/www.nb.no\/sbfil\/samisk_ocr\/syntetisk_data\/README_nob.pdf<\/ResourceRef><\/ResourceProxy><ResourceProxy id=\"sami_img_ocr_eng\"><ResourceType mimetype=\"application\/pdf\">Resource<\/ResourceType><ResourceRef>https:\/\/www.nb.no\/sbfil\/samisk_ocr\/syntetisk_data\/README_eng.pdf<\/ResourceRef><\/ResourceProxy><\/ResourceProxyList><JournalFileProxyList\/><ResourceRelationList><ResourceRelation><RelationType>describes<\/RelationType><Res1 ref=\"sami_img_ocr_nob\"\/><Res2 ref=\"sami_img_ocr\"\/><\/ResourceRelation><ResourceRelation><RelationType>describes<\/RelationType><Res1 ref=\"sami_img_ocr_eng\"\/><Res2 ref=\"sami_img_ocr\"\/><\/ResourceRelation><\/ResourceRelationList><IsPartOfList\/><\/Resources><Components><toolProfile><resourceCommonInfo ComponentId=\"clarin.eu:cr1:c_1396012485126\"><resourceType>toolService<\/resourceType><identificationInfo ComponentId=\"clarin.eu:cr1:c_1396012485125\"><resourceName xml:lang=\"nb\">Syntetiske tekstbilder for nord-, s\u00f8r-, lule- og inaresamisk<\/resourceName><resourceName xml:lang=\"en\">Synthetic text images for North, South, Lule and Inare S\u00e1mi<\/resourceName><description xml:lang=\"nb\">Dette datasettet inneholder syntetiske linjebilder som kan brukes til \u00e5 finjustere OCR-modeller for nord-, s\u00f8r-, lule- og inaresamisk. Fremgangsm\u00e5ten for \u00e5 lage disse bildene er \u00e5 lage &#8216;rene&#8217; linjebilder og tilf\u00f8re st\u00f8y ved hjelp av Augraphy.<\/p>\n<p>Teksten i datasettet kommer fra Giellatekno sitt korpus.<\/p>\n<p>Datasettet er tilfeldig delt opp slik at 71% av filene (307387 linjer) er i treningsdelen, 9% av filene (40765 linjer) er i valideringsdelen og 20% av filene er i (84534 linjer) testdelen. Hver del har en unik mengde skrifttyper og tekst- og bakgrunnsfarger.<\/p>\n<p>Se dokumentasjonsfilen for mer informasjon.<\/description><description xml:lang=\"en\">This dataset contains synthetic line images meant for fitting OCR models for North, South, Lule and Inari S\u00e1mi. Clean line images are created using Pillow and they are subsequently distorted using Augraphy.<\/p>\n<p>The text in this dataset comes from Giellatekno&#8217;s corpus.<\/p>\n<p>The dataset is split randomly by file so 71 % of the files (307387 lines) are in the training split, 9 % of the files (40765 lines) are in the validation split and 20 % of the files (84534 lines) are in the test split. Each split has a unique set of typefaces and text\/background colors.<br \/>\n|<br \/>\nSee the documentation file for more information.<\/description><url description=\"resource homepage\">https:\/\/www.nb.no\/sprakbanken\/ressurskatalog\/oai-nb-no-sbr-101\/<\/url><PID description=\"handle\">hdl:21.11146\/101<\/PID><identifier>sbr-101<\/identifier><\/identificationInfo><distributionInfo ComponentId=\"clarin.eu:cr1:c_1396012485124\"><licenceInfo ComponentId=\"clarin.eu:cr1:c_1396012485158\"><userCategory>Public<\/userCategory><distributionAccessMedium>downloadable<\/distributionAccessMedium><downloadLocation description=\"resource homepage\">https:\/\/www.nb.no\/sprakbanken\/ressurskatalog\/oai-nb-no-sbr-101\/<\/downloadLocation><attributionText xml:lang=\"en\">Please cite<\/p>\n<p>1. Enstad T, Trosterud T, R\u00f8sok MI, Beyer Y, Roald M. &#8216;Comparative analysis of optical character recognition methods for S\u00e1mi texts from the National Library of Norway.&#8217; Accepted for publication in Proceedings of the 25th Nordic Conference on Computational Linguistics (NoDaLiDa) 2025, https:\/\/arxiv.org\/abs\/2501.07300.<\/p>\n<p>2. SIKOR UiT The Arctic University of Norway and the Norwegian Saami Parliament&#8217;s Saami text collection, http:\/\/gtweb.uit.no\/korp, Version 01.12.2021 [Data set]. (Also note that the SIKOR dataset to get S\u00e1mi text for the images is CC-BY 3.0 licensed.)<\/attributionText><licence ComponentId=\"clarin.eu:cr1:c_1447674760330\"><licenceFamily>Creative Commons (CC)<\/licenceFamily><licenceName>Creative_Commons-BY (CC-BY)<\/licenceName><licenceURL>https:\/\/creativecommons.org\/licenses\/by\/3.0\/<\/licenceURL><conditionsOfUse>BY<\/conditionsOfUse><\/licence><licensor><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>organization<\/actorType><role xml:lang=\"en\">Licensor<\/role><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName xml:lang=\"nb\">Nasjonalbiblioteket<\/organizationName><organizationName xml:lang=\"en\">National Library of Norway<\/organizationName><organizationShortName xml:lang=\"nb\">NB<\/organizationShortName><organizationShortName xml:lang=\"en\">NLN<\/organizationShortName><departmentName xml:lang=\"nb\">Spr\u00e5kbanken<\/departmentName><departmentName xml:lang=\"en\">The Language Bank<\/departmentName><\/organizationInfo><communicationInfo ComponentId=\"clarin.eu:cr1:c_1352813745460\"><email>sprakbanken@nb.no<\/email><url>https:\/\/www.nb.no\/sprakbanken\/<\/url><\/communicationInfo><\/actorInfo><\/licensor><\/licenceInfo><\/distributionInfo><contact><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>organization<\/actorType><role xml:lang=\"en\">Contact<\/role><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName xml:lang=\"nb\">Nasjonalbiblioteket<\/organizationName><organizationName xml:lang=\"en\">National Library of Norway<\/organizationName><organizationShortName xml:lang=\"nb\">NB<\/organizationShortName><organizationShortName xml:lang=\"en\">NLN<\/organizationShortName><departmentName xml:lang=\"nb\">Spr\u00e5kbanken<\/departmentName><departmentName xml:lang=\"en\">The Language Bank<\/departmentName><\/organizationInfo><communicationInfo ComponentId=\"clarin.eu:cr1:c_1352813745460\"><email>sprakbanken@nb.no<\/email><url>https:\/\/www.nb.no\/sprakbanken\/<\/url><\/communicationInfo><\/actorInfo><\/contact><metadataInfo ComponentId=\"clarin.eu:cr1:c_1407745711922\"><metadataCreationDate>2025-01-28<\/metadataCreationDate><metadataLanguageName>Norwegian Bokm\u00e5l<\/metadataLanguageName><metadataLanguageName>English<\/metadataLanguageName><metadataLanguageId>nb<\/metadataLanguageId><metadataLanguageId>en<\/metadataLanguageId><metadataLastDateUpdated>2025-01-28<\/metadataLastDateUpdated><metadataCreator><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>organization<\/actorType><role xml:lang=\"en\">Metadata Creator<\/role><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName xml:lang=\"nb\">Nasjonalbiblioteket<\/organizationName><organizationName xml:lang=\"en\">National Library of Norway<\/organizationName><organizationShortName xml:lang=\"nb\">NB<\/organizationShortName><organizationShortName xml:lang=\"en\">NLN<\/organizationShortName><departmentName xml:lang=\"nb\">Spr\u00e5kbanken<\/departmentName><departmentName xml:lang=\"en\">The Language Bank<\/departmentName><\/organizationInfo><communicationInfo ComponentId=\"clarin.eu:cr1:c_1352813745460\"><email>sprakbanken@nb.no<\/email><url>https:\/\/www.nb.no\/sprakbanken\/<\/url><\/communicationInfo><\/actorInfo><\/metadataCreator><\/metadataInfo><resourceCreationInfo ComponentId=\"clarin.eu:cr1:c_1407745711921\"><creationStartDate>2024-10-01<\/creationStartDate><creationEndDate>2025-01-28<\/creationEndDate><resourceCreator><actorInfo ComponentId=\"clarin.eu:cr1:c_1396012485194\"><actorType>organization<\/actorType><role xml:lang=\"en\">Resource Creator<\/role><organizationInfo ComponentId=\"clarin.eu:cr1:c_1407745711883\"><organizationName xml:lang=\"nb\">Nasjonalbiblioteket<\/organizationName><organizationName xml:lang=\"en\">National Library of Norway<\/organizationName><organizationShortName xml:lang=\"nb\">NB<\/organizationShortName><organizationShortName xml:lang=\"en\">NLN<\/organizationShortName><departmentName xml:lang=\"nb\">Spr\u00e5kbanken<\/departmentName><departmentName xml:lang=\"en\">The Language Bank<\/departmentName><\/organizationInfo><communicationInfo ComponentId=\"clarin.eu:cr1:c_1352813745460\"><email>sprakbanken@nb.no<\/email><url>https:\/\/www.nb.no\/sprakbanken\/<\/url><\/communicationInfo><\/actorInfo><\/resourceCreator><\/resourceCreationInfo><\/resourceCommonInfo><toolInfo ComponentId=\"clarin.eu:cr1:c_1562754657362\"><description>Synthetic text images for S\u00e1mi Languages<\/description><inputInfo ComponentId=\"clarin.eu:cr1:c_1360931019804\"><mediaType>image<\/mediaType><\/inputInfo><outputInfo ComponentId=\"clarin.eu:cr1:c_1360931019824\"><mediaType>text<\/mediaType><\/outputInfo><Service ComponentId=\"clarin.eu:cr1:c_1505397653787\" validation-error=\"Validation error: CoreVersion: A value has to be specified.\"><Name>Synthetic images for S\u00e1mi Languages<\/Name><ServiceDescriptionLocation><Location>https:\/\/www.nb.no\/sprakbanken\/ressurskatalog\/oai-nb-no-sbr-101\/<\/Location><\/ServiceDescriptionLocation><Operations><Operation ComponentId=\"clarin.eu:cr1:c_1299509410080\"><Name>OCR<\/Name><Output><ParameterGroup ComponentId=\"clarin.eu:cr1:c_1302702320471\"><Parameters><Parameter ComponentId=\"clarin.eu:cr1:c_1299509410079\"\/><\/Parameters><\/ParameterGroup><\/Output><\/Operation><\/Operations><\/Service><\/toolInfo><\/toolProfile><\/Components><\/CMD><\/metadata><\/record><\/p>\n","protected":false},"template":"","categories":[],"tags":[],"language-resource-type":[8283],"language-resource-origin":[8313],"class_list":["post-31818","language-resource","type-language-resource","status-publish","hentry"],"yoast_head":"<!-- This site is optimized with the Yoast SEO Premium plugin v27.1 (Yoast SEO v27.1.1) - https:\/\/yoast.com\/product\/yoast-seo-premium-wordpress\/ -->\n<title>Synthetic text images for North, South, Lule and Inare S\u00e1mi - Spr\u00e5kbanken<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/\" \/>\n<meta property=\"og:locale\" content=\"nb_NO\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Synthetic text images for North, South, Lule and Inare S\u00e1mi\" \/>\n<meta property=\"og:url\" content=\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/\" \/>\n<meta property=\"og:site_name\" content=\"Spr\u00e5kbanken\" \/>\n<meta property=\"article:modified_time\" content=\"2025-02-10T09:56:17+00:00\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Ansl. lesetid\" \/>\n\t<meta name=\"twitter:data1\" content=\"2 minutter\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"WebPage\",\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/\",\"url\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/\",\"name\":\"Synthetic text images for North, South, Lule and Inare S\u00e1mi - Spr\u00e5kbanken\",\"isPartOf\":{\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/#website\"},\"datePublished\":\"2025-02-10T09:39:29+00:00\",\"dateModified\":\"2025-02-10T09:56:17+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/#breadcrumb\"},\"inLanguage\":\"nb-NO\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/\"]}]},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/www.nb.no\/sprakbanken\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Resources from the resource bank\",\"item\":\"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/\"},{\"@type\":\"ListItem\",\"position\":3,\"name\":\"Synthetic text images for North, South, Lule and Inare S\u00e1mi\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/www.nb.no\/sprakbanken\/#website\",\"url\":\"https:\/\/www.nb.no\/sprakbanken\/\",\"name\":\"Spr\u00e5kbanken\",\"description\":\"\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/www.nb.no\/sprakbanken\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"nb-NO\"}]}<\/script>\n<!-- \/ Yoast SEO Premium plugin. -->","yoast_head_json":{"title":"Synthetic text images for North, South, Lule and Inare S\u00e1mi - Spr\u00e5kbanken","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/","og_locale":"nb_NO","og_type":"article","og_title":"Synthetic text images for North, South, Lule and Inare S\u00e1mi","og_url":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/","og_site_name":"Spr\u00e5kbanken","article_modified_time":"2025-02-10T09:56:17+00:00","twitter_card":"summary_large_image","twitter_misc":{"Ansl. lesetid":"2 minutter"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"WebPage","@id":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/","url":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/","name":"Synthetic text images for North, South, Lule and Inare S\u00e1mi - Spr\u00e5kbanken","isPartOf":{"@id":"https:\/\/www.nb.no\/sprakbanken\/#website"},"datePublished":"2025-02-10T09:39:29+00:00","dateModified":"2025-02-10T09:56:17+00:00","breadcrumb":{"@id":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/#breadcrumb"},"inLanguage":"nb-NO","potentialAction":[{"@type":"ReadAction","target":["https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/"]}]},{"@type":"BreadcrumbList","@id":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/oai-nb-no-sbr-101\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/www.nb.no\/sprakbanken\/"},{"@type":"ListItem","position":2,"name":"Resources from the resource bank","item":"https:\/\/www.nb.no\/sprakbanken\/en\/resource-catalogue\/"},{"@type":"ListItem","position":3,"name":"Synthetic text images for North, South, Lule and Inare S\u00e1mi"}]},{"@type":"WebSite","@id":"https:\/\/www.nb.no\/sprakbanken\/#website","url":"https:\/\/www.nb.no\/sprakbanken\/","name":"Spr\u00e5kbanken","description":"","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/www.nb.no\/sprakbanken\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"nb-NO"}]}},"lang":"en","translations":{"nb":31815,"en":31818},"pll_sync_post":[],"_links":{"self":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource\/31818","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource"}],"about":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/types\/language-resource"}],"wp:attachment":[{"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/media?parent=31818"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/categories?post=31818"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/tags?post=31818"},{"taxonomy":"language-resource-type","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource-type?post=31818"},{"taxonomy":"language-resource-origin","embeddable":true,"href":"https:\/\/www.nb.no\/sprakbanken\/wp-json\/wp\/v2\/language-resource-origin?post=31818"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}