Skip to content

Commit

Permalink
Merge pull request #11 from sul-dlss/generic-seed-desc-metadata
Browse files Browse the repository at this point in the history
Generic seed desc metadata fix
  • Loading branch information
Darren Hardy authored Jul 5, 2016
2 parents c1ac763 + 5178e74 commit c6256de
Show file tree
Hide file tree
Showing 23 changed files with 200 additions and 148 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ workflow_service.log*
doc/
.yardoc/
#tmp/*.*
.pry_history

config/environments/test.rb
config/environments/dev.rb
spec/wasSeedPreassembly/fixtures/workspace/aa/111/aa/1111/aa111aa1111/metadata
spec/wasSeedPreassembly/fixtures/workspace/aa/111/aa/2222/aa111aa2222/metadata
4 changes: 4 additions & 0 deletions .rubocop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,7 @@ AllCops:
Exclude:
- 'config/environments/test.rb'
- 'vendor/**/*'

# Disabled because the choice between single and double quotes is not worth enforcing.
Style/StringLiterals:
Enabled: false
53 changes: 9 additions & 44 deletions .rubocop_todo.yml
Original file line number Diff line number Diff line change
@@ -1,56 +1,32 @@
# This configuration was generated by
# `rubocop --auto-gen-config`
# on 2016-06-25 13:24:15 -0700 using RuboCop version 0.40.0.
# on 2016-07-01 14:52:36 -0700 using RuboCop version 0.40.0.
# The point is for the user to remove these configuration records
# one by one as the offenses are removed from the code base.
# Note that changes in the inspected code, or installation of new
# versions of RuboCop, may require this file to be generated again.

# Offense count: 2
# Cop supports --auto-correct.
# Configuration parameters: AlignWith, SupportedStyles.
# SupportedStyles: either, start_of_block, start_of_line
Lint/BlockAlignment:
Exclude:
- 'spec/wasCrawlDissemination/cdx_generator_service_spec.rb'
- 'spec/wasCrawlDissemination/utilities_spec.rb'

# Offense count: 2
Lint/HandleExceptions:
Exclude:
- 'bin/console'
- 'config/boot.rb'

# Offense count: 4
# Cop supports --auto-correct.
Lint/StringConversionInInterpolation:
Exclude:
- 'robots/wasCrawlPreassembly/content_metadata_generator.rb'
- 'robots/wasCrawlPreassembly/desc_metadata_generator.rb'
- 'robots/wasCrawlPreassembly/metadata_extractor.rb'
- 'robots/wasCrawlPreassembly/technical_metadata_generator.rb'

# Offense count: 3
Lint/UselessAssignment:
Exclude:
- 'robots/wasSeedDissemination/update_thumbnail_generator.rb'
- 'spec/wasCrawlDissemination/cdx_generator_service_spec.rb'

# Offense count: 7
# Offense count: 8
Metrics/AbcSize:
Max: 31

# Offense count: 2
Metrics/CyclomaticComplexity:
Max: 7

# Offense count: 415
# Offense count: 432
# Configuration parameters: AllowHeredoc, AllowURI, URISchemes.
# URISchemes: http, https
Metrics/LineLength:
Max: 544

# Offense count: 6
# Offense count: 7
# Configuration parameters: CountComments.
Metrics/MethodLength:
Max: 66
Expand Down Expand Up @@ -83,12 +59,11 @@ Style/BracesAroundHashParameters:
- 'config/deploy/stage.rb'
- 'spec/wasCrawlPreassembly/robots/end_was_crawl_preassembly_spec.rb'

# Offense count: 2
# Offense count: 1
# Cop supports --auto-correct.
Style/CommentIndentation:
Exclude:
- 'config/environments/example.rb'
- 'spec/wasSeedPreassembly/seed/content_metadata_generator_service_spec.rb'

# Offense count: 2
# Cop supports --auto-correct.
Expand All @@ -99,7 +74,7 @@ Style/ConditionalAssignment:
- 'bin/run_robot.rb'
- 'lib/was_seed_preassembly/thumbnail_generator_service.rb'

# Offense count: 30
# Offense count: 31
Style/Documentation:
Enabled: false

Expand Down Expand Up @@ -160,35 +135,32 @@ Style/IfUnlessModifier:
Exclude:
- 'lib/was_crawl_dissemination/cdx_generator_service.rb'

# Offense count: 3
# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: EnforcedStyle, SupportedStyles.
# SupportedStyles: normal, rails
Style/IndentationConsistency:
Exclude:
- 'robots/wasSeedPreassembly/build_was_seed_druid_tree.rb'
- 'spec/wasSeedPreassembly/seed/content_metadata_generator_service_spec.rb'

# Offense count: 6
# Offense count: 5
# Cop supports --auto-correct.
# Configuration parameters: Width.
Style/IndentationWidth:
Exclude:
- 'config/deploy.rb'
- 'lib/was_crawl_dissemination/path_indexer_service.rb'
- 'spec/wasCrawlDissemination/cdx_generator_service_spec.rb'
- 'spec/wasCrawlPreassembly/lib/content_metadata_generator_service_spec.rb'
- 'spec/wasCrawlPreassembly/lib/desc_metadata_generator_service_spec.rb'
- 'spec/wasCrawlPreassembly/lib/metadata_generator_service_spec.rb'

# Offense count: 7
# Offense count: 6
# Cop supports --auto-correct.
Style/LeadingCommentSpace:
Exclude:
- 'config/boot.rb'
- 'config/deploy/dev.rb'
- 'config/environments/example.rb'
- 'spec/spec_helper.rb'

# Offense count: 6
# Cop supports --auto-correct.
Expand Down Expand Up @@ -329,13 +301,6 @@ Style/SpaceInsideParens:
Style/SpecialGlobalVars:
Enabled: false

# Offense count: 9
# Cop supports --auto-correct.
# Configuration parameters: EnforcedStyle, SupportedStyles, ConsistentQuotesInMultiline.
# SupportedStyles: single_quotes, double_quotes
Style/StringLiterals:
Enabled: false

# Offense count: 1
# Cop supports --auto-correct.
# Configuration parameters: IgnoredMethods.
Expand Down
5 changes: 4 additions & 1 deletion lib/was_seed_preassembly/thumbnail_generator_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
module Dor
module WASSeed
class ThumbnailGeneratorService
# Because this date is earlier than any archived date of the content,
# requesting it tells OpenWayback to provide the earliest capture.
DATE_TO_TRIGGER_EARLIEST_CAPTURE_DATE = '19900101120000'.freeze
def self.capture_thumbnail(druid, workspace, uri)
thumbnail_file = "#{DruidTools::Druid.new(druid, workspace).content_dir}/thumbnail.jp2"
wayback_uri = "#{Dor::Config.was_seed.wayback_uri}/19900101120000/#{uri}"
wayback_uri = "#{Dor::Config.was_seed.wayback_uri}/#{DATE_TO_TRIGGER_EARLIEST_CAPTURE_DATE}/#{uri}"
temporary_file = "tmp/#{druid[6, 14]}"
result = ''
begin
Expand Down
2 changes: 1 addition & 1 deletion robots/wasCrawlPreassembly/content_metadata_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def perform(druid)
collection_id = Dor::WASCrawl::Utilities.get_collection_id(druid_obj)
staging_path = Dor::Config.was_crawl.staging_path

LyberCore::Log.info "Creating ContentMetadataGenerator with parameters #{collection_id}, #{staging_path.to_s}, #{druid}"
LyberCore::Log.info "Creating ContentMetadataGenerator with parameters #{collection_id}, #{staging_path}, #{druid}"
metadata_generator_service = Dor::WASCrawl::ContentMetadataGenerator.new(collection_id,
staging_path.to_s, druid)
metadata_generator_service.generate_metadata_output
Expand Down
2 changes: 1 addition & 1 deletion robots/wasCrawlPreassembly/desc_metadata_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def perform(druid)
collection_id = Dor::WASCrawl::Utilities.get_collection_id(druid_obj)
staging_path = Dor::Config.was_crawl.staging_path

LyberCore::Log.info "Creating DescMetadataGenerator with parameters #{collection_id}, #{staging_path.to_s}, #{druid}"
LyberCore::Log.info "Creating DescMetadataGenerator with parameters #{collection_id}, #{staging_path}, #{druid}"
metadata_generator_service = Dor::WASCrawl::DescMetadataGenerator.new(collection_id,
staging_path.to_s, druid)
metadata_generator_service.generate_metadata_output
Expand Down
2 changes: 1 addition & 1 deletion robots/wasCrawlPreassembly/metadata_extractor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def perform(druid)
crawl_id = Dor::WASCrawl::Utilities.get_crawl_id(druid_obj)
staging_path = Dor::Config.was_crawl.staging_path

LyberCore::Log.info "Creating MetadataExtractor with parameters #{collection_id}, #{crawl_id}, #{staging_path.to_s}, #{druid}"
LyberCore::Log.info "Creating MetadataExtractor with parameters #{collection_id}, #{crawl_id}, #{staging_path}, #{druid}"
metadata_extractor_service = Dor::WASCrawl::MetadataExtractor.new(collection_id, crawl_id, staging_path.to_s, druid)
metadata_extractor_service.run_metadata_extractor_jar
end
Expand Down
2 changes: 1 addition & 1 deletion robots/wasCrawlPreassembly/technical_metadata_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def perform(druid)
collection_id = Dor::WASCrawl::Utilities.get_collection_id(druid_obj)
staging_path = Dor::Config.was_crawl.staging_path

LyberCore::Log.info "Creating TechnicalMetadataGenerator with parameters #{collection_id}, #{staging_path.to_s}, #{druid}"
LyberCore::Log.info "Creating TechnicalMetadataGenerator with parameters #{collection_id}, #{staging_path}, #{druid}"
metadata_generator_service = Dor::WASCrawl::TechnicalMetadataGenerator.new(collection_id,
staging_path.to_s, druid)
metadata_generator_service.generate_metadata_output
Expand Down
4 changes: 2 additions & 2 deletions robots/wasSeedDissemination/update_thumbnail_generator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ def get_original_uri(descMetadata_ng)
end

def send_to_thumbnail_generator(druid_id, original_uri)
response = RestClient.get "#{Dor::Config.thumbnail_generator_service_uri}api/seed/create?druid=#{druid_id}&uri=#{original_uri}"
rescue RestClient::Conflict => e
RestClient.get "#{Dor::Config.thumbnail_generator_service_uri}api/seed/create?druid=#{druid_id}&uri=#{original_uri}"
rescue RestClient::Conflict
LyberCore::Log.error("#{druid_id} already exists on #{Dor::Config.thumbnail_generator_service_uri}")
end
end
Expand Down
1 change: 0 additions & 1 deletion spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
# for wasSeedDissemination
require 'wasSeedDissemination/update_thumbnail_generator'

#require 'pry'
require 'rspec'
require 'awesome_print'
require 'nokogiri'
Expand Down
3 changes: 1 addition & 2 deletions spec/wasCrawlDissemination/cdx_generator_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@

it 'should generate cdx file for each warc or arc file in contentMetadata', :openwayback_prerequisite do
contentMetadata = File.open(@content_metadata_xml_location + 'contentMetadata_3files.xml').read
collection_path = "#{@stacks_path}/data/collections/test_collection"
cdx_generator = Dor::WASCrawl::CDXGeneratorService.new(@collection_path, @druid_id_1, contentMetadata)

cdx_generator.instance_variable_set(:@cdx_working_directory, "#{@stacks_path}/data/indecies/cdx_working")
Expand Down Expand Up @@ -166,7 +165,7 @@
expect(@cdx_generator.get_cdx_file_name('/tmp/file.txt') ).to eq('file.txt.cdx')
expect(@cdx_generator.get_cdx_file_name('c://tmp/file.txt') ).to eq('file.txt.cdx')
expect(@cdx_generator.get_cdx_file_name('file://tmp/file.txt')).to eq('file.txt.cdx')
end
end
end

context '.prepare_cdx_generation_cmd_string' do
Expand Down
12 changes: 6 additions & 6 deletions spec/wasCrawlDissemination/utilities_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,12 @@
expect(file_list.length).to eq(0)
end
it 'should return an empty list for the contentMetadata with dark archive shelve=no' do
content_metadata_xml_location = 'spec/wasCrawlDissemination/fixtures/metadata/'
contentMetadata = File.open(content_metadata_xml_location + 'contentMetadata_dark.xml').read
content_metadata_xml_location = 'spec/wasCrawlDissemination/fixtures/metadata/'
contentMetadata = File.open(content_metadata_xml_location + 'contentMetadata_dark.xml').read

file_list = Dor::WASCrawl::Dissemination::Utilities.get_warc_file_list_from_contentMetadata(contentMetadata)
expect(file_list).not_to be_nil
expect(file_list.length).to eq(0)
end
file_list = Dor::WASCrawl::Dissemination::Utilities.get_warc_file_list_from_contentMetadata(contentMetadata)
expect(file_list).not_to be_nil
expect(file_list.length).to eq(0)
end
end
end
12 changes: 6 additions & 6 deletions spec/wasCrawlPreassembly/lib/metadata_generator_service_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
generate_data_items
end

context Dor::WASCrawl::MetadataGenerator, 'read_metadata_xml_input_file' do
context '#read_metadata_xml_input_file' do
it 'should read the file successfully if druid id is passed' do
druid_id = 'druid:gh123gh1234'
metadata_generator_service = generate_object(druid_id)
Expand Down Expand Up @@ -38,7 +38,7 @@
end
end

context Dor::WASCrawl::MetadataGenerator, 'write_file_to_druid_metadata_folder' do
context '#write_file_to_druid_metadata_folder' do
it "should raise an error if the druid directory tree doesn't exist in the workspace" do
druid_id = 'druid:xx111xx1111'
metadata_generator_service = generate_object(druid_id)
Expand Down Expand Up @@ -83,7 +83,7 @@
end
end

context Dor::WASCrawl::MetadataGenerator, 'read_template' do
context '#read_template' do
it 'should read the contentMetadata template successfully' do
druid_id = 'druid:gh123gh1234'
metadata_generator_service = generate_object(druid_id)
Expand Down Expand Up @@ -117,7 +117,7 @@
end
end

context Dor::WASCrawl::MetadataGenerator, 'transform_xml_using_xslt' do
context '#transform_xml_using_xslt' do
it 'should transform the xml using xslt with valid inputs' do
druid_id = 'druid:xx'
metadata_generator_service = generate_object(druid_id)
Expand Down Expand Up @@ -145,12 +145,12 @@
<newuser>John Smith</newuser>
</newuserlist>
EOF
actual_transformed = metadata_generator_service.transform_xml_using_xslt(xml_doc, xslt_doc)
actual_transformed = metadata_generator_service.transform_xml_using_xslt(xml_doc, xslt_doc)
expect(actual_transformed.to_s).to eq expected_transformed
end
end

context Dor::WASCrawl::MetadataGenerator, 'do_post_transform' do
context '#do_post_transform' do
it 'should return the string with no modification' do
druid_id = 'druid:gh123gh1234'
metadata_generator_service = generate_object(druid_id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,6 @@
<languageTerm authority="iso639-2b" type="code">eng</languageTerm>
</languageOfCataloging>
<recordContentSource authority="marcorg">CSt</recordContentSource>
<recordOrigin>Transformed from record for http://urbanstudies.stanford.edu/ used in the web archiving service Archive-It and which is part of the Fugitive US Agencies collection (record ID ).</recordOrigin>
<recordOrigin>Transformed from record for http://urbanstudies.stanford.edu/ used in the web archiving service Archive-It and which is part of the collection (record ID ).</recordOrigin>
</recordInfo>
</mods>
Binary file not shown.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0"?>
<item>
<druid_id>druid:ff098xp7185</druid_id>
<collection_id>druid:gz033bg3146</collection_id>
<source_id>a</source_id>
<uri>http://www.epa.gov/</uri>
<source>G</source>
<embargo>false</embargo>
<source_xml/>
</item>
Loading

0 comments on commit c6256de

Please sign in to comment.