mirror of
https://github.com/moparisthebest/rswiki
synced 2024-12-21 07:08:55 -05:00
Add tools to convert
This commit is contained in:
parent
a729ec2114
commit
d93c62b0d7
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
wiki/
|
1
.ruby-version
Normal file
1
.ruby-version
Normal file
@ -0,0 +1 @@
|
||||
1.9.3-p392
|
11
README.md
11
README.md
@ -1 +1,10 @@
|
||||
This is the wiki hosted at rswiki.moparisthebest.com
|
||||
This is the wiki hosted at rswiki.moparisthebest.com, in the wiki git repo.
|
||||
|
||||
In this repo you'll find the tools we used for converting from mediawiki to gollum:
|
||||
|
||||
* legit_pages.py was written by vortex, and used to scrape and generate legit_pages.txt, which was used to export RSWiki-20150610160818.xml
|
||||
* mw-to-gollum.rb was slightly modified from here: https://gist.github.com/MasterRoot24/ab85de0e7b82ba7f5974
|
||||
* mediawiki2gollum.sh uses mw-to-gollum.rb to convert the mediawiki xml, then does various things to clean up all the links and names so they will work
|
||||
* category.sh scrapes and generates category pages like mediawiki; it needs to be run whenever pages are added to categories
|
||||
|
||||
todo: historical versions not converted/saved yet
|
17657
RSWiki-20150610160818.xml
Normal file
17657
RSWiki-20150610160818.xml
Normal file
File diff suppressed because it is too large
Load Diff
52
category.sh
Executable file
52
category.sh
Executable file
@ -0,0 +1,52 @@
|
||||
#!/bin/bash
# Work from the directory that holds this script: the rest of the script globs
# `*` and writes its output files relative to the current directory.
# Stop if the directory cannot be entered rather than operating somewhere else.
cd "$(dirname "$0")" || exit 1
|
||||
|
||||
# prepareFile PATH
#
# Writes "<basename of PATH>.tmp" in the current directory and prints that
# temp file name on stdout so the caller can append generated content to it.
# The temp file contains, in order:
#   - if PATH already holds the generated-code marker: every line of PATH
#     that precedes the marker line;
#   - else if PATH exists: the whole of PATH followed by a newline;
#   - else (PATH does not exist yet): nothing;
# followed by a fresh marker comment line.
function prepareFile(){
    local path="$1"
    local tmp_path
    tmp_path="$(basename "$path").tmp"
    (
        # Test grep directly; stderr is silenced so a not-yet-existing page is
        # simply treated as "no marker".
        if grep -q 'CODE AUTOMATICALLY GENERATED BY category.sh -->' "$path" 2>/dev/null
        then
            # Print lines until the marker is reached, then quit without printing it.
            sed -n '/CODE AUTOMATICALLY GENERATED BY category.sh -->/q;p' "$path"
        elif [ -f "$path" ]
        then
            cat "$path" && echo
        fi
        echo '<!-- DO NOT EDIT BELOW THIS LINE, OR CHANGE THIS COMMENT, CODE AUTOMATICALLY GENERATED BY category.sh -->'
    ) > "$tmp_path"
    echo "$tmp_path"
}
|
||||
|
||||
# finishFile PATH
#
# Moves "<basename of PATH>.tmp" from the current directory over PATH.
# Variables are local so calling this does not overwrite same-named globals.
function finishFile(){
    local path="$1"
    local tmp_path
    tmp_path="$(basename "$path").tmp"
    # `--` keeps a name starting with "-" from being parsed as an option.
    mv -- "$tmp_path" "$path"
}
|
||||
|
||||
# Collect every line that consists solely of a "[[Category ...]]" tag, across
# all files in the current directory. -H forces the "file:" prefix even when
# the glob expands to a single file; the code below relies on that prefix.
files_lines_categories="$(grep -H '^\[\[Category [^]./\]*\]\]$' *)"
#echo "files_lines_categories: $files_lines_categories"

# Categories.mediawiki is rebuilt from scratch on every run.
echo '<!-- DO NOT EDIT THIS FILE, CODE AUTOMATICALLY GENERATED BY category.sh -->' > Categories.mediawiki
echo 'The following categories contain pages or media.' >> Categories.mediawiki

# One iteration per distinct "Category ..." name. IFS= and -r keep the name
# exactly as grep printed it (no whitespace trimming, no backslash processing).
echo "$files_lines_categories" | grep -o 'Category [^]]*' | sort -u | while IFS= read -r category
do
    #echo "category: $category"

    # Page file for the category: spaces in the name become hyphens.
    category_file="${category// /-}.mediawiki"
    echo "category_file: $category_file"

    result="$(prepareFile "$category_file")"
    echo "== '''Pages in category \"${category#Category }\"''' ==" >> "$result"

    # Lines tagging this exact category. The name is matched as a fixed string
    # (-F) so characters such as '*' in a category name are not read as regex.
    files="$(echo "$files_lines_categories" | grep -F -- ":[[${category}]]" | sort)"
    num_pages="$(echo "$files" | wc -l)"

    echo "* [[$category]] ($num_pages members)" >> Categories.mediawiki

    echo "The following $num_pages pages are in this category." >> "$result"
    echo "$files" | while IFS= read -r file
    do
        # Strip the ".mediawiki:[[Category ...]]" suffix to get the page's file
        # stem, then turn hyphens back into spaces for the link text.
        page="${file%".mediawiki:[[${category}]]"}"
        echo "* [[${page//-/ }]]"
    done >> "$result"
    finishFile "$category_file"
done
|
34
legit_pages.py
Normal file
34
legit_pages.py
Normal file
@ -0,0 +1,34 @@
|
||||
import requests, logging
|
||||
from bs4 import BeautifulSoup
|
||||
from queue import Queue
|
||||
from threading import Thread
|
||||
|
||||
logging.getLogger("requests").setLevel(logging.WARNING)
|
||||
|
||||
|
||||
def get_pages(page):
    """Recursively crawl the wiki starting from ``page``.

    Each page title is visited once: it is appended to the module-level
    ``all_pages`` list, written as one line to the module-level ``ebin`` file
    and printed. The page is then fetched, and every link inside the
    ``div#mw-content-text`` element whose href contains ``?title=`` is followed.

    A link target is skipped when it contains ``&``, contains ``Special:`` or
    equals the current page. Any ``#fragment`` is stripped before that check.

    :param page: page title as it appears after ``?title=`` in the wiki URL.
    :return: None.
    """
    global all_pages
    if page in all_pages:
        return
    all_pages.append(page)
    ebin.write(page + "\n")
    print(page + "\n")
    req = requests.get("https://rswiki.moparisthebest.com/index.php?title=" + page)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = BeautifulSoup(req.text, "html.parser")
    content = soup.find("div", id="mw-content-text")
    if content is None:
        # The response has no article body, so there are no links to follow.
        return
    for anchor in content.find_all("a"):
        # Anchors without an href (e.g. named anchors) cannot be followed.
        link = anchor.get("href")
        if link is None:
            continue
        off = link.find("?title=")
        # Compare by value: `is` tests object identity, which is not
        # guaranteed for integers.
        if off == -1:
            continue
        # Skip past the 7 characters of "?title=".
        next_page = link[off + 7:]
        if "#" in next_page:
            next_page = next_page[:next_page.find("#")]
        if "&" not in next_page and "Special:" not in next_page and next_page != page:
            get_pages(next_page)
|
||||
|
||||
|
||||
# Titles already visited; get_pages() reads and appends to this list.
all_pages = []
# get_pages() writes through the module-level name `ebin`. The `with` block
# binds that name and guarantees the file is flushed and closed when the crawl
# ends, including when it ends with an exception.
with open("legit_pages.txt", "w") as ebin:
    get_pages("Main_Page")
|
190
legit_pages.txt
Normal file
190
legit_pages.txt
Normal file
@ -0,0 +1,190 @@
|
||||
Main_Page
|
||||
Rules
|
||||
DMCA_Policy
|
||||
RSWiki:General_disclaimer
|
||||
RSWiki:Privacy_policy
|
||||
RSWiki_IRC
|
||||
RSWiki:About
|
||||
Category:RSC
|
||||
135_Protocol
|
||||
202_Protocol
|
||||
204_Items
|
||||
204_NPCs
|
||||
204_Objects
|
||||
204_Protocol
|
||||
OB3
|
||||
Category:RS2
|
||||
194_Protocol
|
||||
194:Show_interface
|
||||
Data_Types
|
||||
Word
|
||||
DWord
|
||||
QWord
|
||||
RS_String
|
||||
194:Logout
|
||||
194:Clear_screen
|
||||
289_Protocol
|
||||
289:Send_sidebar_interface
|
||||
317:Send_Skill
|
||||
289:Send_Player_Head
|
||||
317:Friends_list_status
|
||||
289:Construct_Map_Region
|
||||
317_Protocol
|
||||
Category:Packet:317
|
||||
289:Player_Dialogue_Head
|
||||
317:Add_friend
|
||||
317:Add_ignore
|
||||
317:Alternate_item_option_2
|
||||
317:Animation_reset
|
||||
317:Attack_(NPC)
|
||||
317:Audio
|
||||
317:Bank_10_items
|
||||
317:Bank_5_items
|
||||
317:Bank_all_items
|
||||
317:Bank_X_items_part-1
|
||||
317:Bank_X_items_part-2
|
||||
317:Button_click
|
||||
317:Camera_movement
|
||||
317:Camera_shake
|
||||
317:Chat_interface
|
||||
317:Chat_interface_click
|
||||
317:Chat_settings
|
||||
317:Clear_inventory
|
||||
317:Clear_screen
|
||||
317:Close_window
|
||||
317:Construct_map_region
|
||||
317:Design_screen
|
||||
317:Display_hint_icon
|
||||
317:Drop_item
|
||||
317:Enter_name
|
||||
317:Equip_item
|
||||
317:Flash_sidebar
|
||||
317:Focus_change
|
||||
317:Follow
|
||||
317:Force_client_setting
|
||||
317:Hidden_Interface
|
||||
317:Idle
|
||||
317:Idle_logout
|
||||
317:Initialize_player
|
||||
317:Input_amount
|
||||
317:Interface_animation
|
||||
317:Interface_color
|
||||
317:Interface_item
|
||||
317:Interface_model_rotation
|
||||
317:Interface_offset
|
||||
317:Interface_over_tab
|
||||
317:Inventory_overlay
|
||||
317:Item_action_1
|
||||
317:Item_on_floor
|
||||
317:Item_on_item
|
||||
317:Item_on_object
|
||||
317:Item_on_player
|
||||
317:Light_item
|
||||
317:Load_map_region
|
||||
317:Loading_finished
|
||||
317:Logout
|
||||
317:Magic_on_items
|
||||
317:Magic_on_player
|
||||
317:Minimap_State
|
||||
317:Move_item
|
||||
317:Music
|
||||
317:NPC_action_1
|
||||
317:NPC_action_2
|
||||
317:NPC_action_3
|
||||
317:NPC_head_on_interface
|
||||
317:Object_action_1
|
||||
317:Object_action_2
|
||||
317:Object_action_3
|
||||
317:Open_chatbox_interface
|
||||
317:Open_welcome_screen
|
||||
317:Pickup_ground_item
|
||||
317:Play_song
|
||||
317:Player_command
|
||||
317:Player_head_to_interface
|
||||
317:Privacy_options
|
||||
317:Region_change
|
||||
317:Remove_friend
|
||||
317:Remove_ignore
|
||||
317:Report_player
|
||||
317:Reset_button_state
|
||||
317:Reset_camera
|
||||
317:Reset_destination
|
||||
317:Run_energy
|
||||
317:Scroll_position
|
||||
317:Send_add_ignore
|
||||
317:Send_message
|
||||
317:Send_sidebar_interface
|
||||
317:Set_interface_text
|
||||
317:Show_interface
|
||||
317:Show_multi-combat
|
||||
317:Show_tab
|
||||
317:Skill_level
|
||||
317:Song_Queue
|
||||
317:System_update
|
||||
317:Trade_answer
|
||||
317:Trade_request
|
||||
317:Unequip_item
|
||||
317:Update_item_container
|
||||
317:Walkable_interface
|
||||
317:Weight
|
||||
317:Send_add_friend
|
||||
317:Begin_player_updating
|
||||
317:Object_removal
|
||||
317:Create_Projectile
|
||||
317:Object_spawn
|
||||
317:Send_private_message
|
||||
317:NPC_Dialogue
|
||||
317:Mouse_click
|
||||
317:Ground_Item_Action
|
||||
357_Protocol
|
||||
377_Protocol
|
||||
377:Interface_Animation
|
||||
377:Move_Camera
|
||||
377:Logout
|
||||
377:Send_Sidebar_Interface
|
||||
377:Animation_Reset
|
||||
377:Interface_Item
|
||||
377:Send_Sound
|
||||
377:Reset_Ground_Items_and_Objects
|
||||
377:Play_Ambient_Wave
|
||||
377:Skill_Level
|
||||
377:Walkable_Interface
|
||||
377:Construct_Map_Region
|
||||
377:Input_Amount
|
||||
377:Create_Static_Graphic
|
||||
377:Send_Message
|
||||
377:Camera_Shake
|
||||
377:Open_Welcome_Screen
|
||||
377:Send_Add_Friend
|
||||
377:Send_Ground_Item
|
||||
377:Reset_Button_State
|
||||
377:Run_Energy
|
||||
377:Initialize_Player
|
||||
377:Inventory_Overlay
|
||||
443_Protocol
|
||||
Class_Check
|
||||
464_Protocol
|
||||
468_Protocol
|
||||
474_Protocol
|
||||
474:Remove_ignore
|
||||
474:Fourth_Interface_Option
|
||||
508_Protocol
|
||||
634_Protocol
|
||||
666_Protocol
|
||||
668_Protocol
|
||||
718_Protocol
|
||||
718:Friends_packet
|
||||
718:Close_window
|
||||
718:Player_under_NPC_priority
|
||||
718:Music_effect
|
||||
718:Interface
|
||||
718:Open_URL
|
||||
742_Protocol
|
||||
Category:RS3
|
||||
Category:Cache
|
||||
Archive_Format
|
||||
JAGGRAB_Protocol
|
||||
Ondemand_Protocol
|
||||
Map_Region_System
|
||||
317:Mage_NPC
|
||||
317:Player_Option
|
51
mediawiki2gollum.sh
Executable file
51
mediawiki2gollum.sh
Executable file
@ -0,0 +1,51 @@
|
||||
#!/bin/bash
# Work from the directory that holds this script. The script goes on to
# `rm -rf ./wiki/` and edit files in place, so stop here if the directory
# cannot be entered instead of doing that somewhere else.
cd "$(dirname "$0")" || exit 1
|
||||
|
||||
# commit MESSAGE
#
# Stages everything under the current directory and records it as a git
# commit whose message is MESSAGE.
function commit {
    local message="$1"
    git add .
    git commit -m "$message"
}
|
||||
|
||||
# Start from a clean slate and convert the MediaWiki XML export into a new
# wiki repository under ./wiki/. Stop if the converter itself fails.
rm -rf ./wiki/
ruby mw-to-gollum.rb -f RSWiki-20150610160818.xml -d ./wiki/ || exit 1

# Every later step edits and commits files in the current directory, so the
# script must not continue unless it is really inside ./wiki/.
cd ./wiki/ || exit 1

cp ../.ruby-version ./
commit 'Added .ruby-version'

cp Main-Page.mediawiki Home.mediawiki
commit 'Copied Main-Page to Home'
|
||||
|
||||
# Rewrite "[[:Target]]" as "[[Target]]" in every file.
sed -ri 's/\[\[:([^]]*)\]\]/[[\1]]/g' *
commit 'Fix links: Remove leading :'

# Escape a literal string for use as the pattern of a sed s/// command
# (basic regular expression, "/" delimiter).
escape_sed_pattern(){
    printf '%s\n' "$1" | sed -e 's/[]\/$*.^[]/\\&/g'
}

# Escape a literal string for use as the replacement of a sed s/// command
# ("/" delimiter); "&" and "\" are special there.
escape_sed_replacement(){
    printf '%s\n' "$1" | sed -e 's/[\/&]/\\&/g'
}

# For each distinct "[[...|" prefix that contains "_" or ":", replace those
# characters with spaces everywhere the prefix occurs.
# IFS= and -r make read return the matched text unmodified.
grep -ho '\[\[[^|]*|' * | sort -u | grep '[_:]' | while IFS= read -r line
do
    sed_line=$(escape_sed_pattern "$line")
    rep_line=$(escape_sed_replacement "$(printf '%s\n' "$line" | tr ':_' '  ')")
    sed -i "s/$sed_line/$rep_line/g" *
done
commit 'Fix links: Change underscores and colons to spaces where there is a link name'

# For each distinct "[[...:...]]" link, replace its colons with spaces
# everywhere the link occurs.
grep -ho '\[\[[^:]*:[^]]*\]\]' * | sort -u | while IFS= read -r line
do
    sed_line=$(escape_sed_pattern "$line")
    rep_line=$(escape_sed_replacement "$(printf '%s\n' "$line" | tr ':' ' ')")
    sed -i "s/$sed_line/$rep_line/g" *
done
commit 'Fix links: Change colons to spaces where there is no link name'
|
||||
|
||||
# Tag every packet page (files named "<revision>-...") with the generic
# Packet category. The list covers the same five revisions that receive a
# revision-specific category below.
sed -i '1i [[Category Packet]]' 194-* 317-* 377-* 474-* 718-*
# `1i` inserts before line 1, so each revision-specific tag ends up above the
# generic tag inserted by the command above.
sed -i '1i [[Category Packet 194]]' 194-*
sed -i '1i [[Category Packet 317]]' 317-*
sed -i '1i [[Category Packet 377]]' 377-*
sed -i '1i [[Category Packet 474]]' 474-*
sed -i '1i [[Category Packet 718]]' 718-*
commit 'Add categories to packets'
|
||||
|
||||
# Ship category.sh inside the wiki repo and generate the category pages.
cp ../category.sh ./
./category.sh
commit 'Add category.sh and generate initial category pages'

# Launch gollum with --no-edit on the converted wiki. Set GOLLUM_BIN to use a
# different gollum executable; the default is the original hard-coded path.
"${GOLLUM_BIN:-/home/mopar/apps/rbenv/versions/1.9.3-p392/bin/gollum}" --no-edit
|
51
mw-to-gollum.rb
Normal file
51
mw-to-gollum.rb
Normal file
@ -0,0 +1,51 @@
|
||||
require 'rubygems'
|
||||
require 'hpricot'
|
||||
require 'gollum'
|
||||
require 'gollum-lib'
|
||||
require 'optparse'
|
||||
require 'git'
|
||||
|
||||
# from https://gist.github.com/MasterRoot24/ab85de0e7b82ba7f5974
|
||||
# gem install hpricot gollum git wikicloth
|
||||
|
||||
# Parse command line options
|
||||
# ToDo: Make command line options mandatory
|
||||
# Values supplied on the command line, keyed by symbol:
#   :file        - argument of -f / --file
#   :destination - argument of -d / --directory
# Note: neither switch is declared mandatory here, and nothing in this block
# checks that both were supplied.
options = {}

cli = OptionParser.new
cli.banner = 'Usage: ruby mw-to-gollum.rb --file input-file.xml --directory new.wiki'
cli.on('-f FILE', '--file FILE', 'MediaWiki export file to import') { |path| options[:file] = path }
cli.on('-d DIRECTORY', '--directory DIRECTORY', 'Destination directory in which to create a new Gollum wiki') { |dir| options[:destination] = dir }
# parse! removes the switches it recognises from ARGV.
cli.parse!
|
||||
|
||||
# Both options are needed below; fail with a usage hint instead of letting
# File.open / Git.init raise on a nil argument.
unless options[:file] && options[:destination]
  abort 'Usage: ruby mw-to-gollum.rb --file input-file.xml --directory new.wiki'
end

# Open the input file and create the output repo if it doesn't already exist
file = File.open(options[:file], 'r')
begin
  git = Git.init(options[:destination])
  wiki = Gollum::Wiki.new(options[:destination])
  doc = Hpricot(file)

  # Get the Git user name and email; they are attached to every import commit
  name = git.config('user.name')
  email = git.config('user.email')

  # Loop through each page in the MediaWiki dump file and create a new page in the Gollum wiki
  doc.search('/mediawiki/page').each do |el|
    # Colons in the MediaWiki title are replaced with spaces for the Gollum page name
    title = el.at('title').inner_text.tr(':', ' ')
    content = el.at('text').inner_text
    commit = { :message => "Import MediaWiki page #{title} into Gollum",
               :name => name,
               :email => email }
    begin
      puts "Writing page #{title}"
      wiki.write_page(title, :mediawiki, content, commit)
    rescue Gollum::DuplicatePageError
      puts "Duplicate #{title}"
    rescue StandardError => e
      # Report the failure (message, then backtrace) and carry on with the
      # next page. StandardError is rescued rather than Exception so that
      # interrupts and exit requests still stop the import.
      puts e, e.backtrace
    end
  end
ensure
  # Close the dump even when the import aborts part-way through.
  file.close
end
|
Loading…
Reference in New Issue
Block a user