mirror of
https://github.com/moparisthebest/rswiki
synced 2024-12-21 15:18:55 -05:00
Add tools to convert
This commit is contained in:
parent
a729ec2114
commit
d93c62b0d7
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
wiki/
|
1
.ruby-version
Normal file
1
.ruby-version
Normal file
@ -0,0 +1 @@
|
|||||||
|
1.9.3-p392
|
11
README.md
11
README.md
@ -1 +1,10 @@
|
|||||||
This is the wiki hosted at rswiki.moparisthebest.com
|
This is the wiki hosted at rswiki.moparisthebest.com, in the wiki git repo.
|
||||||
|
|
||||||
|
In this repo you'll find the tools we used for converting from mediawiki to gollum:
|
||||||
|
|
||||||
|
* legit_pages.py was written by vortex, and used to scrape and generate legit_pages.txt, which was used to export RSWiki-20150610160818.xml
|
||||||
|
* mw-to-gollum.rb was slightly modified from here: https://gist.github.com/MasterRoot24/ab85de0e7b82ba7f5974
|
||||||
|
* mediawiki2gollum.sh uses mw-to-gollum.rb to convert the mediawiki xml, then does various things to clean up all the links and names so they will work
|
||||||
|
* category.sh scrapes and generates category pages like mediawiki, needs to be run whenever pages are added to categories
|
||||||
|
|
||||||
|
todo: historical versions not converted/saved yet
|
17657
RSWiki-20150610160818.xml
Normal file
17657
RSWiki-20150610160818.xml
Normal file
File diff suppressed because it is too large
Load Diff
52
category.sh
Executable file
52
category.sh
Executable file
@ -0,0 +1,52 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Work relative to the script's own directory; bail out if that fails so the
# grep/rewrite steps below never run against some unrelated directory.
cd "$(dirname "$0")" || exit 1
|
||||||
|
|
||||||
|
# prepareFile PATH
#
# Writes "<basename of PATH>.tmp" in the current directory, containing:
#   * if PATH already holds the generated-content marker: every line above it;
#   * else, if PATH exists: the whole file followed by a newline;
#   * else: nothing (a brand-new category page);
# and then a fresh marker line.
# Prints the temporary file's name on stdout so the caller can append to it.
function prepareFile(){
    local path="$1"
    local tmp_path
    tmp_path="$(basename "$path").tmp"
    (
        if grep -q 'CODE AUTOMATICALLY GENERATED BY category.sh -->' "$path" 2>/dev/null
        then
            # Stop at the marker without printing it; it is re-added below.
            sed -n '/CODE AUTOMATICALLY GENERATED BY category.sh -->/q;p' "$path"
        elif [ -f "$path" ]
        then
            # Hand-written page with no generated section yet: keep it all.
            cat "$path" && echo
        fi
        echo '<!-- DO NOT EDIT BELOW THIS LINE, OR CHANGE THIS COMMENT, CODE AUTOMATICALLY GENERATED BY category.sh -->'
    ) > "$tmp_path"
    echo "$tmp_path"
}
|
||||||
|
|
||||||
|
# finishFile PATH
#
# Moves "<basename of PATH>.tmp" from the current directory over PATH,
# replacing the previous version of the page.
function finishFile(){
    local path="$1"
    local tmp_path
    tmp_path="$(basename "$path").tmp"
    # "--" keeps a name starting with "-" from being parsed as an option.
    mv -- "$tmp_path" "$path"
}
|
||||||
|
|
||||||
|
# Collect every "file:[[Category X]]" hit: a page joins a category by containing
# a line that is exactly [[Category X]].
# -H forces the "file:" prefix even when the glob expands to a single file;
# the per-category filtering below relies on that prefix.
files_lines_categories="$(grep -H -- '^\[\[Category [^]./\]*\]\]$' *)"
#echo "files_lines_categories: $files_lines_categories"

# Index page listing all categories; rebuilt from scratch on every run.
echo '<!-- DO NOT EDIT THIS FILE, CODE AUTOMATICALLY GENERATED BY category.sh -->' > Categories.mediawiki
echo 'The following categories contain pages or media.' >> Categories.mediawiki

echo "$files_lines_categories" | grep -o 'Category [^]]*' | sort -u | while IFS= read -r category
do
    #echo "category: $category"

    # "Category Foo Bar" -> "Category-Foo-Bar.mediawiki"
    category_file="${category// /-}.mediawiki"
    echo "category_file: $category_file"

    result="$(prepareFile "$category_file")"
    echo "== '''Pages in category \"${category#Category }\"''' ==" >> "$result"

    # All "file:[[Category X]]" lines for this category, sorted by file name.
    files="$(echo "$files_lines_categories" | grep ":\[\[${category}\]\]$" | sort)"
    # Some wc implementations pad the count with blanks; strip them.
    num_pages="$(echo "$files" | wc -l | tr -d ' ')"

    echo "* [[$category]] ($num_pages members)" >> Categories.mediawiki

    echo "The following $num_pages pages are in this category." >> "$result"
    echo "$files" | while IFS= read -r file
    do
        # Drop the ".mediawiki:[[Category X]]" suffix, then turn the file name's
        # hyphens back into spaces to get the page title.
        page="${file%".mediawiki:[[${category}]]"}"
        echo "* [[${page//-/ }]]"
    done >> "$result"

    finishFile "$category_file"
done
|
34
legit_pages.py
Normal file
34
legit_pages.py
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
import requests, logging
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from queue import Queue
|
||||||
|
from threading import Thread
|
||||||
|
|
||||||
|
logging.getLogger("requests").setLevel(logging.WARNING)
|
||||||
|
|
||||||
|
|
||||||
|
def get_pages(page):
    """Recursively crawl the wiki from ``page`` and record every page title reached.

    A title not seen before is appended to the module-level ``all_pages`` list,
    written as one line to the module-level ``ebin`` file handle and printed.
    Its HTML is then fetched and every link inside ``div#mw-content-text`` whose
    href contains ``?title=`` is followed.

    :param page: page title exactly as it appears after ``?title=`` in a wiki URL.
    :return: None; the results are the side effects on ``all_pages`` and ``ebin``.
    """
    if page in all_pages:
        return
    all_pages.append(page)
    ebin.write(page + "\n")
    print(page + "\n")

    req = requests.get("https://rswiki.moparisthebest.com/index.php?title=" + page)
    # Name the parser explicitly so the result does not depend on which optional
    # parser libraries happen to be installed (and bs4 does not warn about it).
    soup = BeautifulSoup(req.text, "html.parser")
    content = soup.find("div", id="mw-content-text")
    if content is None:
        # No content container in the response (e.g. an error page): nothing to follow.
        return

    marker = "?title="
    for anchor in content.find_all("a"):
        href = anchor.get("href")
        if href is None:
            # Named anchors (<a name=...>) carry no href.
            continue
        off = href.find(marker)
        if off == -1:
            continue
        # Title is everything after "?title=", minus any "#fragment".
        next_page = href[off + len(marker):].split("#", 1)[0]
        # "&" means extra query parameters (edit/history/redlink...), which are not
        # plain page views; Special: pages are skipped as well.
        if "&" not in next_page and "Special:" not in next_page and next_page != page:
            get_pages(next_page)
|
||||||
|
|
||||||
|
|
||||||
|
# Titles already visited, in discovery order; get_pages() appends to this list.
all_pages = []
# get_pages() writes each new title through the module-level name `ebin`.
# The with-block makes sure the list is flushed and the file closed even if
# the crawl raises part-way through.
with open("legit_pages.txt", "w") as ebin:
    get_pages("Main_Page")
|
190
legit_pages.txt
Normal file
190
legit_pages.txt
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
Main_Page
|
||||||
|
Rules
|
||||||
|
DMCA_Policy
|
||||||
|
RSWiki:General_disclaimer
|
||||||
|
RSWiki:Privacy_policy
|
||||||
|
RSWiki_IRC
|
||||||
|
RSWiki:About
|
||||||
|
Category:RSC
|
||||||
|
135_Protocol
|
||||||
|
202_Protocol
|
||||||
|
204_Items
|
||||||
|
204_NPCs
|
||||||
|
204_Objects
|
||||||
|
204_Protocol
|
||||||
|
OB3
|
||||||
|
Category:RS2
|
||||||
|
194_Protocol
|
||||||
|
194:Show_interface
|
||||||
|
Data_Types
|
||||||
|
Word
|
||||||
|
DWord
|
||||||
|
QWord
|
||||||
|
RS_String
|
||||||
|
194:Logout
|
||||||
|
194:Clear_screen
|
||||||
|
289_Protocol
|
||||||
|
289:Send_sidebar_interface
|
||||||
|
317:Send_Skill
|
||||||
|
289:Send_Player_Head
|
||||||
|
317:Friends_list_status
|
||||||
|
289:Construct_Map_Region
|
||||||
|
317_Protocol
|
||||||
|
Category:Packet:317
|
||||||
|
289:Player_Dialogue_Head
|
||||||
|
317:Add_friend
|
||||||
|
317:Add_ignore
|
||||||
|
317:Alternate_item_option_2
|
||||||
|
317:Animation_reset
|
||||||
|
317:Attack_(NPC)
|
||||||
|
317:Audio
|
||||||
|
317:Bank_10_items
|
||||||
|
317:Bank_5_items
|
||||||
|
317:Bank_all_items
|
||||||
|
317:Bank_X_items_part-1
|
||||||
|
317:Bank_X_items_part-2
|
||||||
|
317:Button_click
|
||||||
|
317:Camera_movement
|
||||||
|
317:Camera_shake
|
||||||
|
317:Chat_interface
|
||||||
|
317:Chat_interface_click
|
||||||
|
317:Chat_settings
|
||||||
|
317:Clear_inventory
|
||||||
|
317:Clear_screen
|
||||||
|
317:Close_window
|
||||||
|
317:Construct_map_region
|
||||||
|
317:Design_screen
|
||||||
|
317:Display_hint_icon
|
||||||
|
317:Drop_item
|
||||||
|
317:Enter_name
|
||||||
|
317:Equip_item
|
||||||
|
317:Flash_sidebar
|
||||||
|
317:Focus_change
|
||||||
|
317:Follow
|
||||||
|
317:Force_client_setting
|
||||||
|
317:Hidden_Interface
|
||||||
|
317:Idle
|
||||||
|
317:Idle_logout
|
||||||
|
317:Initialize_player
|
||||||
|
317:Input_amount
|
||||||
|
317:Interface_animation
|
||||||
|
317:Interface_color
|
||||||
|
317:Interface_item
|
||||||
|
317:Interface_model_rotation
|
||||||
|
317:Interface_offset
|
||||||
|
317:Interface_over_tab
|
||||||
|
317:Inventory_overlay
|
||||||
|
317:Item_action_1
|
||||||
|
317:Item_on_floor
|
||||||
|
317:Item_on_item
|
||||||
|
317:Item_on_object
|
||||||
|
317:Item_on_player
|
||||||
|
317:Light_item
|
||||||
|
317:Load_map_region
|
||||||
|
317:Loading_finished
|
||||||
|
317:Logout
|
||||||
|
317:Magic_on_items
|
||||||
|
317:Magic_on_player
|
||||||
|
317:Minimap_State
|
||||||
|
317:Move_item
|
||||||
|
317:Music
|
||||||
|
317:NPC_action_1
|
||||||
|
317:NPC_action_2
|
||||||
|
317:NPC_action_3
|
||||||
|
317:NPC_head_on_interface
|
||||||
|
317:Object_action_1
|
||||||
|
317:Object_action_2
|
||||||
|
317:Object_action_3
|
||||||
|
317:Open_chatbox_interface
|
||||||
|
317:Open_welcome_screen
|
||||||
|
317:Pickup_ground_item
|
||||||
|
317:Play_song
|
||||||
|
317:Player_command
|
||||||
|
317:Player_head_to_interface
|
||||||
|
317:Privacy_options
|
||||||
|
317:Region_change
|
||||||
|
317:Remove_friend
|
||||||
|
317:Remove_ignore
|
||||||
|
317:Report_player
|
||||||
|
317:Reset_button_state
|
||||||
|
317:Reset_camera
|
||||||
|
317:Reset_destination
|
||||||
|
317:Run_energy
|
||||||
|
317:Scroll_position
|
||||||
|
317:Send_add_ignore
|
||||||
|
317:Send_message
|
||||||
|
317:Send_sidebar_interface
|
||||||
|
317:Set_interface_text
|
||||||
|
317:Show_interface
|
||||||
|
317:Show_multi-combat
|
||||||
|
317:Show_tab
|
||||||
|
317:Skill_level
|
||||||
|
317:Song_Queue
|
||||||
|
317:System_update
|
||||||
|
317:Trade_answer
|
||||||
|
317:Trade_request
|
||||||
|
317:Unequip_item
|
||||||
|
317:Update_item_container
|
||||||
|
317:Walkable_interface
|
||||||
|
317:Weight
|
||||||
|
317:Send_add_friend
|
||||||
|
317:Begin_player_updating
|
||||||
|
317:Object_removal
|
||||||
|
317:Create_Projectile
|
||||||
|
317:Object_spawn
|
||||||
|
317:Send_private_message
|
||||||
|
317:NPC_Dialogue
|
||||||
|
317:Mouse_click
|
||||||
|
317:Ground_Item_Action
|
||||||
|
357_Protocol
|
||||||
|
377_Protocol
|
||||||
|
377:Interface_Animation
|
||||||
|
377:Move_Camera
|
||||||
|
377:Logout
|
||||||
|
377:Send_Sidebar_Interface
|
||||||
|
377:Animation_Reset
|
||||||
|
377:Interface_Item
|
||||||
|
377:Send_Sound
|
||||||
|
377:Reset_Ground_Items_and_Objects
|
||||||
|
377:Play_Ambient_Wave
|
||||||
|
377:Skill_Level
|
||||||
|
377:Walkable_Interface
|
||||||
|
377:Construct_Map_Region
|
||||||
|
377:Input_Amount
|
||||||
|
377:Create_Static_Graphic
|
||||||
|
377:Send_Message
|
||||||
|
377:Camera_Shake
|
||||||
|
377:Open_Welcome_Screen
|
||||||
|
377:Send_Add_Friend
|
||||||
|
377:Send_Ground_Item
|
||||||
|
377:Reset_Button_State
|
||||||
|
377:Run_Energy
|
||||||
|
377:Initialize_Player
|
||||||
|
377:Inventory_Overlay
|
||||||
|
443_Protocol
|
||||||
|
Class_Check
|
||||||
|
464_Protocol
|
||||||
|
468_Protocol
|
||||||
|
474_Protocol
|
||||||
|
474:Remove_ignore
|
||||||
|
474:Fourth_Interface_Option
|
||||||
|
508_Protocol
|
||||||
|
634_Protocol
|
||||||
|
666_Protocol
|
||||||
|
668_Protocol
|
||||||
|
718_Protocol
|
||||||
|
718:Friends_packet
|
||||||
|
718:Close_window
|
||||||
|
718:Player_under_NPC_priority
|
||||||
|
718:Music_effect
|
||||||
|
718:Interface
|
||||||
|
718:Open_URL
|
||||||
|
742_Protocol
|
||||||
|
Category:RS3
|
||||||
|
Category:Cache
|
||||||
|
Archive_Format
|
||||||
|
JAGGRAB_Protocol
|
||||||
|
Ondemand_Protocol
|
||||||
|
Map_Region_System
|
||||||
|
317:Mage_NPC
|
||||||
|
317:Player_Option
|
51
mediawiki2gollum.sh
Executable file
51
mediawiki2gollum.sh
Executable file
@ -0,0 +1,51 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
# Work relative to the script's own directory; bail out if that fails, because
# the next step is an rm -rf of ./wiki/ relative to the current directory.
cd "$(dirname "$0")" || exit 1
|
||||||
|
|
||||||
|
# commit MESSAGE
# Stages everything under the current directory and records it as one git
# commit with MESSAGE as the commit message.
function commit {
    git add .
    git commit -m "${1}"
}
|
||||||
|
|
||||||
|
# Rebuild ./wiki/ from scratch out of the MediaWiki XML export.
rm -rf ./wiki/
ruby mw-to-gollum.rb -f RSWiki-20150610160818.xml -d ./wiki/ || exit 1

# Everything below rewrites files in place with `sed -i ... *`. Refuse to go on
# unless we are inside the generated wiki, otherwise the tools in this
# directory would be the ones getting rewritten.
cd ./wiki/ || exit 1

cp ../.ruby-version ./
commit 'Added .ruby-version'

cp Main-Page.mediawiki Home.mediawiki
commit 'Copied Main-Page to Home'

# [[:Target]] -> [[Target]]
sed -ri 's/\[\[:([^]]*)\]\]/[[\1]]/g' *
commit 'Fix links: Remove leading :'

# Links with a display name: in the "[[Target|" part, turn ':' and '_' into spaces.
# NOTE: only '[' is escaped for sed; a target containing '/' would break the
# s/// expression below.
grep -ho '\[\[[^|]*|' * | sort | uniq | grep '[_:]' | while IFS= read -r line
do
    sed_line=$(echo "$line" | sed -e 's/\[/\\[/g')
    rep_line=$(echo "$line" | tr ':' ' ' | tr '_' ' ')
    sed -i "s/$sed_line/$rep_line/g" *
done
commit 'Fix links: Change underscores and colons to spaces where there is a link name'

# Links without a display name: [[Ns:Page]] -> [[Ns Page]]
grep -ho '\[\[[^:]*:[^]]*\]\]' * | sort | uniq | while IFS= read -r line
do
    sed_line=$(echo "$line" | sed -e 's/\[/\\[/g')
    rep_line=$(echo "$line" | tr ':' ' ')
    sed -i "s/$sed_line/$rep_line/g" *
done
commit 'Fix links: Change colons to spaces where there is no link name'

# Tag every packet page with the generic category first, then with its
# revision-specific one. Each `1i` inserts above the previous insertion.
# The generic list names every revision exactly once: listing a glob twice
# would insert the line twice, and omitting one would leave those pages untagged.
sed -i '1i [[Category Packet]]' 194-* 317-* 377-* 474-* 718-*
sed -i '1i [[Category Packet 194]]' 194-*
sed -i '1i [[Category Packet 317]]' 317-*
sed -i '1i [[Category Packet 377]]' 377-*
sed -i '1i [[Category Packet 474]]' 474-*
sed -i '1i [[Category Packet 718]]' 718-*
commit 'Add categories to packets'

cp ../category.sh ./
./category.sh
commit 'Add category.sh and generate initial category pages'

# Serve the converted wiki read-only (path is specific to the original author's machine).
/home/mopar/apps/rbenv/versions/1.9.3-p392/bin/gollum --no-edit
|
51
mw-to-gollum.rb
Normal file
51
mw-to-gollum.rb
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
require 'rubygems'
|
||||||
|
require 'hpricot'
|
||||||
|
require 'gollum'
|
||||||
|
require 'gollum-lib'
|
||||||
|
require 'optparse'
|
||||||
|
require 'git'
|
||||||
|
|
||||||
|
# from https://gist.github.com/MasterRoot24/ab85de0e7b82ba7f5974
|
||||||
|
# gem install hpricot gollum git wikicloth
|
||||||
|
|
||||||
|
# Parse command line options; both --file and --directory are required.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = 'Usage: ruby mw-to-gollum.rb --file input-file.xml --directory new.wiki'
  opts.on('-f FILE', '--file FILE', 'MediaWiki export file to import') do |v|
    options[:file] = v
  end
  opts.on('-d DIRECTORY', '--directory DIRECTORY', 'Destination directory in which to create a new Gollum wiki') do |v|
    options[:destination] = v
  end
end
parser.parse!

# Fail early with the usage text instead of a TypeError from File.open(nil)
# or Git.init(nil) further down.
abort parser.to_s unless options[:file] && options[:destination]

# Open the input file (block form closes it even if the import raises) and
# create the output repo if it doesn't already exist.
File.open(options[:file], 'r') do |file|
  git = Git.init(options[:destination])
  wiki = Gollum::Wiki.new(options[:destination])
  doc = Hpricot(file)

  # Get the Git user name and email used as author of the import commits
  name = git.config('user.name')
  email = git.config('user.email')

  # Loop through each page in the MediaWiki dump file and create a new page in the Gollum wiki
  doc.search('/mediawiki/page').each do |el|
    title = nil
    begin
      # Colons are replaced with spaces in the Gollum page name.
      title = el.at('title').inner_text.tr(':', ' ')
      content = el.at('text').inner_text
      commit = { :message => "Import MediaWiki page #{title} into Gollum",
                 :name => name,
                 :email => email }
      puts "Writing page #{title}"
      wiki.write_page(title, :mediawiki, content, commit)
    rescue Gollum::DuplicatePageError
      puts "Duplicate #{title}"
    rescue StandardError => e
      # Report and move on to the next page. StandardError (not Exception) so
      # that Ctrl-C and exit still stop the import.
      puts e, e.backtrace
    end
  end
end
|
Loading…
Reference in New Issue
Block a user